diff --git a/crmd/Makefile.am b/crmd/Makefile.am index 0eab529514..5d60a25b67 100644 --- a/crmd/Makefile.am +++ b/crmd/Makefile.am @@ -1,57 +1,58 @@ # # Copyright (C) 2004 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common halibdir = $(CRM_DAEMON_DIR) ## binary progs halib_PROGRAMS = crmd ## SOURCES -noinst_HEADERS = crmd.h crmd_fsa.h crmd_messages.h fsa_defines.h \ - fsa_matrix.h fsa_proto.h crmd_utils.h crmd_callbacks.h \ - crmd_lrm.h te_callbacks.h tengine.h +noinst_HEADERS = crmd_alerts.h crmd_callbacks.h crmd_fsa.h crmd.h \ + crmd_lrm.h crmd_messages.h crmd_utils.h fsa_defines.h \ + fsa_matrix.h fsa_proto.h membership.h te_callbacks.h \ + tengine.h throttle.h crmd_CFLAGS = $(CFLAGS_HARDENED_EXE) crmd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) crmd_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/transition/libtransitioner.la \ $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(CLUSTERLIBS) crmd_SOURCES = main.c crmd.c corosync.c crmd_alerts.c \ fsa.c control.c messages.c membership.c callbacks.c attrd.c \ election.c join_client.c join_dc.c subsystems.c throttle.c \ cib.c pengine.c tengine.c lrm.c lrm_state.c remote_lrmd_ra.c \ utils.c misc.c te_events.c te_actions.c te_utils.c te_callbacks.c if BUILD_HEARTBEAT_SUPPORT crmd_SOURCES += heartbeat.c endif if BUILD_XML_HELP man7_MANS = crmd.7 endif CLEANFILES = $(man7_MANS) diff --git a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt index a44e21a3cf..3418881fe1 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Advanced-Resources.txt @@ -1,1029 +1,1355 @@ = Advanced Resource Types = [[group-resources]] == Groups - A Syntactic Shortcut == indexterm:[Group Resources] -indexterm:[Resources,Groups] +indexterm:[Resource,Groups] One of the most common elements of a cluster is a set of resources that need to be located together, start sequentially, and stop in the reverse order. To simplify this configuration, we support the concept of groups. .A group of two primitive resources ====== [source,XML] ------- ------- ====== Although the example above contains only two resources, there is no limit to the number of resources a group can contain. The example is also sufficient to explain the fundamental properties of a group: * Resources are started in the order they appear in (+Public-IP+ first, then +Email+) * Resources are stopped in the reverse order to which they appear in (+Email+ first, then +Public-IP+) If a resource in the group can't run anywhere, then nothing after that is allowed to run, too. * If +Public-IP+ can't run anywhere, neither can +Email+; * but if +Email+ can't run anywhere, this does not affect +Public-IP+ in any way The group above is logically equivalent to writing: .How the cluster sees a group resource ====== [source,XML] ------- ------- ====== Obviously as the group grows bigger, the reduced configuration effort can become significant. Another (typical) example of a group is a DRBD volume, the filesystem mount, an IP address, and an application that uses them. === Group Properties === .Properties of a Group Resource [width="95%",cols="3m,5<",options="header",align="center"] |========================================================= |Field |Description |id |A unique name for the group indexterm:[id,Group Resource Property] indexterm:[Resource,Group Property,id] |========================================================= === Group Options === Groups inherit the +priority+, +target-role+, and +is-managed+ properties from primitive resources. See <> for information about those properties. === Group Instance Attributes === Groups have no instance attributes. However, any that are set for the group object will be inherited by the group's children. === Group Contents === Groups may only contain a collection of cluster resources (see <>). To refer to a child of a group resource, just use the child's +id+ instead of the group's. === Group Constraints === Although it is possible to reference a group's children in constraints, it is usually preferable to reference the group itself. .Some constraints involving groups ====== [source,XML] ------- ------- ====== === Group Stickiness === indexterm:[resource-stickiness,Groups] Stickiness, the measure of how much a resource wants to stay where it is, is additive in groups. Every active resource of the group will contribute its stickiness value to the group's total. So if the default +resource-stickiness+ is 100, and a group has seven members, five of which are active, then the group as a whole will prefer its current location with a score of 500. [[s-resource-clone]] == Clones - Resources That Get Active on Multiple Hosts == indexterm:[Clone Resources] -indexterm:[Resources,Clones] +indexterm:[Resource,Clones] Clones were initially conceived as a convenient way to start multiple instances of an IP address resource and have them distributed throughout the cluster for load balancing. They have turned out to quite useful for a number of purposes including integrating with the Distributed Lock Manager (used by many cluster filesystems), the fencing subsystem, and OCFS2. You can clone any resource, provided the resource agent supports it. Three types of cloned resources exist: * Anonymous * Globally unique * Stateful 'Anonymous' clones are the simplest. These behave completely identically everywhere they are running. Because of this, there can be only one copy of an anonymous clone active per machine. 'Globally unique' clones are distinct entities. A copy of the clone running on one machine is not equivalent to another instance on another node, nor would any two copies on the same node be equivalent. 'Stateful' clones are covered later in <>. .A clone of an LSB resource ====== [source,XML] ------- ------- ====== === Clone Properties === .Properties of a Clone Resource [width="95%",cols="3m,5<",options="header",align="center"] |========================================================= |Field |Description |id |A unique name for the clone indexterm:[id,Clone Property] indexterm:[Clone,Property,id] |========================================================= === Clone Options === Options inherited from <> resources: +priority, target-role, is-managed+ .Clone-specific configuration options [width="95%",cols="1m,1,3<",options="header",align="center"] |========================================================= |Field |Default |Description |clone-max |number of nodes in cluster |How many copies of the resource to start indexterm:[clone-max,Clone Option] indexterm:[Clone,Option,clone-max] |clone-node-max |1 |How many copies of the resource can be started on a single node indexterm:[clone-node-max,Clone Option] indexterm:[Clone,Option,clone-node-max] |clone-min |1 |Require at least this number of clone instances to be runnable before allowing resources depending on the clone to be runnable '(since 1.1.14)' indexterm:[clone-min,Clone Option] indexterm:[Clone,Option,clone-min] |notify |true |When stopping or starting a copy of the clone, tell all the other copies beforehand and again when the action was successful. Allowed values: +false+, +true+ indexterm:[notify,Clone Option] indexterm:[Clone,Option,notify] |globally-unique |false |Does each copy of the clone perform a different function? Allowed values: +false+, +true+ indexterm:[globally-unique,Clone Option] indexterm:[Clone,Option,globally-unique] |ordered |false |Should the copies be started in series (instead of in parallel)? Allowed values: +false+, +true+ indexterm:[ordered,Clone Option] indexterm:[Clone,Option,ordered] |interleave |false |If this clone depends on another clone via an ordering constraint, is it allowed to start after the local instance of the other clone starts, rather than wait for all instances of the other clone to start? Allowed values: +false+, +true+ indexterm:[interleave,Clone Option] indexterm:[Clone,Option,interleave] |========================================================= === Clone Instance Attributes === Clones have no instance attributes; however, any that are set here will be inherited by the clone's children. === Clone Contents === Clones must contain exactly one primitive or group resource. [WARNING] You should never reference the name of a clone's child. If you think you need to do this, you probably need to re-evaluate your design. === Clone Constraints === In most cases, a clone will have a single copy on each active cluster node. If this is not the case, you can indicate which nodes the cluster should preferentially assign copies to with resource location constraints. These constraints are written no differently from those for primitive resources except that the clone's +id+ is used. .Some constraints involving clones ====== [source,XML] ------- ------- ====== Ordering constraints behave slightly differently for clones. In the example above, +apache-stats+ will wait until all copies of +apache-clone+ that need to be started have done so before being started itself. Only if _no_ copies can be started will +apache-stats+ be prevented from being active. Additionally, the clone will wait for +apache-stats+ to be stopped before stopping itself. Colocation of a primitive or group resource with a clone means that the resource can run on any machine with an active copy of the clone. The cluster will choose a copy based on where the clone is running and the resource's own location preferences. Colocation between clones is also possible. If one clone +A+ is colocated with another clone +B+, the set of allowed locations for +A+ is limited to nodes on which +B+ is (or will be) active. Placement is then performed normally. [[s-clone-stickiness]] === Clone Stickiness === indexterm:[resource-stickiness,Clones] To achieve a stable allocation pattern, clones are slightly sticky by default. If no value for +resource-stickiness+ is provided, the clone will use a value of 1. Being a small value, it causes minimal disturbance to the score calculations of other resources but is enough to prevent Pacemaker from needlessly moving copies around the cluster. [NOTE] ==== For globally unique clones, this may result in multiple instances of the clone staying on a single node, even after another eligible node becomes active (for example, after being put into standby mode then made active again). If you do not want this behavior, specify a +resource-stickiness+ of 0 for the clone temporarily and let the cluster adjust, then set it back to 1 if you want the default behavior to apply again. ==== === Clone Resource Agent Requirements === Any resource can be used as an anonymous clone, as it requires no additional support from the resource agent. Whether it makes sense to do so depends on your resource and its resource agent. Globally unique clones do require some additional support in the resource agent. In particular, it must only respond with +$\{OCF_SUCCESS}+ if the node has that exact instance active. All other probes for instances of the clone should result in +$\{OCF_NOT_RUNNING}+ (or one of the other OCF error codes if they are failed). Individual instances of a clone are identified by appending a colon and a numerical offset, e.g. +apache:2+. Resource agents can find out how many copies there are by examining the +OCF_RESKEY_CRM_meta_clone_max+ environment variable and which copy it is by examining +OCF_RESKEY_CRM_meta_clone+. The resource agent must not make any assumptions (based on +OCF_RESKEY_CRM_meta_clone+) about which numerical instances are active. In particular, the list of active copies will not always be an unbroken sequence, nor always start at 0. ==== Clone Notifications ==== Supporting notifications requires the +notify+ action to be implemented. If supported, the notify action will be passed a number of extra variables which, when combined with additional context, can be used to calculate the current state of the cluster and what is about to happen to it. .Environment variables supplied with Clone notify actions [width="95%",cols="5,3<",options="header",align="center"] |========================================================= |Variable |Description |OCF_RESKEY_CRM_meta_notify_type |Allowed values: +pre+, +post+ indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,type] indexterm:[type,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_operation |Allowed values: +start+, +stop+ indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,operation] indexterm:[operation,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_start_resource |Resources to be started indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_resource] indexterm:[start_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_stop_resource |Resources to be stopped indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_resource] indexterm:[stop_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_active_resource |Resources that are running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_resource] indexterm:[active_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_inactive_resource |Resources that are not running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,inactive_resource] indexterm:[inactive_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_start_uname |Nodes on which resources will be started indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_uname] indexterm:[start_uname,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_stop_uname |Nodes on which resources will be stopped indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_uname] indexterm:[stop_uname,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_active_uname |Nodes on which resources are running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_uname] indexterm:[active_uname,Notification Environment Variable] |========================================================= The variables come in pairs, such as +OCF_RESKEY_CRM_meta_notify_start_resource+ and +OCF_RESKEY_CRM_meta_notify_start_uname+ and should be treated as an array of whitespace-separated elements. +OCF_RESKEY_CRM_meta_notify_inactive_resource+ is an exception as the matching +uname+ variable does not exist since inactive resources are not running on any node. Thus in order to indicate that +clone:0+ will be started on +sles-1+, +clone:2+ will be started on +sles-3+, and +clone:3+ will be started on +sles-2+, the cluster would set .Notification variables ====== [source,Bash] ------- OCF_RESKEY_CRM_meta_notify_start_resource="clone:0 clone:2 clone:3" OCF_RESKEY_CRM_meta_notify_start_uname="sles-1 sles-3 sles-2" ------- ====== ==== Proper Interpretation of Notification Environment Variables ==== .Pre-notification (stop): * Active resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+ * Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ .Post-notification (stop) / Pre-notification (start): * Active resources ** +$OCF_RESKEY_CRM_meta_notify_active_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Inactive resources ** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ .Post-notification (start): * Active resources: ** +$OCF_RESKEY_CRM_meta_notify_active_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Inactive resources: ** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ [[s-resource-multistate]] == Multi-state - Resources That Have Multiple Modes == indexterm:[Multi-state Resources] -indexterm:[Resources,Multi-state] +indexterm:[Resource,Multi-state] Multi-state resources are a specialization of clone resources; please ensure you understand <> before continuing! Multi-state resources allow the instances to be in one of two operating modes (called 'roles'). The roles are called 'master' and 'slave', but can mean whatever you wish them to mean. The only limitation is that when an instance is started, it must come up in the slave role. === Multi-state Properties === .Properties of a Multi-State Resource [width="95%",cols="3m,5<",options="header",align="center"] |========================================================= |Field |Description |id |Your name for the multi-state resource indexterm:[id,Multi-State Property] indexterm:[Multi-State,Property,id] |========================================================= === Multi-state Options === Options inherited from <> resources: +priority+, +target-role+, +is-managed+ Options inherited from <> resources: +clone-max+, +clone-node-max+, +notify+, +globally-unique+, +ordered+, +interleave+ .Multi-state-specific resource configuration options [width="95%",cols="1m,1,3<",options="header",align="center"] |========================================================= |Field |Default |Description |master-max |1 |How many copies of the resource can be promoted to the +master+ role indexterm:[master-max,Multi-State Option] indexterm:[Multi-State,Option,master-max] |master-node-max |1 |How many copies of the resource can be promoted to the +master+ role on a single node indexterm:[master-node-max,Multi-State Option] indexterm:[Multi-State,Option,master-node-max] |========================================================= === Multi-state Instance Attributes === Multi-state resources have no instance attributes; however, any that are set here will be inherited by a master's children. === Multi-state Contents === Masters must contain exactly one primitive or group resource. [WARNING] You should never reference the name of a master's child. If you think you need to do this, you probably need to re-evaluate your design. === Monitoring Multi-State Resources === The usual monitor actions are insufficient to monitor a multi-state resource, because pacemaker needs to verify not only that the resource is active, but also that its actual role matches its intended one. Define two monitoring actions: the usual one will cover the slave role, and an additional one with +role="master"+ will cover the master role. .Monitoring both states of a multi-state resource ====== [source,XML] ------- ------- ====== [IMPORTANT] =========== It is crucial that _every_ monitor operation has a different interval! Pacemaker currently differentiates between operations only by resource and interval; so if (for example) a master/slave resource had the same monitor interval for both roles, Pacemaker would ignore the role when checking the status -- which would cause unexpected return codes, and therefore unnecessary complications. =========== === Multi-state Constraints === In most cases, multi-state resources will have a single copy on each active cluster node. If this is not the case, you can indicate which nodes the cluster should preferentially assign copies to with resource location constraints. These constraints are written no differently from those for primitive resources except that the master's +id+ is used. When considering multi-state resources in constraints, for most purposes it is sufficient to treat them as clones. The exception is that the +first-action+ and/or +then-action+ fields for ordering constraints may be set to +promote+ or +demote+ to constrain the master role, and colocation constraints may contain +rsc-role+ and/or +with-rsc-role+ fields. .Additional colocation constraint options for multi-state resources [width="95%",cols="1m,1,3<",options="header",align="center"] |========================================================= |Field |Default |Description |rsc-role |Started |An additional attribute of colocation constraints that specifies the role that +rsc+ must be in. Allowed values: +Started+, +Master+, +Slave+. indexterm:[rsc-role,Ordering Constraints] indexterm:[Constraints,Ordering,rsc-role] |with-rsc-role |Started |An additional attribute of colocation constraints that specifies the role that +with-rsc+ must be in. Allowed values: +Started+, +Master+, +Slave+. indexterm:[with-rsc-role,Ordering Constraints] indexterm:[Constraints,Ordering,with-rsc-role] |========================================================= .Constraints involving multi-state resources ====== [source,XML] ------- ------- ====== In the example above, +myApp+ will wait until one of the database copies has been started and promoted to master before being started itself on the same node. Only if no copies can be promoted will +myApp+ be prevented from being active. Additionally, the cluster will wait for +myApp+ to be stopped before demoting the database. Colocation of a primitive or group resource with a multi-state resource means that it can run on any machine with an active copy of the multi-state resource that has the specified role (+master+ or +slave+). In the example above, the cluster will choose a location based on where database is running as a +master+, and if there are multiple +master+ instances it will also factor in +myApp+'s own location preferences when deciding which location to choose. Colocation with regular clones and other multi-state resources is also possible. In such cases, the set of allowed locations for the +rsc+ clone is (after role filtering) limited to nodes on which the +with-rsc+ multi-state resource is (or will be) in the specified role. Placement is then performed as normal. ==== Using Multi-state Resources in Colocation Sets ==== .Additional colocation set options relevant to multi-state resources [width="95%",cols="1m,1,6<",options="header",align="center"] |========================================================= |Field |Default |Description |role |Started |The role that 'all members' of the set must be in. Allowed values: +Started+, +Master+, +Slave+. indexterm:[role,Ordering Constraints] indexterm:[Constraints,Ordering,role] |========================================================= In the following example +B+'s master must be located on the same node as +A+'s master. Additionally resources +C+ and +D+ must be located on the same node as +A+'s and +B+'s masters. .Colocate C and D with A's and B's master instances ====== [source,XML] ------- ------- ====== ==== Using Multi-state Resources in Ordering Sets ==== .Additional ordered set options relevant to multi-state resources [width="95%",cols="1m,1,3<",options="header",align="center"] |========================================================= |Field |Default |Description |action |value of +first-action+ |An additional attribute of ordering constraint sets that specifies the action that applies to 'all members' of the set. Allowed values: +start+, +stop+, +promote+, +demote+. indexterm:[action,Ordering Constraints] indexterm:[Constraints,Ordering,action] |========================================================= .Start C and D after first promoting A and B ====== [source,XML] ------- ------- ====== In the above example, +B+ cannot be promoted to a master role until +A+ has been promoted. Additionally, resources +C+ and +D+ must wait until +A+ and +B+ have been promoted before they can start. === Multi-state Stickiness === indexterm:[resource-stickiness,Multi-State] As with regular clones, multi-state resources are slightly sticky by default. See <> for details. === Which Resource Instance is Promoted === During the start operation, most resource agents should call the `crm_master` utility. This tool automatically detects both the resource and host and should be used to set a preference for being promoted. Based on this, +master-max+, and +master-node-max+, the instance(s) with the highest preference will be promoted. An alternative is to create a location constraint that indicates which nodes are most preferred as masters. .Explicitly preferring node1 to be promoted to master ====== [source,XML] ------- ------- ====== === Requirements for Multi-state Resource Agents === Since multi-state resources are an extension of cloned resources, all the requirements for resource agents that support clones are also requirements for resource agents that support multi-state resources. Additionally, multi-state resources require two extra actions, +demote+ and +promote+, which are responsible for changing the state of the resource. Like +start+ and +stop+, they should return +$\{OCF_SUCCESS}+ if they completed successfully or a relevant error code if they did not. The states can mean whatever you wish, but when the resource is started, it must come up in the mode called +slave+. From there the cluster will decide which instances to promote to +master+. In addition to the clone requirements for monitor actions, agents must also _accurately_ report which state they are in. The cluster relies on the agent to report its status (including role) accurately and does not indicate to the agent what role it currently believes it to be in. .Role implications of OCF return codes [width="95%",cols="1,1<",options="header",align="center"] |========================================================= |Monitor Return Code |Description |OCF_NOT_RUNNING |Stopped indexterm:[Return Code,OCF_NOT_RUNNING] |OCF_SUCCESS |Running (Slave) indexterm:[Return Code,OCF_SUCCESS] |OCF_RUNNING_MASTER |Running (Master) indexterm:[Return Code,OCF_RUNNING_MASTER] |OCF_FAILED_MASTER |Failed (Master) indexterm:[Return Code,OCF_FAILED_MASTER] |Other |Failed (Slave) |========================================================= ==== Multi-state Notifications ==== Like clones, supporting notifications requires the +notify+ action to be implemented. If supported, the notify action will be passed a number of extra variables which, when combined with additional context, can be used to calculate the current state of the cluster and what is about to happen to it. .Environment variables supplied with multi-state notify actions footnote:[Emphasized variables are specific to +Master+ resources, and all behave in the same manner as described for Clone resources.] [width="95%",cols="5,3<",options="header",align="center"] |========================================================= |Variable |Description |OCF_RESKEY_CRM_meta_notify_type |Allowed values: +pre+, +post+ indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,type] indexterm:[type,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_operation |Allowed values: +start+, +stop+ indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,operation] indexterm:[operation,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_active_resource |Resources that are running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_resource] indexterm:[active_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_inactive_resource |Resources that are not running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,inactive_resource] indexterm:[inactive_resource,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_master_resource_ |Resources that are running in +Master+ mode indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,master_resource] indexterm:[master_resource,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_slave_resource_ |Resources that are running in +Slave+ mode indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,slave_resource] indexterm:[slave_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_start_resource |Resources to be started indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_resource] indexterm:[start_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_stop_resource |Resources to be stopped indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_resource] indexterm:[stop_resource,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_promote_resource_ |Resources to be promoted indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,promote_resource] indexterm:[promote_resource,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_demote_resource_ |Resources to be demoted indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,demote_resource] indexterm:[demote_resource,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_start_uname |Nodes on which resources will be started indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,start_uname] indexterm:[start_uname,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_stop_uname |Nodes on which resources will be stopped indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,stop_uname] indexterm:[stop_uname,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_promote_uname_ |Nodes on which resources will be promoted indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,promote_uname] indexterm:[promote_uname,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_demote_uname_ |Nodes on which resources will be demoted indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,demote_uname] indexterm:[demote_uname,Notification Environment Variable] |OCF_RESKEY_CRM_meta_notify_active_uname |Nodes on which resources are running indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,active_uname] indexterm:[active_uname,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_master_uname_ |Nodes on which resources are running in +Master+ mode indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,master_uname] indexterm:[master_uname,Notification Environment Variable] |_OCF_RESKEY_CRM_meta_notify_slave_uname_ |Nodes on which resources are running in +Slave+ mode indexterm:[Environment Variable,OCF_RESKEY_CRM_meta_notify_,slave_uname] indexterm:[slave_uname,Notification Environment Variable] |========================================================= ==== Proper Interpretation of Multi-state Notification Environment Variables ==== .Pre-notification (demote): * +Active+ resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+ * +Master+ resources: +$OCF_RESKEY_CRM_meta_notify_master_resource+ * +Slave+ resources: +$OCF_RESKEY_CRM_meta_notify_slave_resource+ * Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ .Post-notification (demote) / Pre-notification (stop): * +Active+ resources: +$OCF_RESKEY_CRM_meta_notify_active_resource+ * +Master+ resources: ** +$OCF_RESKEY_CRM_meta_notify_master_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * +Slave+ resources: +$OCF_RESKEY_CRM_meta_notify_slave_resource+ * Inactive resources: +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ .Post-notification (stop) / Pre-notification (start) * +Active+ resources: ** +$OCF_RESKEY_CRM_meta_notify_active_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * +Master+ resources: ** +$OCF_RESKEY_CRM_meta_notify_master_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * +Slave+ resources: ** +$OCF_RESKEY_CRM_meta_notify_slave_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Inactive resources: ** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ .Post-notification (start) / Pre-notification (promote) * +Active+ resources: ** +$OCF_RESKEY_CRM_meta_notify_active_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * +Master+ resources: ** +$OCF_RESKEY_CRM_meta_notify_master_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * +Slave+ resources: ** +$OCF_RESKEY_CRM_meta_notify_slave_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Inactive resources: ** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ .Post-notification (promote) * +Active+ resources: ** +$OCF_RESKEY_CRM_meta_notify_active_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * +Master+ resources: ** +$OCF_RESKEY_CRM_meta_notify_master_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_demote_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * +Slave+ resources: ** +$OCF_RESKEY_CRM_meta_notify_slave_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_start_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Inactive resources: ** +$OCF_RESKEY_CRM_meta_notify_inactive_resource+ ** plus +$OCF_RESKEY_CRM_meta_notify_stop_resource+ ** minus +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources to be promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources to be demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources to be stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ * Resources that were started: +$OCF_RESKEY_CRM_meta_notify_start_resource+ * Resources that were promoted: +$OCF_RESKEY_CRM_meta_notify_promote_resource+ * Resources that were demoted: +$OCF_RESKEY_CRM_meta_notify_demote_resource+ * Resources that were stopped: +$OCF_RESKEY_CRM_meta_notify_stop_resource+ + +[[s-resource-bundle]] +== Bundles - Isolated Environments == +indexterm:[bundle] +indexterm:[Resource,bundle] +indexterm:[Docker,bundle] + +Pacemaker (version 1.1.17 and later) supports a special syntax for combining an +isolated environment with the infrastructure support that it needs: the +'bundle'. + +The only isolation technology currently supported by Pacemaker bundles +is https://www.docker.com/[Docker] containers. +footnote:[Docker is a trademark of Docker, Inc. No endorsement by or +association with Docker, Inc. is implied.] + +.A bundle for a containerized web server +==== +[source,XML] +---- + + + + + + + + + + + +---- +==== + +=== Bundle Properties === + +.Properties of a Bundle +[width="95%",cols="3m,5<",options="header",align="center"] +|========================================================= + +|Field +|Description + +|id +|A unique name for the bundle (required) + indexterm:[id,bundle] + indexterm:[bundle,Property,id] + +|description +|Arbitrary text (not used by Pacemaker) + indexterm:[description,bundle] + indexterm:[bundle,Property,description] + +|========================================================= + +=== Docker Properties === + +A bundle must contain exactly one ++ element. + +Before configuring a bundle in Pacemaker, the user must install Docker and +supply a fully configured Docker image on every node allowed to run the bundle. + +Pacemaker will create an implicit +ocf:heartbeat:docker+ resource to manage +a bundle's Docker container. + +.Properties of a Bundle's Docker Element +[width="95%",cols="2m,1,4<",options="header",align="center"] +|========================================================= + +|Field +|Default +|Description + +|image +| +|Docker image tag (required) + indexterm:[image,Docker] + indexterm:[Docker,Property,image] + +|replicas +|Value of +masters+ if that is positive, else 1 +|A positive integer specifying the number of container instances to launch + indexterm:[replicas,Docker] + indexterm:[Docker,Property,replicas] + +|replicas-per-host +|1 +|A positive integer specifying the number of container instances allowed to run + on a single node + indexterm:[replicas-per-host,Docker] + indexterm:[Docker,Property,replicas-per-host] + +|masters +|0 +|A non-negative integer that, if positive, indicates that the containerized + service should be treated as a master/slave service, with this many master + instances allowed + indexterm:[masters,Docker] + indexterm:[Docker,Property,masters] + +|network +| +|If specified, this will be passed to +docker run+ as the + https://docs.docker.com/engine/reference/run/#network-settings[network setting] + for the Docker container. + indexterm:[network,Docker] + indexterm:[Docker,Property,network] + +|options +| +|Extra command-line options to pass to `docker run` + indexterm:[options,Docker] + indexterm:[Docker,Property,options] + +|========================================================= + +=== Bundle Network Properties === + +A bundle may optionally contain one ++ element. +indexterm:[bundle,network] + +.Properties of a Bundle's Network Element +[width="95%",cols="2m,1,4<",options="header",align="center"] +|========================================================= + +|Field +|Default +|Description + +|ip-range-start +| +|If specified, Pacemaker will create an implicit +ocf:heartbeat:IPaddr2+ + resource for each container instance, starting with this IP address, + using up to +replicas+ sequential addresses. These addresses can be used + from the host's network to reach the service inside the container, though + it is not visible within the container itself. Only IPv4 addresses are + currently supported. + indexterm:[ip-range-start,network] + indexterm:[network,Property,ip-range-start] + +|host-netmask +|32 +|If +ip-range-start+ is specified, the IP addresses are created with this + CIDR netmask (as a number of bits). + indexterm:[host-netmask,network] + indexterm:[network,Property,host-netmask] + +|host-interface +| +|If +ip-range-start+ is specified, the IP addresses are created on this + host interface (by default, it will be determined from the IP address). + indexterm:[host-interface,network] + indexterm:[network,Property,host-interface] + +|control-port +| +|If the bundle contains a +primitive+, the cluster will use this integer TCP + port for communication with Pacemaker Remote inside the container. This takes + precedence over the value of any PCMK_remote_port environment variable set + in the container image. This can allow a +primitive+ to be specified without + using +ip-start-range+, in which case +replicas-per-host+ must be 1. + indexterm:[control-port,network] + indexterm:[network,Property,control-port] + +|========================================================= + +Additionally, a ++ element may optionally contain one or more +++ elements. +indexterm:[bundle,network,port-mapping] + +.Properties of a Bundle's Port-Mapping Element +[width="95%",cols="2m,1,4<",options="header",align="center"] +|========================================================= + +|Field +|Default +|Description + +|id +| +|A unique name for the port mapping (required) + indexterm:[id,port-mapping] + indexterm:[port-mapping,Property,id] + +|port +| +|If this is specified, connections to this TCP port number on the host network + (on the container's assigned IP address, if +ip-range-start+ is specified) + will be forwarded to the container network. Exactly one of +port+ or +range+ + must be specified in a +port-mapping+. + indexterm:[port,port-mapping] + indexterm:[port-mapping,Property,port] + +|internal-port +|value of +port+ +|If +port+ and this are specified, connections to +port+ on the host's network + will be forwarded to this port on the container network. + indexterm:[internal-port,port-mapping] + indexterm:[port-mapping,Property,internal-port] + +|range +| +|If this is specified, connections to these TCP port numbers (expressed as + 'first_port'-'last_port') on the host network (on the container's assigned IP + address, if +ip-range-start+ is specified) will be forwarded to the same ports + in the container network. Exactly one of +port+ or +range+ must be specified + in a +port-mapping+. + indexterm:[range,port-mapping] + indexterm:[port-mapping,Property,range] + +|========================================================= + +[NOTE] +==== +If the bundle contains a +primitive+, Pacemaker will automatically map the ++control-port+, so it is not necessary to specify that port in a ++port-mapping+. +==== + +=== Bundle Storage Properties === + +A bundle may optionally contain one ++ element. A ++ element +has no properties of its own, but may contain one or more ++ +elements. +indexterm:[bundle,storage,storage-mapping] + +.Properties of a Bundle's Storage-Mapping Element +[width="95%",cols="2m,1,4<",options="header",align="center"] +|========================================================= + +|Field +|Default +|Description + +|id +| +|A unique name for the storage mapping (required) + indexterm:[id,storage-mapping] + indexterm:[storage-mapping,Property,id] + +|source-dir +| +|The absolute path on the host's filesystem that will be mapped into the + container. Exactly one of +source-dir+ and +source-dir-root+ must be specified + in a +storage-mapping+. + indexterm:[source-dir,storage-mapping] + indexterm:[storage-mapping,Property,source-dir] + +|source-dir-root +| +|The start of a path on the host's filesystem that will be mapped into the + container, using a different subdirectory on the host for each container + instance. Exactly one of +source-dir+ and +source-dir-root+ must be specified + in a +storage-mapping+. + indexterm:[source-dir-root,storage-mapping] + indexterm:[storage-mapping,Property,source-dir-root] + +|target-dir +| +|The path name within the container where the host storage will be mapped + (required) + indexterm:[target-dir,storage-mapping] + indexterm:[storage-mapping,Property,target-dir] + +|options +| +|File system mount options to use when mapping the storage + indexterm:[options,storage-mapping] + indexterm:[storage-mapping,Property,options] + +|========================================================= + +[NOTE] +==== +If the bundle contains a +primitive+, +Pacemaker will automatically map the equivalent of ++source-dir=/etc/pacemaker/authkey target-dir=/etc/pacemaker/authkey+ +and +source-dir-root=/var/log/container target-dir=/var/log+ into the +container, so it is not necessary to specify those paths in a ++storage-mapping+. +==== + +=== Bundle Primitive === + +A bundle may optionally contain one ++ resource +(see <>). The primitive may have operations, +instance attributes and meta-attributes defined, as usual. + +If a bundle contains a primitive resource, the container image must include +the Pacemaker Remote daemon, and either +ip-range-start+ or +control-port+ +must be configured in the bundle. Pacemaker will create an implicit ++ocf:pacemaker:remote+ resource for the connection, launch Pacemaker Remote +within the container, and monitor and manage the primitive resource via +Pacemaker Remote. + +If the bundle has more than one container instance (replica), the primitive +resource will function as an implicit clone (see <>) -- +a multistate clone if the bundle has +masters+ greater than zero +(see <>). + +[IMPORTANT] +==== +If Pacemaker Remote nodes are used in a cluster with bundle resources that +contain a +primitive+, it is strongly recommended to configure location +constraints prohibiting the bundles from running on those nodes. Running a +bundle with a +primitive+ on a Pacemaker Remote node might work if ++ip-range-start+ is used, but that configuration has not yet been tested. +==== + +=== Limitations of Bundles === + +Currently, bundles may not be cloned, or included in groups or colocation +constraints. This includes the bundle's primitive and any resources +implicitly created by Pacemaker for the bundle. + +Bundles do not have instance attributes, meta-attributes, utilization +attributes, or operations of their own, though a primitive included in a bundle +may. diff --git a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt index 23912d3e8b..2beaed87d4 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Resources.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Resources.txt @@ -1,848 +1,849 @@ = Cluster Resources = +[[s-resource-primitive]] == What is a Cluster Resource? == indexterm:[Resource] A resource is a service made highly available by a cluster. The simplest type of resource, a 'primitive' resource, is described in this chapter. More complex forms, such as groups and clones, are described in later chapters. Every primitive resource has a 'resource agent'. A resource agent is an external program that abstracts the service it provides and present a consistent view to the cluster. This allows the cluster to be agnostic about the resources it manages. The cluster doesn't need to understand how the resource works because it relies on the resource agent to do the right thing when given a `start`, `stop` or `monitor` command. For this reason, it is crucial that resource agents are well-tested. Typically, resource agents come in the form of shell scripts. However, they can be written using any technology (such as C, Python or Perl) that the author is comfortable with. [[s-resource-supported]] == Resource Classes == indexterm:[Resource,class] Pacemaker supports several classes of agents: * OCF * LSB * Upstart * Systemd * Service * Fencing * Nagios Plugins === Open Cluster Framework === indexterm:[Resource,OCF] indexterm:[OCF,Resources] indexterm:[Open Cluster Framework,Resources] The OCF standard footnote:[See http://www.opencf.org/cgi-bin/viewcvs.cgi/specs/ra/resource-agent-api.txt?rev=HEAD -- at least as it relates to resource agents. The Pacemaker implementation has been somewhat extended from the OCF specs, but none of those changes are incompatible with the original OCF specification.] is basically an extension of the Linux Standard Base conventions for init scripts to: * support parameters, * make them self-describing, and * make them extensible OCF specs have strict definitions of the exit codes that actions must return. footnote:[ The resource-agents source code includes the `ocf-tester` script, which can be useful in this regard. ] The cluster follows these specifications exactly, and giving the wrong exit code will cause the cluster to behave in ways you will likely find puzzling and annoying. In particular, the cluster needs to distinguish a completely stopped resource from one which is in some erroneous and indeterminate state. Parameters are passed to the resource agent as environment variables, with the special prefix +OCF_RESKEY_+. So, a parameter which the user thinks of as +ip+ will be passed to the resource agent as +OCF_RESKEY_ip+. The number and purpose of the parameters is left to the resource agent; however, the resource agent should use the `meta-data` command to advertise any that it supports. The OCF class is the most preferred as it is an industry standard, highly flexible (allowing parameters to be passed to agents in a non-positional manner) and self-describing. For more information, see the http://www.linux-ha.org/wiki/OCF_Resource_Agents[reference] and <>. === Linux Standard Base === indexterm:[Resource,LSB] indexterm:[LSB,Resources] indexterm:[Linux Standard Base,Resources] LSB resource agents are those found in +/etc/init.d+. Generally, they are provided by the OS distribution and, in order to be used with the cluster, they must conform to the LSB Spec. footnote:[ See http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html for the LSB Spec as it relates to init scripts. ] [WARNING] ==== Many distributions claim LSB compliance but ship with broken init scripts. For details on how to check whether your init script is LSB-compatible, see <>. Common problematic violations of the LSB standard include: * Not implementing the status operation at all * Not observing the correct exit status codes for `start/stop/status` actions * Starting a started resource returns an error * Stopping a stopped resource returns an error ==== [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === Systemd === indexterm:[Resource,Systemd] indexterm:[Systemd,Resources] Some newer distributions have replaced the old http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of initialization daemons and scripts with an alternative called http://www.freedesktop.org/wiki/Software/systemd[Systemd]. Pacemaker is able to manage these services _if they are present_. Instead of init scripts, systemd has 'unit files'. Generally, the services (unit files) are provided by the OS distribution, but there are online guides for converting from init scripts. footnote:[For example, http://0pointer.de/blog/projects/systemd-for-admins-3.html] [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === Upstart === indexterm:[Resource,Upstart] indexterm:[Upstart,Resources] Some newer distributions have replaced the old http://en.wikipedia.org/wiki/Init#SysV-style["SysV"] style of initialization daemons (and scripts) with an alternative called http://upstart.ubuntu.com/[Upstart]. Pacemaker is able to manage these services _if they are present_. Instead of init scripts, upstart has 'jobs'. Generally, the services (jobs) are provided by the OS distribution. [IMPORTANT] ==== Remember to make sure the computer is _not_ configured to start any services at boot time -- that should be controlled by the cluster. ==== === System Services === indexterm:[Resource,System Services] indexterm:[System Service,Resources] Since there are various types of system services (+systemd+, +upstart+, and +lsb+), Pacemaker supports a special +service+ alias which intelligently figures out which one applies to a given cluster node. This is particularly useful when the cluster contains a mix of +systemd+, +upstart+, and +lsb+. In order, Pacemaker will try to find the named service as: . an LSB init script . a Systemd unit file . an Upstart job === STONITH === indexterm:[Resource,STONITH] indexterm:[STONITH,Resources] The STONITH class is used exclusively for fencing-related resources. This is discussed later in <>. === Nagios Plugins === indexterm:[Resource,Nagios Plugins] indexterm:[Nagios Plugins,Resources] Nagios Plugins footnote:[The project has two independent forks, hosted at https://www.nagios-plugins.org/ and https://www.monitoring-plugins.org/. Output from both projects' plugins is similar, so plugins from either project can be used with pacemaker.] allow us to monitor services on remote hosts. Pacemaker is able to do remote monitoring with the plugins _if they are present_. A common use case is to configure them as resources belonging to a resource container (usually a virtual machine), and the container will be restarted if any of them has failed. Another use is to configure them as ordinary resources to be used for monitoring hosts or services via the network. The supported parameters are same as the long options of the plugin. [[primitive-resource]] == Resource Properties == These values tell the cluster which resource agent to use for the resource, where to find that resource agent and what standards it conforms to. .Properties of a Primitive Resource [width="95%",cols="1m,6<",options="header",align="center"] |========================================================= |Field |Description |id |Your name for the resource indexterm:[id,Resource] indexterm:[Resource,Property,id] |class |The standard the resource agent conforms to. Allowed values: +lsb+, +nagios+, +ocf+, +service+, +stonith+, +systemd+, +upstart+ indexterm:[class,Resource] indexterm:[Resource,Property,class] |type |The name of the Resource Agent you wish to use. E.g. +IPaddr+ or +Filesystem+ indexterm:[type,Resource] indexterm:[Resource,Property,type] |provider |The OCF spec allows multiple vendors to supply the same resource agent. To use the OCF resource agents supplied by the Heartbeat project, you would specify +heartbeat+ here. indexterm:[provider,Resource] indexterm:[Resource,Property,provider] |========================================================= The XML definition of a resource can be queried with the `crm_resource` tool. For example: ---- # crm_resource --resource Email --query-xml ---- might produce: .A system resource definition ===== [source,XML] ===== [NOTE] ===== One of the main drawbacks to system services (LSB, systemd or Upstart) resources is that they do not allow any parameters! ===== //// See https://tools.ietf.org/html/rfc5737 for choice of example IP address //// .An OCF resource definition ===== [source,XML] ------- ------- ===== [[s-resource-options]] == Resource Options == Resources have two types of options: 'meta-attributes' and 'instance attributes'. Meta-attributes apply to any type of resource, while instance attributes are specific to each resource agent. === Resource Meta-Attributes === Meta-attributes are used by the cluster to decide how a resource should behave and can be easily set using the `--meta` option of the `crm_resource` command. .Meta-attributes of a Primitive Resource [width="95%",cols="2m,2,5> resources, promoted to master if appropriate) * +Slave:+ Allow the resource to be started, but only in Slave mode if the resource is <> * +Master:+ Equivalent to +Started+ indexterm:[target-role,Resource Option] indexterm:[Resource,Option,target-role] |is-managed |TRUE |Is the cluster allowed to start and stop the resource? Allowed values: +true+, +false+ indexterm:[is-managed,Resource Option] indexterm:[Resource,Option,is-managed] |resource-stickiness |value of +resource-stickiness+ in the +rsc_defaults+ section |How much does the resource prefer to stay where it is? indexterm:[resource-stickiness,Resource Option] indexterm:[Resource,Option,resource-stickiness] |requires |fencing (unless +stonith-enabled+ is +false+ or +class+ is +stonith+, in which case it defaults to quorum) |Conditions under which the resource can be started '(since 1.1.8)' Allowed values: * +nothing:+ can always be started * +quorum:+ The cluster can only start this resource if a majority of the configured nodes are active * +fencing:+ The cluster can only start this resource if a majority of the configured nodes are active _and_ any failed or unknown nodes have been powered off * +unfencing:+ The cluster can only start this resource if a majority of the configured nodes are active _and_ any failed or unknown nodes have been powered off _and_ only on nodes that have been 'unfenced' '(since 1.1.9)' indexterm:[requires,Resource Option] indexterm:[Resource,Option,requires] |migration-threshold |INFINITY |How many failures may occur for this resource on a node, before this node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible); by constrast, the cluster treats INFINITY (the default) as a very large but finite number. This option has an effect only if the failed operation has on-fail=restart (the default), and additionally for failed start operations, if the cluster property start-failure-is-fatal is false. indexterm:[migration-threshold,Resource Option] indexterm:[Resource,Option,migration-threshold] |failure-timeout |0 |How many seconds to wait before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. As with any time-based actions, this is not guaranteed to be checked more frequently than the value of +cluster-recheck-interval+ (see <>). indexterm:[failure-timeout,Resource Option] indexterm:[Resource,Option,failure-timeout] |multiple-active |stop_start |What should the cluster do if it ever finds the resource active on more than one node? Allowed values: * +block:+ mark the resource as unmanaged * +stop_only:+ stop all active instances and leave them that way * +stop_start:+ stop all active instances and start the resource in one location only indexterm:[multiple-active,Resource Option] indexterm:[Resource,Option,multiple-active] |allow-migrate |TRUE for ocf:pacemaker:remote resources, FALSE otherwise |Whether the cluster should try to "live migrate" this resource when it needs to be moved (see <>) |remote-node | |The name of the Pacemaker Remote guest node this resource is associated with, if any. If specified, this both enables the resource as a guest node and defines the unique name used to identify the guest node. The guest must be configured to run the Pacemaker Remote daemon when it is started. +WARNING:+ This value cannot overlap with any resource or node IDs. '(since 1.1.9)' |remote-port |3121 |If +remote-node+ is specified, the port on the guest used for its Pacemaker Remote connection. The Pacemaker Remote daemon on the guest must be configured to listen on this port. '(since 1.1.9)' |remote-addr |value of +remote-node+ |If +remote-node+ is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on the guest must be configured to accept connections on this address. '(since 1.1.9)' |remote-connect-timeout |60s |If +remote-node+ is specified, how long before a pending guest connection will time out. '(since 1.1.10)' |========================================================= As an example of setting resource options, if you performed the following commands on an LSB Email resource: ------- # crm_resource --meta --resource Email --set-parameter priority --parameter-value 100 # crm_resource -m -r Email -p multiple-active -v block ------- the resulting resource definition might be: .An LSB resource with cluster options ===== [source,XML] ------- ------- ===== [[s-resource-defaults]] === Setting Global Defaults for Resource Meta-Attributes === To set a default value for a resource option, add it to the +rsc_defaults+ section with `crm_attribute`. For example, ---- # crm_attribute --type rsc_defaults --name is-managed --update false ---- would prevent the cluster from starting or stopping any of the resources in the configuration (unless of course the individual resources were specifically enabled by having their +is-managed+ set to +true+). === Resource Instance Attributes === The resource agents of some resource classes (lsb, systemd and upstart 'not' among them) can be given parameters which determine how they behave and which instance of a service they control. If your resource agent supports parameters, you can add them with the `crm_resource` command. For example, ---- # crm_resource --resource Public-IP --set-parameter ip --parameter-value 192.0.2.2 ---- would create an entry in the resource like this: .An example OCF resource with instance attributes ===== [source,XML] ------- ------- ===== For an OCF resource, the result would be an environment variable called +OCF_RESKEY_ip+ with a value of +192.0.2.2+. The list of instance attributes supported by an OCF resource agent can be found by calling the resource agent with the `meta-data` command. The output contains an XML description of all the supported attributes, their purpose and default values. .Displaying the metadata for the Dummy resource agent template ===== ---- # export OCF_ROOT=/usr/lib/ocf # $OCF_ROOT/resource.d/pacemaker/Dummy meta-data ---- [source,XML] ------- 1.0 This is a Dummy Resource Agent. It does absolutely nothing except keep track of whether its running or not. Its purpose in life is for testing and to serve as a template for RA writers. NB: Please pay attention to the timeouts specified in the actions section below. They should be meaningful for the kind of resource the agent manages. They should be the minimum advised timeouts, but they shouldn't/cannot cover _all_ possible resource instances. So, try to be neither overly generous nor too stingy, but moderate. The minimum timeouts should never be below 10 seconds. Example stateless resource agent Location to store the resource state in. State file Fake attribute that can be changed to cause a reload Fake attribute that can be changed to cause a reload Number of seconds to sleep during operations. This can be used to test how the cluster reacts to operation timeouts. Operation sleep duration in seconds. ------- ===== == Resource Operations == indexterm:[Resource,Action] 'Operations' are actions the cluster can perform on a resource by calling the resource agent. Resource agents must support certain common operations such as start, stop and monitor, and may implement any others. Some operations are generated by the cluster itself, for example, stopping and starting resources as needed. You can configure operations in the cluster configuration. As an example, by default the cluster will 'not' ensure your resources stay healthy once they are started. footnote:[Currently, anyway. Automatic monitoring operations may be added in a future version of Pacemaker.] To instruct the cluster to do this, you need to add a +monitor+ operation to the resource's definition. .An OCF resource with a recurring health check ===== [source,XML] ------- ------- ===== .Properties of an Operation [width="95%",cols="2m,3,6>. indexterm:[interval,Action Property] indexterm:[Action,Property,interval] |timeout | |How long to wait before declaring the action has failed indexterm:[timeout,Action Property] indexterm:[Action,Property,timeout] |on-fail |restart '(except for stop operations, which default to' fence 'when STONITH is enabled and' block 'otherwise)' |The action to take if this action ever fails. Allowed values: * +ignore:+ Pretend the resource did not fail. * +block:+ Don't perform any further operations on the resource. * +stop:+ Stop the resource and do not start it elsewhere. * +restart:+ Stop the resource and start it again (possibly on a different node). * +fence:+ STONITH the node on which the resource failed. * +standby:+ Move _all_ resources away from the node on which the resource failed. indexterm:[on-fail,Action Property] indexterm:[Action,Property,on-fail] |enabled |TRUE |If +false+, ignore this operation definition. This is typically used to pause a particular recurring monitor operation; for instance, it can complement the respective resource being unmanaged (+is-managed=false+), as this alone will <>. Disabling the operation does not suppress all actions of the given type. Allowed values: +true+, +false+. indexterm:[enabled,Action Property] indexterm:[Action,Property,enabled] |record-pending |FALSE |If +true+, the intention to perform the operation is recorded so that GUIs and CLI tools can indicate that an operation is in progress. This is best set as an _operation default_ (see next section). Allowed values: +true+, +false+. indexterm:[enabled,Action Property] indexterm:[Action,Property,enabled] |role | |Run the operation only on node(s) that the cluster thinks should be in the specified role. This only makes sense for recurring monitor operations. Allowed (case-sensitive) values: +Stopped+, +Started+, and in the case of <> resources, +Slave+ and +Master+. indexterm:[role,Action Property] indexterm:[Action,Property,role] |========================================================= [[s-resource-monitoring]] === Monitoring Resources for Failure === When Pacemaker first starts a resource, it runs one-time monitor operations (referred to as 'probes') to ensure the resource is running where it's supposed to be, and not running where it's not supposed to be. (This behavior can be affected by the +resource-discovery+ location constraint property.) Other than those initial probes, Pacemaker will not (by default) check that the resource continues to stay healthy. As in the example above, you must configure monitor operations explicitly to perform these checks. By default, a monitor operation will ensure that the resource is running where it is supposed to. The +target-role+ property can be used for further checking. For example, if a resource has one monitor operation with +interval=10 role=Started+ and a second monitor operation with +interval=11 role=Stopped+, the cluster will run the first monitor on any nodes it thinks 'should' be running the resource, and the second monitor on any nodes that it thinks 'should not' be running the resource (for the truly paranoid, who want to know when an administrator manually starts a service by mistake). [[s-monitoring-unmanaged]] === Monitoring Resources When Administration is Disabled === Recurring monitor operations behave differently under various administrative settings: * When a resource is unmanaged (by setting +is-managed=false+): No monitors will be stopped. + If the unmanaged resource is stopped on a node where the cluster thinks it should be running, the cluster will detect and report that it is not, but it will not consider the monitor failed, and will not try to start the resource until it is managed again. + Starting the unmanaged resource on a different node is strongly discouraged and will at least cause the cluster to consider the resource failed, and may require the resource's +target-role+ to be set to +Stopped+ then +Started+ to be recovered. * When a node is put into standby: All resources will be moved away from the node, and all monitor operations will be stopped on the node, except those with +role=Stopped+. Monitor operations with +role=Stopped+ will be started on the node if appropriate. * When the cluster is put into maintenance mode: All resources will be marked as unmanaged. All monitor operations will be stopped, except those with +role=Stopped+. As with single unmanaged resources, starting a resource on a node other than where the cluster expects it to be will cause problems. [[s-operation-defaults]] === Setting Global Defaults for Operations === You can change the global default values for operation properties in a given cluster. These are defined in an +op_defaults+ section of the CIB's +configuration+ section, and can be set with `crm_attribute`. For example, ---- # crm_attribute --type op_defaults --name timeout --update 20s ---- would default each operation's +timeout+ to 20 seconds. If an operation's definition also includes a value for +timeout+, then that value would be used for that operation instead. === When Implicit Operations Take a Long Time === The cluster will always perform a number of implicit operations: +start+, +stop+ and a non-recurring +monitor+ operation used at startup to check whether the resource is already active. If one of these is taking too long, then you can create an entry for them and specify a longer timeout. .An OCF resource with custom timeouts for its implicit actions ===== [source,XML] ------- ------- ===== === Multiple Monitor Operations === Provided no two operations (for a single resource) have the same name and interval, you can have as many monitor operations as you like. In this way, you can do a superficial health check every minute and progressively more intense ones at higher intervals. To tell the resource agent what kind of check to perform, you need to provide each monitor with a different value for a common parameter. The OCF standard creates a special parameter called +OCF_CHECK_LEVEL+ for this purpose and dictates that it is "made available to the resource agent without the normal +OCF_RESKEY+ prefix". Whatever name you choose, you can specify it by adding an +instance_attributes+ block to the +op+ tag. It is up to each resource agent to look for the parameter and decide how to use it. .An OCF resource with two recurring health checks, performing different levels of checks specified via +OCF_CHECK_LEVEL+. ===== [source,XML] ------- ------- ===== === Disabling a Monitor Operation === The easiest way to stop a recurring monitor is to just delete it. However, there can be times when you only want to disable it temporarily. In such cases, simply add +enabled="false"+ to the operation's definition. .Example of an OCF resource with a disabled health check ===== [source,XML] ------- ------- ===== This can be achieved from the command line by executing: ---- # cibadmin --modify --xml-text '' ---- Once you've done whatever you needed to do, you can then re-enable it with ---- # cibadmin --modify --xml-text '' ---- diff --git a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt index dbd970b694..a0ab3d7c41 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Rules.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Rules.txt @@ -1,600 +1,600 @@ = Rules = //// We prefer [[ch-rules]], but older versions of asciidoc don't deal well with that construct for chapter headings //// anchor:ch-rules[Chapter 8, Rules] indexterm:[Resource,Constraint,Rule] Rules can be used to make your configuration more dynamic. One common example is to set one value for +resource-stickiness+ during working hours, to prevent resources from being moved back to their most preferred location, and another on weekends when no-one is around to notice an outage. Another use of rules might be to assign machines to different processing groups (using a node attribute) based on time and to then use that attribute when creating location constraints. Each rule can contain a number of expressions, date-expressions and even other rules. The results of the expressions are combined based on the rule's +boolean-op+ field to determine if the rule ultimately evaluates to +true+ or +false+. What happens next depends on the context in which the rule is being used. == Rule Properties == .Properties of a Rule [width="95%",cols="2m,1,5<",options="header",align="center"] |========================================================= |Field |Default |Description |role |+Started+ |Limits the rule to apply only when the resource is in the specified role. Allowed values are +Started+, +Slave+, and +Master+. A rule with +role="Master"+ cannot determine the initial location of a clone instance and will only affect which of the active instances will be promoted. indexterm:[role,Constraint Rule] indexterm:[Constraint,Rule,role] |score | |The score to apply if the rule evaluates to +true+. Limited to use in rules that are part of location constraints. indexterm:[score,Constraint Rule] indexterm:[Constraint,Rule,score] |score-attribute | |The node attribute to look up and use as a score if the rule evaluates to +true+. Limited to use in rules that are part of location constraints. indexterm:[score-attribute,Constraint Rule] indexterm:[Constraint,Rule,score-attribute] |boolean-op |+and+ |How to combine the result of multiple expression objects. Allowed values are +and+ and +or+. indexterm:[boolean-op,Constraint Rule] indexterm:[Constraint,Rule,boolean-op] |========================================================= == Node Attribute Expressions == indexterm:[Resource,Constraint,Attribute Expression] Expression objects are used to control a resource based on the attributes defined by a node or nodes. .Properties of an Expression [width="95%",cols="1m,1,5 ---- ==== .Equivalent expression ==== [source,XML] ---- ---- ==== .9am-5pm Monday-Friday ==== [source,XML] ------- ------- ==== Please note that the +16+ matches up to +16:59:59+, as the numeric value (hour) still matches! .9am-6pm Monday through Friday or anytime Saturday ==== [source,XML] ------- - + ------- ==== .9am-5pm or 9pm-12am Monday through Friday ==== [source,XML] ------- - - + + ------- ==== .Mondays in March 2005 ==== [source,XML] ------- - + ------- ==== [NOTE] ====== Because no time is specified with the above dates, 00:00:00 is implied. This means that the range includes all of 2005-03-01 but none of 2005-04-01. You may wish to write +end="2005-03-31T23:59:59"+ to avoid confusion. ====== .A full moon on Friday the 13th ===== [source,XML] ------- - + ------- ===== == Using Rules to Determine Resource Location == indexterm:[Rule,Determine Resource Location] indexterm:[Resource,Location,Determine by Rules] A location constraint may contain rules. When the constraint's outermost rule evaluates to +false+, the cluster treats the constraint as if it were not there. When the rule evaluates to +true+, the node's preference for running the resource is updated with the score associated with the rule. If this sounds familiar, it is because you have been using a simplified syntax for location constraint rules already. Consider the following location constraint: .Prevent myApacheRsc from running on c001n03 ===== [source,XML] ------- ------- ===== This constraint can be more verbosely written as: .Prevent myApacheRsc from running on c001n03 - expanded version ===== [source,XML] ------- ------- ===== The advantage of using the expanded form is that one can then add extra clauses to the rule, such as limiting the rule such that it only applies during certain times of the day or days of the week. === Location Rules Based on Other Node Properties === The expanded form allows us to match on node properties other than its name. If we rated each machine's CPU power such that the cluster had the following nodes section: .A sample nodes section for use with score-attribute ===== [source,XML] ------- ------- ===== then we could prevent resources from running on underpowered machines with this rule: [source,XML] ------- ------- === Using +score-attribute+ Instead of +score+ === When using +score-attribute+ instead of +score+, each node matched by the rule has its score adjusted differently, according to its value for the named node attribute. Thus, in the previous example, if a rule used +score-attribute="cpu_mips"+, +c001n01+ would have its preference to run the resource increased by +1234+ whereas +c001n02+ would have its preference increased by +5678+. == Using Rules to Control Resource Options == Often some cluster nodes will be different from their peers. Sometimes, these differences -- e.g. the location of a binary or the names of network interfaces -- require resources to be configured differently depending on the machine they're hosted on. By defining multiple +instance_attributes+ objects for the resource and adding a rule to each, we can easily handle these special cases. In the example below, +mySpecialRsc+ will use eth1 and port 9999 when run on +node1+, eth2 and port 8888 on +node2+ and default to eth0 and port 9999 for all other nodes. .Defining different resource options based on the node name ===== [source,XML] ------- ------- ===== The order in which +instance_attributes+ objects are evaluated is determined by their score (highest to lowest). If not supplied, score defaults to zero, and objects with an equal score are processed in listed order. If the +instance_attributes+ object has no rule or a +rule+ that evaluates to +true+, then for any parameter the resource does not yet have a value for, the resource will use the parameter values defined by the +instance_attributes+. For example, given the configuration above, if the resource is placed on node1: . +special-node1+ has the highest score (3) and so is evaluated first; its rule evaluates to +true+, so +interface+ is set to +eth1+. . +special-node2+ is evaluated next with score 2, but its rule evaluates to +false+, so it is ignored. . +defaults+ is evaluated last with score 1, and has no rule, so its values are examined; +interface+ is already defined, so the value here is not used, but +port+ is not yet defined, so +port+ is set to +9999+. == Using Rules to Control Cluster Options == indexterm:[Rule,Controlling Cluster Options] indexterm:[Cluster,Setting Options with Rules] Controlling cluster options is achieved in much the same manner as specifying different resource options on different nodes. The difference is that because they are cluster options, one cannot (or should not, because they won't work) use attribute-based expressions. The following example illustrates how to set a different +resource-stickiness+ value during and outside work hours. This allows resources to automatically move back to their most preferred hosts, but at a time that (in theory) does not interfere with business activities. .Change +resource-stickiness+ during working hours ===== [source,XML] ------- ------- ===== [[s-rules-recheck]] == Ensuring Time-Based Rules Take Effect == A Pacemaker cluster is an event-driven system. As such, it won't recalculate the best place for resources to run unless something (like a resource failure or configuration change) happens. This can mean that a location constraint that only allows resource X to run between 9am and 5pm is not enforced. If you rely on time-based rules, the +cluster-recheck-interval+ cluster option (which defaults to 15 minutes) is essential. This tells the cluster to periodically recalculate the ideal state of the cluster. For example, if you set +cluster-recheck-interval="5m"+, then sometime between 09:00 and 09:05 the cluster would notice that it needs to start resource X, and between 17:00 and 17:05 it would realize that X needed to be stopped. The timing of the actual start and stop actions depends on what other actions the cluster may need to perform first. diff --git a/fencing/Makefile.am b/fencing/Makefile.am index c53ead6569..3bb628f359 100644 --- a/fencing/Makefile.am +++ b/fencing/Makefile.am @@ -1,77 +1,77 @@ # Author: Sun Jiang Dong # Copyright (c) 2004 International Business Machines # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common ## binary progs testdir = $(datadir)/$(PACKAGE)/tests/fencing test_SCRIPTS = regression.py halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = stonithd stonith-test sbin_PROGRAMS = stonith_admin sbin_SCRIPTS = fence_legacy fence_pcmk -noinst_HEADERS = internal.h +noinst_HEADERS = internal.h standalone_config.h if BUILD_XML_HELP man7_MANS = stonithd.7 endif stonith_test_SOURCES = test.c stonith_test_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(CRYPTOLIB) $(CLUSTERLIBS) stonith_admin_SOURCES = admin.c stonith_admin_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(CRYPTOLIB) $(CLUSTERLIBS) stonithd_CPPFLAGS = -I$(top_srcdir)/pengine $(AM_CPPFLAGS) stonithd_YFLAGS = -d stonithd_CFLAGS = $(CFLAGS_HARDENED_EXE) stonithd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) stonithd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la \ $(CRYPTOLIB) $(CLUSTERLIBS) stonithd_SOURCES = main.c commands.c remote.c if BUILD_STONITH_CONFIG BUILT_SOURCES = standalone_config.h stonithd_SOURCES += standalone_config.c config.y config.l stonithd_AM_LFLAGS = -o$(LEX_OUTPUT_ROOT).c endif # lex/yacc issues: CFLAGS = $(CFLAGS_COPY:-Werror=) CLEANFILES = $(man7_MANS) $(man8_MANS) diff --git a/include/crm_internal.h b/include/crm_internal.h index cf0f32d471..ab96b8fe55 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,394 +1,393 @@ /* crm_internal.h */ /* * Copyright (C) 2006 - 2008 * Andrew Beekhof * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef CRM_INTERNAL__H # define CRM_INTERNAL__H # include # include # include # include # include # include # include # include # include /* Dynamic loading of libraries */ void *find_library_function(void **handle, const char *lib, const char *fn, int fatal); void *convert_const_pointer(const void *ptr); /* For ACLs */ char *uid2username(uid_t uid); void determine_request_user(const char *user, xmlNode * request, const char *field); const char *crm_acl_get_set_user(xmlNode * request, const char *field, const char *peer_user); # if ENABLE_ACL # include static inline gboolean is_privileged(const char *user) { if (user == NULL) { return FALSE; } else if (strcmp(user, CRM_DAEMON_USER) == 0) { return TRUE; } else if (strcmp(user, "root") == 0) { return TRUE; } return FALSE; } # endif /* CLI option processing*/ # ifdef HAVE_GETOPT_H # include # else # define no_argument 0 # define required_argument 1 # endif # define pcmk_option_default 0x00000 # define pcmk_option_hidden 0x00001 # define pcmk_option_paragraph 0x00002 # define pcmk_option_example 0x00004 struct crm_option { /* Fields from 'struct option' in getopt.h */ /* name of long option */ const char *name; /* * one of no_argument, required_argument, and optional_argument: * whether option takes an argument */ int has_arg; /* if not NULL, set *flag to val when option found */ int *flag; /* if flag not NULL, value to set *flag to; else return value */ int val; /* Custom fields */ const char *desc; long flags; }; void crm_set_options(const char *short_options, const char *usage, struct crm_option *long_options, const char *app_desc); int crm_get_option(int argc, char **argv, int *index); int crm_get_option_long(int argc, char **argv, int *index, const char **longname); int crm_help(char cmd, int exit_code); /* Cluster Option Processing */ typedef struct pe_cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; gboolean(*is_valid) (const char *); const char *description_short; const char *description_long; } pe_cluster_option; const char *cluster_option(GHashTable * options, gboolean(*validate) (const char *), const char *name, const char *old_name, const char *def_value); const char *get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len, const char *name); void config_metadata(const char *name, const char *version, const char *desc_short, const char *desc_long, pe_cluster_option * option_list, int len); void verify_all_options(GHashTable * options, pe_cluster_option * option_list, int len); gboolean check_time(const char *value); gboolean check_timer(const char *value); gboolean check_boolean(const char *value); gboolean check_number(const char *value); gboolean check_positive_number(const char *value); gboolean check_quorum(const char *value); gboolean check_script(const char *value); gboolean check_utilization(const char *value); long crm_get_sbd_timeout(void); gboolean check_sbd_timeout(const char *value); /* Shared PE/crmd functionality */ void filter_action_parameters(xmlNode * param_set, const char *version); /* Resource operation updates */ xmlNode *create_operation_update(xmlNode * parent, lrmd_event_data_t * event, const char * caller_version, int target_rc, const char * node, const char * origin, int level); /* char2score */ extern int node_score_red; extern int node_score_green; extern int node_score_yellow; extern int node_score_infinity; /* Assorted convenience functions */ int crm_pid_active(long pid, const char *daemon); void crm_make_daemon(const char *name, gboolean daemonize, const char *pidfile); char *generate_op_key(const char *rsc_id, const char *op_type, int interval); char *generate_notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *generate_transition_magic_v202(const char *transition_key, int op_status); char *generate_transition_magic(const char *transition_key, int op_status, int op_rc); char *generate_transition_key(int action, int transition_id, int target_rc, const char *node); static inline long long crm_clear_bit(const char *function, int line, const char *target, long long word, long long bit) { long long rc = (word & ~bit); if (rc == word) { /* Unchanged */ } else if (target) { crm_trace("Bit 0x%.8llx for %s cleared by %s:%d", bit, target, function, line); } else { crm_trace("Bit 0x%.8llx cleared by %s:%d", bit, function, line); } return rc; } static inline long long crm_set_bit(const char *function, int line, const char *target, long long word, long long bit) { long long rc = (word | bit); if (rc == word) { /* Unchanged */ } else if (target) { crm_trace("Bit 0x%.8llx for %s set by %s:%d", bit, target, function, line); } else { crm_trace("Bit 0x%.8llx set by %s:%d", bit, function, line); } return rc; } # define set_bit(word, bit) word = crm_set_bit(__FUNCTION__, __LINE__, NULL, word, bit) # define clear_bit(word, bit) word = crm_clear_bit(__FUNCTION__, __LINE__, NULL, word, bit) char *generate_hash_key(const char *crm_msg_reference, const char *sys); /*! remote tcp/tls helper functions */ typedef struct crm_remote_s crm_remote_t; int crm_remote_send(crm_remote_t * remote, xmlNode * msg); int crm_remote_ready(crm_remote_t * remote, int total_timeout /*ms */ ); gboolean crm_remote_recv(crm_remote_t * remote, int total_timeout /*ms */ , int *disconnected); xmlNode *crm_remote_parse_buffer(crm_remote_t * remote); int crm_remote_tcp_connect(const char *host, int port); int crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */ int *timer_id, void *userdata, void (*callback) (void *userdata, int sock)); int crm_remote_accept(int ssock); void crm_sockaddr2str(void *sa, char *s); # ifdef HAVE_GNUTLS_GNUTLS_H /*! * \internal * \brief Initiate the client handshake after establishing the tcp socket. * \note This is a blocking function, it will block until the entire handshake * is complete or until the timeout period is reached. * \retval 0 success * \retval negative, failure */ int crm_initiate_client_tls_handshake(crm_remote_t * remote, int timeout_ms); /*! * \internal * \brief Create client or server session for anon DH encryption credentials * \param sock, the socket the session will use for transport * \param type, GNUTLS_SERVER or GNUTLS_CLIENT * \param credentials, gnutls_anon_server_credentials_t or gnutls_anon_client_credentials_t * * \retval gnutls_session_t * on success * \retval NULL on failure */ void *crm_create_anon_tls_session(int sock, int type, void *credentials); /*! * \internal * \brief Create client or server session for PSK credentials * \param sock, the socket the session will use for transport * \param type, GNUTLS_SERVER or GNUTLS_CLIENT * \param credentials, gnutls_psk_server_credentials_t or gnutls_osk_client_credentials_t * * \retval gnutls_session_t * on success * \retval NULL on failure */ void *create_psk_tls_session(int csock, int type, void *credentials); # endif # define REMOTE_MSG_TERMINATOR "\r\n\r\n" const char *daemon_option(const char *option); void set_daemon_option(const char *option, const char *value); gboolean daemon_option_enabled(const char *daemon, const char *option); void strip_text_nodes(xmlNode * xml); void pcmk_panic(const char *origin); void sysrq_init(void); pid_t pcmk_locate_sbd(void); long crm_pidfile_inuse(const char *filename, long mypid, const char *daemon); long crm_read_pidfile(const char *filename); # define crm_config_err(fmt...) { crm_config_error = TRUE; crm_err(fmt); } # define crm_config_warn(fmt...) { crm_config_warning = TRUE; crm_warn(fmt); } # define attrd_channel T_ATTRD # define F_ATTRD_KEY "attr_key" # define F_ATTRD_ATTRIBUTE "attr_name" # define F_ATTRD_REGEX "attr_regex" # define F_ATTRD_TASK "task" # define F_ATTRD_VALUE "attr_value" # define F_ATTRD_SET "attr_set" # define F_ATTRD_IS_REMOTE "attr_is_remote" # define F_ATTRD_IS_PRIVATE "attr_is_private" # define F_ATTRD_SECTION "attr_section" # define F_ATTRD_DAMPEN "attr_dampening" # define F_ATTRD_IGNORE_LOCALLY "attr_ignore_locally" # define F_ATTRD_HOST "attr_host" # define F_ATTRD_HOST_ID "attr_host_id" # define F_ATTRD_USER "attr_user" # define F_ATTRD_WRITER "attr_writer" # define F_ATTRD_VERSION "attr_version" # define F_ATTRD_RESOURCE "attr_resource" # define F_ATTRD_OPERATION "attr_clear_operation" # define F_ATTRD_INTERVAL "attr_clear_interval" /* attrd operations */ # define ATTRD_OP_PEER_REMOVE "peer-remove" # define ATTRD_OP_UPDATE "update" # define ATTRD_OP_UPDATE_BOTH "update-both" # define ATTRD_OP_UPDATE_DELAY "update-delay" # define ATTRD_OP_QUERY "query" # define ATTRD_OP_REFRESH "refresh" # define ATTRD_OP_FLUSH "flush" # define ATTRD_OP_SYNC "sync" # define ATTRD_OP_SYNC_RESPONSE "sync-response" # define ATTRD_OP_CLEAR_FAILURE "clear-failure" # if SUPPORT_COROSYNC # if CS_USES_LIBQB # include # include typedef struct qb_ipc_request_header cs_ipc_header_request_t; typedef struct qb_ipc_response_header cs_ipc_header_response_t; # else # include # include # include typedef coroipc_request_header_t cs_ipc_header_request_t; typedef coroipc_response_header_t cs_ipc_header_response_t; # endif # else typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_request_t; typedef struct { int size __attribute__ ((aligned(8))); int id __attribute__ ((aligned(8))); int error __attribute__ ((aligned(8))); } __attribute__ ((aligned(8))) cs_ipc_header_response_t; # endif void attrd_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void stonith_ipc_server_init(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t * crmd_ipc_server_init(struct qb_ipcs_service_handlers *cb); void cib_ipc_servers_init(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb); void cib_ipc_servers_destroy(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm); static inline void *realloc_safe(void *ptr, size_t size) { void *ret = realloc(ptr, size); if (ret == NULL) { free(ptr); /* make coverity happy */ abort(); } return ret; } const char *crm_xml_add_last_written(xmlNode *xml_node); void crm_xml_dump(xmlNode * data, int options, char **buffer, int *offset, int *max, int depth); void crm_buffer_add_char(char **buffer, int *offset, int *max, char c); gboolean crm_digest_verify(xmlNode *input, const char *expected); /* cross-platform compatibility functions */ char *crm_compat_realpath(const char *path); /* IPC Proxy Backend Shared Functions */ typedef struct remote_proxy_s { char *node_name; char *session_id; gboolean is_local; crm_ipc_t *ipc; mainloop_io_t *source; uint32_t last_request_id; lrmd_t *lrm; } remote_proxy_t; remote_proxy_t *remote_proxy_new( lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks, const char *node_name, const char *session_id, const char *channel); int remote_proxy_check(lrmd_t *lrmd, GHashTable *hash); void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg); void remote_proxy_ack_shutdown(lrmd_t *lrmd); void remote_proxy_nack_shutdown(lrmd_t *lrmd); int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata); void remote_proxy_disconnected(gpointer data); void remote_proxy_free(gpointer data); -void remote_proxy_end_session(remote_proxy_t *proxy); void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg); void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id); char* crm_versioned_param_summary(xmlNode *versioned_params, const char *name); void crm_summarize_versioned_params(xmlNode *param_set, xmlNode *versioned_params); #endif /* CRM_INTERNAL__H */ diff --git a/lib/lrmd/proxy_common.c b/lib/lrmd/proxy_common.c index 81796d40f9..67f310d7c3 100644 --- a/lib/lrmd/proxy_common.c +++ b/lib/lrmd/proxy_common.c @@ -1,320 +1,320 @@ /* * Copyright (c) 2015 David Vossel * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ #include #include #include #include #include #include #include #include #include #include int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); GHashTable *proxy_table = NULL; static void remote_proxy_notify_destroy(lrmd_t *lrmd, const char *session_id) { /* sending to the remote node that an ipc connection has been destroyed */ xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, F_LRMD_IPC_SESSION, session_id); lrmd_internal_proxy_send(lrmd, msg); free_xml(msg); } /*! * \brief Send an acknowledgment of a remote proxy shutdown request. * * \param[in] lrmd Connection to proxy */ void remote_proxy_ack_shutdown(lrmd_t *lrmd) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_ACK); lrmd_internal_proxy_send(lrmd, msg); free_xml(msg); } /*! * \brief We're not gonna shutdown as response to * a remote proxy shutdown request. * * \param[in] lrmd Connection to proxy */ void remote_proxy_nack_shutdown(lrmd_t *lrmd) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_NACK); lrmd_internal_proxy_send(lrmd, msg); free_xml(msg); } void remote_proxy_relay_event(remote_proxy_t *proxy, xmlNode *msg) { /* sending to the remote node an event msg. */ xmlNode *event = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(event, F_LRMD_IPC_OP, LRMD_IPC_OP_EVENT); crm_xml_add(event, F_LRMD_IPC_SESSION, proxy->session_id); add_message_xml(event, F_LRMD_IPC_MSG, msg); crm_log_xml_explicit(event, "EventForProxy"); lrmd_internal_proxy_send(proxy->lrm, event); free_xml(event); } void remote_proxy_relay_response(remote_proxy_t *proxy, xmlNode *msg, int msg_id) { /* sending to the remote node a response msg. */ xmlNode *response = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(response, F_LRMD_IPC_OP, LRMD_IPC_OP_RESPONSE); crm_xml_add(response, F_LRMD_IPC_SESSION, proxy->session_id); crm_xml_add_int(response, F_LRMD_IPC_MSG_ID, msg_id); add_message_xml(response, F_LRMD_IPC_MSG, msg); lrmd_internal_proxy_send(proxy->lrm, response); free_xml(response); } -void +static void remote_proxy_end_session(remote_proxy_t *proxy) { if (proxy == NULL) { return; } crm_trace("ending session ID %s", proxy->session_id); if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } void remote_proxy_free(gpointer data) { remote_proxy_t *proxy = data; crm_trace("freed proxy session ID %s", proxy->session_id); free(proxy->node_name); free(proxy->session_id); free(proxy); } int remote_proxy_dispatch(const char *buffer, ssize_t length, gpointer userdata) { /* Async responses from cib and friends back to clients via pacemaker_remoted */ xmlNode *xml = NULL; uint32_t flags = 0; remote_proxy_t *proxy = userdata; xml = string2xml(buffer); if (xml == NULL) { crm_warn("Received a NULL msg from IPC service."); return 1; } flags = crm_ipc_buffer_flags(proxy->ipc); if (flags & crm_ipc_proxied_relay_response) { crm_trace("Passing response back to %.8s on %s: %.200s - request id: %d", proxy->session_id, proxy->node_name, buffer, proxy->last_request_id); remote_proxy_relay_response(proxy, xml, proxy->last_request_id); proxy->last_request_id = 0; } else { crm_trace("Passing event back to %.8s on %s: %.200s", proxy->session_id, proxy->node_name, buffer); remote_proxy_relay_event(proxy, xml); } free_xml(xml); return 1; } void remote_proxy_disconnected(gpointer userdata) { remote_proxy_t *proxy = userdata; crm_trace("destroying %p", proxy); proxy->source = NULL; proxy->ipc = NULL; if(proxy->lrm) { remote_proxy_notify_destroy(proxy->lrm, proxy->session_id); proxy->lrm = NULL; } g_hash_table_remove(proxy_table, proxy->session_id); } remote_proxy_t * remote_proxy_new(lrmd_t *lrmd, struct ipc_client_callbacks *proxy_callbacks, const char *node_name, const char *session_id, const char *channel) { remote_proxy_t *proxy = NULL; if(channel == NULL) { crm_err("No channel specified to proxy"); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } proxy = calloc(1, sizeof(remote_proxy_t)); proxy->node_name = strdup(node_name); proxy->session_id = strdup(session_id); proxy->lrm = lrmd; if (safe_str_eq(crm_system_name, CRM_SYSTEM_CRMD) && safe_str_eq(channel, CRM_SYSTEM_CRMD)) { /* The crmd doesn't need to connect to itself */ proxy->is_local = TRUE; } else { proxy->source = mainloop_add_ipc_client(channel, G_PRIORITY_LOW, 0, proxy, proxy_callbacks); proxy->ipc = mainloop_get_ipc_client(proxy->source); if (proxy->source == NULL) { remote_proxy_free(proxy); remote_proxy_notify_destroy(lrmd, session_id); return NULL; } } crm_trace("new remote proxy client established to %s on %s, session id %s", channel, node_name, session_id); g_hash_table_insert(proxy_table, proxy->session_id, proxy); return proxy; } void remote_proxy_cb(lrmd_t *lrmd, const char *node_name, xmlNode *msg) { const char *op = crm_element_value(msg, F_LRMD_IPC_OP); const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); int msg_id = 0; /* sessions are raw ipc connections to IPC, * all we do is proxy requests/responses exactly * like they are given to us at the ipc level. */ CRM_CHECK(op != NULL, return); CRM_CHECK(session != NULL, return); crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); /* This is msg from remote ipc client going to real ipc server */ if (safe_str_eq(op, LRMD_IPC_OP_DESTROY)) { remote_proxy_end_session(proxy); } else if (safe_str_eq(op, LRMD_IPC_OP_REQUEST)) { int flags = 0; xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); const char *name = crm_element_value(msg, F_LRMD_IPC_CLIENT); CRM_CHECK(request != NULL, return); if (proxy == NULL) { /* proxy connection no longer exists */ remote_proxy_notify_destroy(lrmd, session); return; } /* crmd requests MUST be handled by the crmd, not us */ CRM_CHECK(proxy->is_local == FALSE, remote_proxy_end_session(proxy); return); if (crm_ipc_connected(proxy->ipc) == FALSE) { remote_proxy_end_session(proxy); return; } proxy->last_request_id = 0; crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); #if ENABLE_ACL CRM_ASSERT(node_name); crm_acl_get_set_user(request, F_LRMD_IPC_USER, node_name); #endif if(is_set(flags, crm_ipc_proxied)) { const char *type = crm_element_value(request, F_TYPE); int rc = 0; if (safe_str_eq(type, T_ATTRD) && crm_element_value(request, F_ATTRD_HOST) == NULL) { crm_xml_add(request, F_ATTRD_HOST, proxy->node_name); } rc = crm_ipc_send(proxy->ipc, request, flags, 5000, NULL); if(rc < 0) { xmlNode *op_reply = create_xml_node(NULL, "nack"); crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); /* Send a n'ack so the caller doesn't block */ crm_xml_add(op_reply, "function", __FUNCTION__); crm_xml_add_int(op_reply, "line", __LINE__); crm_xml_add_int(op_reply, "rc", rc); remote_proxy_relay_response(proxy, op_reply, msg_id); free_xml(op_reply); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); proxy->last_request_id = msg_id; } } else { int rc = pcmk_ok; xmlNode *op_reply = NULL; /* For backwards compatibility with pacemaker_remoted <= 1.1.10 */ crm_trace("Relaying %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); rc = crm_ipc_send(proxy->ipc, request, flags, 10000, &op_reply); if(rc < 0) { crm_err("Could not relay %s request %d from %s to %s for %s: %s (%d)", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name, pcmk_strerror(rc), rc); } else { crm_trace("Relayed %s request %d from %s to %s for %s", op, msg_id, proxy->node_name, crm_ipc_name(proxy->ipc), name); } if(op_reply) { remote_proxy_relay_response(proxy, op_reply, msg_id); free_xml(op_reply); } } } else { crm_err("Unknown proxy operation: %s", op); } } diff --git a/lib/pengine/container.c b/lib/pengine/container.c index ed63b80896..7e99e68c3e 100644 --- a/lib/pengine/container.c +++ b/lib/pengine/container.c @@ -1,876 +1,900 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #define VARIANT_CONTAINER 1 #include "./variant.h" void tuple_free(container_grouping_t *tuple); static char * next_ip(const char *last_ip) { unsigned int oct1 = 0; unsigned int oct2 = 0; unsigned int oct3 = 0; unsigned int oct4 = 0; int rc = sscanf(last_ip, "%u.%u.%u.%u", &oct1, &oct2, &oct3, &oct4); if (rc != 4) { /*@ TODO check for IPv6 */ return NULL; } else if (oct3 > 253) { return NULL; } else if (oct4 > 253) { ++oct3; oct4 = 1; } else { ++oct4; } return crm_strdup_printf("%u.%u.%u.%u", oct1, oct2, oct3, oct4); } static int allocate_ip(container_variant_data_t *data, container_grouping_t *tuple, char *buffer, int max) { if(data->ip_range_start == NULL) { return 0; } else if(data->ip_last) { tuple->ipaddr = next_ip(data->ip_last); } else { tuple->ipaddr = strdup(data->ip_range_start); } data->ip_last = tuple->ipaddr; #if 0 return snprintf(buffer, max, " --add-host=%s-%d:%s --link %s-docker-%d:%s-link-%d", data->prefix, tuple->offset, tuple->ipaddr, data->prefix, tuple->offset, data->prefix, tuple->offset); #else return snprintf(buffer, max, " --add-host=%s-%d:%s", data->prefix, tuple->offset, tuple->ipaddr); #endif } static xmlNode * create_resource(const char *name, const char *provider, const char *kind) { xmlNode *rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, name); crm_xml_add(rsc, XML_AGENT_ATTR_CLASS, "ocf"); crm_xml_add(rsc, XML_AGENT_ATTR_PROVIDER, provider); crm_xml_add(rsc, XML_ATTR_TYPE, kind); return rsc; } static void create_nvp(xmlNode *parent, const char *name, const char *value) { xmlNode *xml_nvp = create_xml_node(parent, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml_nvp, "%s-%s", ID(parent), name); crm_xml_add(xml_nvp, XML_NVPAIR_ATTR_NAME, name); crm_xml_add(xml_nvp, XML_NVPAIR_ATTR_VALUE, value); } static void create_op(xmlNode *parent, const char *prefix, const char *task, const char *interval) { xmlNode *xml_op = create_xml_node(parent, "op"); crm_xml_set_id(xml_op, "%s-%s-%s", prefix, task, interval); crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval); crm_xml_add(xml_op, "name", task); } +/*! + * \internal + * \brief Check whether cluster can manage resource inside container + * + * \param[in] data Container variant data + * + * \return TRUE if networking configuration is acceptable, FALSE otherwise + * + * \note The resource is manageable if an IP range or control port has been + * specified. If a control port is used without an IP range, replicas per + * host must be 1. + */ static bool valid_network(container_variant_data_t *data) { if(data->ip_range_start) { return TRUE; } if(data->control_port) { if(data->replicas_per_host > 1) { pe_err("Specifying the 'control-port' for %s requires 'replicas-per-host=1'", data->prefix); data->replicas_per_host = 1; + /* @TODO to be sure: clear_bit(rsc->flags, pe_rsc_unique); */ } return TRUE; } return FALSE; } static bool create_ip_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(data->ip_range_start) { char *id = NULL; xmlNode *xml_ip = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-ip-%s", data->prefix, tuple->ipaddr); crm_xml_sanitize_id(id); xml_ip = create_resource(id, "heartbeat", "IPaddr2"); free(id); xml_obj = create_xml_node(xml_ip, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); create_nvp(xml_obj, "ip", tuple->ipaddr); if(data->host_network) { create_nvp(xml_obj, "nic", data->host_network); } if(data->host_netmask) { create_nvp(xml_obj, "cidr_netmask", data->host_netmask); } else { create_nvp(xml_obj, "cidr_netmask", "32"); } xml_obj = create_xml_node(xml_ip, "operations"); create_op(xml_obj, ID(xml_ip), "monitor", "60s"); // TODO: Other ops? Timeouts and intervals from underlying resource? if (common_unpack(xml_ip, &tuple->ip, NULL, data_set) == false) { return FALSE; } parent->children = g_list_append(parent->children, tuple->ip); } return TRUE; } static bool create_docker_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { int offset = 0, max = 4096; char *buffer = calloc(1, max+1); int doffset = 0, dmax = 1024; char *dbuffer = calloc(1, dmax+1); char *id = NULL; xmlNode *xml_docker = NULL; xmlNode *xml_obj = NULL; id = crm_strdup_printf("%s-docker-%d", data->prefix, tuple->offset); crm_xml_sanitize_id(id); xml_docker = create_resource(id, "heartbeat", "docker"); free(id); xml_obj = create_xml_node(xml_docker, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); create_nvp(xml_obj, "image", data->image); create_nvp(xml_obj, "allow_pull", "true"); create_nvp(xml_obj, "force_kill", "false"); create_nvp(xml_obj, "reuse", "false"); offset += snprintf(buffer+offset, max-offset, "-h %s-%d --restart=no ", data->prefix, tuple->offset); if(data->docker_network) { // offset += snprintf(buffer+offset, max-offset, " --link-local-ip=%s", tuple->ipaddr); offset += snprintf(buffer+offset, max-offset, " --net=%s", data->docker_network); } for(GListPtr pIter = data->mounts; pIter != NULL; pIter = pIter->next) { container_mount_t *mount = pIter->data; if(mount->flags) { char *source = crm_strdup_printf( "%s/%s-%d", mount->source, data->prefix, tuple->offset); if(doffset > 0) { doffset += snprintf(dbuffer+doffset, dmax-doffset, ","); } doffset += snprintf(dbuffer+doffset, dmax-doffset, "%s", source); offset += snprintf(buffer+offset, max-offset, " -v %s:%s", source, mount->target); free(source); } else { offset += snprintf(buffer+offset, max-offset, " -v %s:%s", mount->source, mount->target); } if(mount->options) { offset += snprintf(buffer+offset, max-offset, ":%s", mount->options); } } for(GListPtr pIter = data->ports; pIter != NULL; pIter = pIter->next) { container_port_t *port = pIter->data; if(tuple->ipaddr) { offset += snprintf(buffer+offset, max-offset, " -p %s:%s:%s", tuple->ipaddr, port->source, port->target); } else { offset += snprintf(buffer+offset, max-offset, " -p %s:%s", port->source, port->target); } } if(data->docker_run_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_run_options); } if(data->docker_host_options) { offset += snprintf(buffer+offset, max-offset, " %s", data->docker_host_options); } create_nvp(xml_obj, "run_opts", buffer); free(buffer); create_nvp(xml_obj, "mount_points", dbuffer); free(dbuffer); if(tuple->child) { char *command = NULL; if(data->control_port) { command = crm_strdup_printf(SBIN_DIR"/pacemaker_remoted -p %s", data->control_port); } else { command = crm_strdup_printf(SBIN_DIR"/pacemaker_remoted -p %d", DEFAULT_REMOTE_PORT); } create_nvp(xml_obj, "run_cmd", command); free(command); /* TODO: Allow users to specify their own? * * We just want to know if the container is alive, we'll * monitor the child independantly */ create_nvp(xml_obj, "monitor_cmd", "/bin/true"); /* } else if(child && data->untrusted) { * Support this use-case? * * The ability to have resources started/stopped by us, but * unable to set attributes, etc. * * Arguably better to control API access this with ACLs like * "normal" remote nodes * * create_nvp(xml_obj, "run_cmd", "/usr/libexec/pacemaker/lrmd"); * create_nvp(xml_obj, "monitor_cmd", "/usr/libexec/pacemaker/lrmd_internal_ctl -c poke"); */ } else { /* TODO: Allow users to specify their own? * * We don't know what's in the container, so we just want * to know if it is alive */ create_nvp(xml_obj, "monitor_cmd", "/bin/true"); } xml_obj = create_xml_node(xml_docker, "operations"); create_op(xml_obj, ID(xml_docker), "monitor", "60s"); // TODO: Other ops? Timeouts and intervals from underlying resource? if (common_unpack(xml_docker, &tuple->docker, NULL, data_set) == FALSE) { return FALSE; } parent->children = g_list_append(parent->children, tuple->docker); tuple->docker->parent = parent; return TRUE; } static bool create_remote_resource( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { - if(valid_network(data) && tuple->child) { + if (tuple->child && valid_network(data)) { node_t *node = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_remote = NULL; char *nodeid = crm_strdup_printf("%s-%d", data->prefix, tuple->offset); char *id = NULL; if (remote_id_conflict(nodeid, data_set)) { // The biggest hammer we have id = crm_strdup_printf("pcmk-internal-%s-remote-%d", tuple->child->id, tuple->offset); CRM_ASSERT(remote_id_conflict(id, data_set) == FALSE); } else { id = strdup(nodeid); } xml_remote = create_resource(id, "pacemaker", "remote"); free(id); xml_obj = create_xml_node(xml_remote, "operations"); create_op(xml_obj, ID(xml_remote), "monitor", "60s"); xml_obj = create_xml_node(xml_remote, XML_TAG_ATTR_SETS); crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, tuple->offset); if(tuple->ipaddr) { create_nvp(xml_obj, "addr", tuple->ipaddr); } else { create_nvp(xml_obj, "addr", "localhost"); } if(data->control_port) { create_nvp(xml_obj, "port", data->control_port); } else { create_nvp(xml_obj, "port", crm_itoa(DEFAULT_REMOTE_PORT)); } xml_obj = create_xml_node(xml_remote, XML_TAG_META_SETS); crm_xml_set_id(xml_obj, "%s-meta-%d", data->prefix, tuple->offset); create_nvp(xml_obj, XML_OP_ATTR_ALLOW_MIGRATE, "false"); // Sets up node->details->remote_rsc->container == tuple->docker create_nvp(xml_obj, XML_RSC_ATTR_CONTAINER, tuple->docker->id); // TODO: Do this generically, eg with rsc->flags // create_nvp(xml_obj, XML_RSC_ATTR_INTERNAL_RSC, "true"); // Suppress printing // tuple->docker->fillers = g_list_append(tuple->docker->fillers, child); // -INFINITY prevents anyone else from running here node = pe_create_node(strdup(nodeid), nodeid, "remote", "-INFINITY", data_set); tuple->node = node_copy(node); tuple->node->weight = 500; nodeid = NULL; id = NULL; if (common_unpack(xml_remote, &tuple->remote, NULL, data_set) == FALSE) { return FALSE; } tuple->node->details->remote_rsc = tuple->remote; parent->children = g_list_append(parent->children, tuple->remote); } return TRUE; } static bool create_container( resource_t *parent, container_variant_data_t *data, container_grouping_t *tuple, pe_working_set_t * data_set) { if(create_docker_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } if(create_ip_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } if(create_remote_resource(parent, data, tuple, data_set) == FALSE) { return TRUE; } if(tuple->child && tuple->ipaddr) { add_hash_param(tuple->child->meta, "external-ip", tuple->ipaddr); } return FALSE; } static void mount_free(container_mount_t *mount) { free(mount->source); free(mount->target); free(mount->options); free(mount); } static void port_free(container_port_t *port) { free(port->source); free(port->target); free(port); } gboolean container_unpack(resource_t * rsc, pe_working_set_t * data_set) { const char *value = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_resource = NULL; container_variant_data_t *container_data = NULL; pe_rsc_trace(rsc, "Processing resource %s...", rsc->id); container_data = calloc(1, sizeof(container_variant_data_t)); rsc->variant_opaque = container_data; container_data->prefix = strdup(rsc->id); xml_obj = first_named_child(rsc->xml, "docker"); if(xml_obj == NULL) { return FALSE; } + value = crm_element_value(xml_obj, "masters"); + container_data->masters = crm_parse_int(value, "0"); + if (container_data->masters < 0) { + pe_err("'masters' for %s must be nonnegative integer, using 0", + rsc->id); + container_data->masters = 0; + } + value = crm_element_value(xml_obj, "replicas"); - if(value == NULL) { - value = crm_element_value(xml_obj, "masters"); + if ((value == NULL) && (container_data->masters > 0)) { + container_data->replicas = container_data->masters; + } else { + container_data->replicas = crm_parse_int(value, "1"); + } + if (container_data->replicas < 1) { + pe_err("'replicas' for %s must be positive integer, using 1", rsc->id); + container_data->replicas = 1; } - container_data->replicas = crm_parse_int(value, "1"); /* * Communication between containers on the same host via the * floating IPs only works if docker is started with: * --userland-proxy=false --ip-masq=false */ value = crm_element_value(xml_obj, "replicas-per-host"); container_data->replicas_per_host = crm_parse_int(value, "1"); - - if(container_data->replicas_per_host == 1) { + if (container_data->replicas_per_host < 1) { + pe_err("'replicas-per-host' for %s must be positive integer, using 1", + rsc->id); + container_data->replicas_per_host = 1; + } + if (container_data->replicas_per_host == 1) { clear_bit(rsc->flags, pe_rsc_unique); } - value = crm_element_value(xml_obj, "masters"); - container_data->masters = crm_parse_int(value, "1"); - container_data->docker_run_options = crm_element_value_copy(xml_obj, "options"); container_data->image = crm_element_value_copy(xml_obj, "image"); + container_data->docker_network = crm_element_value_copy(xml_obj, "network"); xml_obj = first_named_child(rsc->xml, "network"); if(xml_obj) { container_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start"); container_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask"); - container_data->host_network = crm_element_value_copy(xml_obj, "host-network"); + container_data->host_network = crm_element_value_copy(xml_obj, "host-interface"); container_data->control_port = crm_element_value_copy(xml_obj, "control-port"); - container_data->docker_network = crm_element_value_copy(xml_obj, "docker-network"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { container_port_t *port = calloc(1, sizeof(container_port_t)); port->source = crm_element_value_copy(xml_child, "port"); if(port->source == NULL) { port->source = crm_element_value_copy(xml_child, "range"); } else { port->target = crm_element_value_copy(xml_child, "internal-port"); } if(port->source != NULL && strlen(port->source) > 0) { if(port->target == NULL) { port->target = strdup(port->source); } container_data->ports = g_list_append(container_data->ports, port); } else { pe_err("Invalid port directive %s", ID(xml_child)); port_free(port); } } } xml_obj = first_named_child(rsc->xml, "storage"); for (xmlNode *xml_child = __xml_first_child_element(xml_obj); xml_child != NULL; xml_child = __xml_next_element(xml_child)) { container_mount_t *mount = calloc(1, sizeof(container_mount_t)); mount->source = crm_element_value_copy(xml_child, "source-dir"); if(mount->source == NULL) { mount->source = crm_element_value_copy(xml_child, "source-dir-root"); mount->flags = 1; } mount->target = crm_element_value_copy(xml_child, "target-dir"); mount->options = crm_element_value_copy(xml_child, "options"); if(mount->source && mount->target) { container_data->mounts = g_list_append(container_data->mounts, mount); } else { pe_err("Invalid mount directive %s", ID(xml_child)); mount_free(mount); } } xml_obj = first_named_child(rsc->xml, "primitive"); - if(xml_obj && valid_network(container_data) && container_data->replicas > 0) { + if (xml_obj && valid_network(container_data)) { char *value = NULL; xmlNode *xml_set = NULL; if(container_data->masters > 0) { xml_resource = create_xml_node(NULL, XML_CIB_TAG_MASTER); } else { xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION); } crm_xml_set_id(xml_resource, "%s-%s", container_data->prefix, xml_resource->name); xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS); crm_xml_set_id(xml_resource, "%s-%s-meta", container_data->prefix, xml_resource->name); create_nvp(xml_set, XML_RSC_ATTR_ORDERED, "true"); value = crm_itoa(container_data->replicas); create_nvp(xml_set, XML_RSC_ATTR_INCARNATION_MAX, value); free(value); value = crm_itoa(container_data->replicas_per_host); create_nvp(xml_set, XML_RSC_ATTR_INCARNATION_NODEMAX, value); free(value); if(container_data->replicas_per_host > 1) { create_nvp(xml_set, XML_RSC_ATTR_UNIQUE, "true"); } else { create_nvp(xml_set, XML_RSC_ATTR_UNIQUE, "false"); } if(container_data->masters) { value = crm_itoa(container_data->masters); create_nvp(xml_set, XML_RSC_ATTR_MASTER_MAX, value); free(value); } //crm_xml_add(xml_obj, XML_ATTR_ID, container_data->prefix); add_node_copy(xml_resource, xml_obj); - /* } else if(xml_obj && container_data->ip_range_start) { */ - /* xml_resource = copy_xml(xml_resource); */ - } else if(xml_obj) { - pe_err("Cannot control %s inside container %s without a value for ip-range-start", + pe_err("Cannot control %s inside %s without either ip-range-start or control-port", rsc->id, ID(xml_obj)); return FALSE; } if(xml_resource) { int lpc = 0; GListPtr childIter = NULL; resource_t *new_rsc = NULL; container_mount_t *mount = NULL; container_port_t *port = calloc(1, sizeof(container_port_t)); int offset = 0, max = 1024; char *buffer = calloc(1, max+1); mount = calloc(1, sizeof(container_mount_t)); mount->source = strdup(DEFAULT_REMOTE_KEY_LOCATION); mount->target = strdup(DEFAULT_REMOTE_KEY_LOCATION); mount->options = NULL; mount->flags = 0; container_data->mounts = g_list_append(container_data->mounts, mount); mount = calloc(1, sizeof(container_mount_t)); mount->source = strdup("/var/log/containers"); mount->target = strdup("/var/log"); mount->options = NULL; mount->flags = 1; container_data->mounts = g_list_append(container_data->mounts, mount); if(container_data->control_port) { port->source = strdup(container_data->control_port); } else { port->source = crm_itoa(DEFAULT_REMOTE_PORT); } port->target = strdup(port->source); container_data->ports = g_list_append(container_data->ports, port); if (common_unpack(xml_resource, &new_rsc, rsc, data_set) == FALSE) { pe_err("Failed unpacking resource %s", crm_element_value(rsc->xml, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } return FALSE; } container_data->child = new_rsc; container_data->child->orig_xml = xml_obj; // Also the trigger for common_free() // to free xml_resource as container_data->child->xml for(childIter = container_data->child->children; childIter != NULL; childIter = childIter->next) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->child = childIter->data; tuple->offset = lpc++; offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); } container_data->docker_host_options = buffer; } else { // Just a naked container, no pacemaker-remote int offset = 0, max = 1024; char *buffer = calloc(1, max+1); for(int lpc = 0; lpc < container_data->replicas; lpc++) { container_grouping_t *tuple = calloc(1, sizeof(container_grouping_t)); tuple->offset = lpc; offset += allocate_ip(container_data, tuple, buffer+offset, max-offset); container_data->tuples = g_list_append(container_data->tuples, tuple); } container_data->docker_host_options = buffer; } for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; // TODO: Remove from list if create_container() returns TRUE create_container(rsc, container_data, tuple, data_set); } if(container_data->child) { rsc->children = g_list_append(rsc->children, container_data->child); } return TRUE; } gboolean container_active(resource_t * rsc, gboolean all) { return TRUE; } resource_t * find_container_child(const char *stem, resource_t * rsc, node_t *node) { container_variant_data_t *container_data = NULL; resource_t *parent = uber_parent(rsc); CRM_ASSERT(parent->parent); parent = parent->parent; get_container_variant_data(container_data, parent); if (is_not_set(rsc->flags, pe_rsc_unique)) { for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->node->details == node->details) { rsc = tuple->child; break; } } } if (rsc && safe_str_neq(stem, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(stem); } return rsc; } static void container_print_xml(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (pre_text == NULL) { pre_text = ""; } child_text = crm_concat(pre_text, " ", ' '); status_print("%sid); status_print("managed=\"%s\" ", is_set(rsc->flags, pe_rsc_managed) ? "true" : "false"); status_print("failed=\"%s\" ", is_set(rsc->flags, pe_rsc_failed) ? "true" : "false"); status_print(">\n"); get_container_variant_data(container_data, rsc); status_print("%sDocker container: %s [%s]%s%s", pre_text, rsc->id, container_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(tuple->ip) { tuple->ip->fns->print(tuple->ip, child_text, options, print_data); } if(tuple->child) { tuple->child->fns->print(tuple->child, child_text, options, print_data); } if(tuple->docker) { tuple->docker->fns->print(tuple->docker, child_text, options, print_data); } if(tuple->remote) { tuple->remote->fns->print(tuple->remote, child_text, options, print_data); } } status_print("%s\n", pre_text); free(child_text); } static void tuple_print(container_grouping_t * tuple, const char *pre_text, long options, void *print_data) { node_t *node = NULL; resource_t *rsc = tuple->child; int offset = 0; char buffer[LINE_MAX]; if(rsc == NULL) { rsc = tuple->docker; } if(tuple->remote) { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->remote)); } else { offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", rsc_printable_id(tuple->docker)); } if(tuple->ipaddr) { offset += snprintf(buffer + offset, LINE_MAX - offset, " (%s)", tuple->ipaddr); } if(tuple->remote && tuple->remote->running_on != NULL) { node = tuple->remote->running_on->data; } else if (tuple->remote == NULL && rsc->running_on != NULL) { node = rsc->running_on->data; } common_print(rsc, pre_text, buffer, node, options, print_data); } void container_print(resource_t * rsc, const char *pre_text, long options, void *print_data) { container_variant_data_t *container_data = NULL; char *child_text = NULL; CRM_CHECK(rsc != NULL, return); if (options & pe_print_xml) { container_print_xml(rsc, pre_text, options, print_data); return; } get_container_variant_data(container_data, rsc); if (pre_text == NULL) { pre_text = " "; } child_text = crm_strdup_printf(" %s", pre_text); status_print("%sDocker container%s: %s [%s]%s%s\n", pre_text, container_data->replicas>1?" set":"", rsc->id, container_data->image, is_set(rsc->flags, pe_rsc_unique) ? " (unique)" : "", is_set(rsc->flags, pe_rsc_managed) ? "" : " (unmanaged)"); for (GListPtr gIter = container_data->tuples; gIter != NULL; gIter = gIter->next) { container_grouping_t *tuple = (container_grouping_t *)gIter->data; CRM_ASSERT(tuple); if(is_set(options, pe_print_clone_details)) { if(g_list_length(container_data->tuples) > 1) { status_print(" %sReplica[%d]\n", pre_text, tuple->offset); } if(tuple->ip) { tuple->ip->fns->print(tuple->ip, child_text, options, print_data); } if(tuple->docker) { tuple->docker->fns->print(tuple->docker, child_text, options, print_data); } if(tuple->remote) { tuple->remote->fns->print(tuple->remote, child_text, options, print_data); } if(tuple->child) { tuple->child->fns->print(tuple->child, child_text, options, print_data); } } else { char *child_text = crm_strdup_printf("%s ", pre_text); tuple_print(tuple, child_text, options, print_data); } } } void tuple_free(container_grouping_t *tuple) { if(tuple == NULL) { return; } // TODO: Free tuple->node ? if(tuple->ip) { tuple->ip->fns->free(tuple->ip); tuple->ip = NULL; } if(tuple->child) { tuple->child->fns->free(tuple->child); tuple->child = NULL; } if(tuple->docker) { tuple->docker->fns->free(tuple->docker); tuple->docker = NULL; } if(tuple->remote) { tuple->remote->fns->free(tuple->remote); tuple->remote = NULL; } free(tuple->ipaddr); free(tuple); } void container_free(resource_t * rsc) { container_variant_data_t *container_data = NULL; CRM_CHECK(rsc != NULL, return); get_container_variant_data(container_data, rsc); pe_rsc_trace(rsc, "Freeing %s", rsc->id); free(container_data->prefix); free(container_data->image); free(container_data->control_port); free(container_data->host_network); free(container_data->host_netmask); free(container_data->ip_range_start); free(container_data->docker_network); free(container_data->docker_run_options); free(container_data->docker_host_options); g_list_free_full(container_data->tuples, (GDestroyNotify)tuple_free); g_list_free_full(container_data->mounts, (GDestroyNotify)mount_free); g_list_free_full(container_data->ports, (GDestroyNotify)port_free); common_free(rsc); } enum rsc_role_e container_resource_state(const resource_t * rsc, gboolean current) { enum rsc_role_e container_role = RSC_ROLE_UNKNOWN; return container_role; } diff --git a/lib/pengine/failcounts.c b/lib/pengine/failcounts.c index cee0edfdd8..8ce69eef6c 100644 --- a/lib/pengine/failcounts.c +++ b/lib/pengine/failcounts.c @@ -1,333 +1,340 @@ /* * Copyright (C) 2008-2017 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include int get_failcount(node_t *node, resource_t *rsc, time_t *last_failure, pe_working_set_t *data_set) { return get_failcount_full(node, rsc, last_failure, TRUE, NULL, data_set); } static gboolean is_matched_failure(const char *rsc_id, xmlNode *conf_op_xml, xmlNode *lrm_op_xml) { gboolean matched = FALSE; const char *conf_op_name = NULL; int conf_op_interval = 0; const char *lrm_op_task = NULL; int lrm_op_interval = 0; const char *lrm_op_id = NULL; char *last_failure_key = NULL; if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) { return FALSE; } conf_op_name = crm_element_value(conf_op_xml, "name"); conf_op_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval")); lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK); crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &lrm_op_interval); if (safe_str_eq(conf_op_name, lrm_op_task) == FALSE || conf_op_interval != lrm_op_interval) { return FALSE; } lrm_op_id = ID(lrm_op_xml); last_failure_key = generate_op_key(rsc_id, "last_failure", 0); if (safe_str_eq(last_failure_key, lrm_op_id)) { matched = TRUE; } else { char *expected_op_key = generate_op_key(rsc_id, conf_op_name, conf_op_interval); if (safe_str_eq(expected_op_key, lrm_op_id)) { int rc = 0; int target_rc = get_target_rc(lrm_op_xml); crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc); if (rc != target_rc) { matched = TRUE; } } free(expected_op_key); } free(last_failure_key); return matched; } static gboolean block_failure(node_t *node, resource_t *rsc, xmlNode *xml_op, pe_working_set_t *data_set) { char *xml_name = clone_strip(rsc->id); char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name); xmlXPathObject *xpathObj = xpath_search(rsc->xml, xpath); gboolean should_block = FALSE; free(xpath); #if 0 /* A good idea? */ if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { /* In this case, stop on-fail defaults to block in unpack_operation() */ return TRUE; } #endif if (xpathObj) { int max = numXpathResults(xpathObj); int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *pref = getXpathResult(xpathObj, lpc); if (xml_op) { should_block = is_matched_failure(xml_name, pref, xml_op); if (should_block) { break; } } else { const char *conf_op_name = NULL; int conf_op_interval = 0; char *lrm_op_xpath = NULL; xmlXPathObject *lrm_op_xpathObj = NULL; conf_op_name = crm_element_value(pref, "name"); conf_op_interval = crm_get_msec(crm_element_value(pref, "interval")); lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']" "//lrm_resource[@id='%s']" "/lrm_rsc_op[@operation='%s'][@interval='%d']", node->details->uname, xml_name, conf_op_name, conf_op_interval); lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath); free(lrm_op_xpath); if (lrm_op_xpathObj) { int max2 = numXpathResults(lrm_op_xpathObj); int lpc2 = 0; for (lpc2 = 0; lpc2 < max2; lpc2++) { xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2); should_block = is_matched_failure(xml_name, pref, lrm_op_xml); if (should_block) { break; } } } freeXpathObject(lrm_op_xpathObj); if (should_block) { break; } } } } free(xml_name); freeXpathObject(xpathObj); return should_block; } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } /*! * \internal * \brief Compile regular expression to match a failure-related node attribute * * \param[in] prefix Attribute prefix to match * \param[in] rsc_name Resource name to match as used in failure attributes * \param[in] is_legacy Whether DC uses per-resource fail counts * \param[in] is_unique Whether the resource is a globally unique clone * \param[out] re Where to store resulting regular expression * * \note Fail attributes are named like PREFIX-RESOURCE#OP_INTERVAL. + * The caller is responsible for freeing re with regfree(). */ static void generate_fail_regex(const char *prefix, const char *rsc_name, gboolean is_legacy, gboolean is_unique, regex_t *re) { char *pattern; /* @COMPAT DC < 1.1.17: Fail counts used to be per-resource rather than * per-operation. */ const char *op_pattern = (is_legacy? "" : "#.+_[0-9]+"); /* Ignore instance numbers for anything other than globally unique clones. * Anonymous clone fail counts could contain an instance number if the * clone was initially unique, failed, then was converted to anonymous. * @COMPAT Also, before 1.1.8, anonymous clone fail counts always contained * clone instance numbers. */ const char *instance_pattern = (is_unique? "" : "(:[0-9]+)?"); pattern = crm_strdup_printf("^%s-%s%s%s$", prefix, rsc_name, instance_pattern, op_pattern); CRM_LOG_ASSERT(regcomp(re, pattern, REG_EXTENDED|REG_NOSUB) == 0); free(pattern); } /*! * \internal * \brief Compile regular expressions to match failure-related node attributes * - * \param[in] rsc Resource being checked for failures - * \param[in] data_set Data set (for CRM feature set version) - * \param[out] re Where to store resulting regular expression + * \param[in] rsc Resource being checked for failures + * \param[in] data_set Data set (for CRM feature set version) + * \param[out] failcount_re Storage for regular expression for fail count + * \param[out] lastfailure_re Storage for regular expression for last failure + * + * \note The caller is responsible for freeing the expressions with regfree(). */ static void generate_fail_regexes(resource_t *rsc, pe_working_set_t *data_set, regex_t *failcount_re, regex_t *lastfailure_re) { char *rsc_name = rsc_fail_name(rsc); const char *version = crm_element_value(data_set->input, XML_ATTR_CRM_VERSION); gboolean is_legacy = (compare_version(version, "3.0.13") < 0); generate_fail_regex(CRM_FAIL_COUNT_PREFIX, rsc_name, is_legacy, is_set(rsc->flags, pe_rsc_unique), failcount_re); generate_fail_regex(CRM_LAST_FAILURE_PREFIX, rsc_name, is_legacy, is_set(rsc->flags, pe_rsc_unique), lastfailure_re); free(rsc_name); } int get_failcount_full(node_t *node, resource_t *rsc, time_t *last_failure, bool effective, xmlNode *xml_op, pe_working_set_t *data_set) { char *key = NULL; const char *value = NULL; regex_t failcount_re, lastfailure_re; int failcount = 0; time_t last = 0; GHashTableIter iter; generate_fail_regexes(rsc, data_set, &failcount_re, &lastfailure_re); /* Resource fail count is sum of all matching operation fail counts */ g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (regexec(&failcount_re, key, 0, NULL, 0) == 0) { failcount = merge_weights(failcount, char2score(value)); } else if (regexec(&lastfailure_re, key, 0, NULL, 0) == 0) { last = QB_MAX(last, crm_int_helper(value, NULL)); } } + regfree(&failcount_re); + regfree(&lastfailure_re); + if ((failcount > 0) && (last > 0) && (last_failure != NULL)) { *last_failure = last; } /* If failure blocks the resource, disregard any failure timeout */ if ((failcount > 0) && rsc->failure_timeout && block_failure(node, rsc, xml_op, data_set)) { pe_warn("Ignoring failure timeout %d for %s because it conflicts with on-fail=block", rsc->id, rsc->failure_timeout); rsc->failure_timeout = 0; } /* If all failures have expired, ignore fail count */ if (effective && (failcount > 0) && (last > 0) && rsc->failure_timeout) { time_t now = get_effective_time(data_set); if (now > (last + rsc->failure_timeout)) { crm_debug("Failcount for %s on %s expired after %ds", rsc->id, node->details->uname, rsc->failure_timeout); failcount = 0; } } if (failcount > 0) { char *score = score2char(failcount); crm_info("%s has failed %s times on %s", rsc->id, score, node->details->uname); free(score); } return failcount; } /* If it's a resource container, get its failcount plus all the failcounts of * the resources within it */ int get_failcount_all(node_t *node, resource_t *rsc, time_t *last_failure, pe_working_set_t *data_set) { int failcount_all = 0; failcount_all = get_failcount(node, rsc, last_failure, data_set); if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; time_t filler_last_failure = 0; failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); if (last_failure && filler_last_failure > *last_failure) { *last_failure = filler_last_failure; } } if (failcount_all != 0) { char *score = score2char(failcount_all); crm_info("Container %s and the resources within it have failed %s times on %s", rsc->id, score, node->details->uname); free(score); } } return failcount_all; } diff --git a/tools/Makefile.am b/tools/Makefile.am index c88766dd1a..35480357f2 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,142 +1,144 @@ # # Copyright (C) 2004-2009 Andrew Beekhof # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # include $(top_srcdir)/Makefile.common if BUILD_SYSTEMD systemdunit_DATA = crm_mon.service endif COMMONLIBS = \ $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/cib/libcib.la \ $(CURSESLIBS) $(CLUSTERLIBS) +noinst_HEADERS = crm_resource.h fake_transition.h + pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector sbin_SCRIPTS = crm_report crm_standby crm_master crm_failcount if BUILD_CIBSECRETS sbin_SCRIPTS += cibsecret endif EXTRA_DIST = $(sbin_SCRIPTS) sbin_PROGRAMS = crm_simulate crmadmin cibadmin crm_node crm_attribute crm_resource crm_verify \ crm_shadow attrd_updater crm_diff crm_mon iso8601 crm_ticket crm_error testdir = $(datadir)/$(PACKAGE)/tests/cli test_SCRIPTS = regression.sh test_DATA = regression.dates.exp regression.tools.exp regression.acls.exp \ regression.validity.exp if BUILD_HEARTBEAT_SUPPORT sbin_PROGRAMS += crm_uuid endif if BUILD_SERVICELOG sbin_PROGRAMS += notifyServicelogEvent endif if BUILD_OPENIPMI_SERVICELOG sbin_PROGRAMS += ipmiservicelogd endif ## SOURCES MAN8DEPS = crm_attribute crm_node crmadmin_SOURCES = crmadmin.c crmadmin_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(COMMONLIBS) $(CLUSTERLIBS) crm_error_SOURCES = crm_error.c crm_error_LDADD = $(COMMONLIBS) crm_uuid_SOURCES = crm_uuid.c crm_uuid_LDADD = $(COMMONLIBS) $(top_builddir)/lib/cluster/libcrmcluster.la cibadmin_SOURCES = cibadmin.c cibadmin_LDADD = $(COMMONLIBS) crm_shadow_SOURCES = cib_shadow.c crm_shadow_LDADD = $(COMMONLIBS) crm_node_SOURCES = crm_node.c crm_node_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(COMMONLIBS) $(CLUSTERLIBS) crm_simulate_SOURCES = crm_simulate.c fake_transition.c crm_simulate_CFLAGS = -I$(top_srcdir)/pengine crm_simulate_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/transition/libtransitioner.la \ $(COMMONLIBS) crm_diff_SOURCES = crm_diff.c crm_diff_LDADD = $(COMMONLIBS) crm_mon_SOURCES = crm_mon.c crm_mon_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/pengine/libpengine.la \ $(COMMONLIBS) $(SNMPLIBS) $(ESMTPLIBS) # Arguments could be made that this should live in crm/pengine crm_verify_SOURCES = crm_verify.c crm_verify_LDADD = $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la \ $(COMMONLIBS) crm_attribute_SOURCES = crm_attribute.c crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la $(COMMONLIBS) crm_resource_SOURCES = crm_resource.c crm_resource_ban.c crm_resource_runtime.c crm_resource_print.c fake_transition.c crm_resource_CFLAGS = -I$(top_srcdir)/pengine crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ $(top_builddir)/lib/services/libcrmservice.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la \ $(top_builddir)/lib/transition/libtransitioner.la \ $(COMMONLIBS) iso8601_SOURCES = test.iso8601.c iso8601_LDADD = $(COMMONLIBS) attrd_updater_SOURCES = attrd_updater.c attrd_updater_LDADD = $(COMMONLIBS) crm_ticket_SOURCES = crm_ticket.c crm_ticket_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/pengine/libpengine.la \ $(COMMONLIBS) if BUILD_SERVICELOG notifyServicelogEvent_SOURCES = notifyServicelogEvent.c notifyServicelogEvent_CFLAGS = $(SERVICELOG_CFLAGS) notifyServicelogEvent_LDADD = $(top_builddir)/lib/common/libcrmcommon.la $(SERVICELOG_LIBS) endif if BUILD_OPENIPMI_SERVICELOG ipmiservicelogd_SOURCES = ipmiservicelogd.c ipmiservicelogd_CFLAGS = $(OPENIPMI_SERVICELOG_CFLAGS) $(SERVICELOG_CFLAGS) ipmiservicelogd_LDFLAGS = $(top_builddir)/lib/common/libcrmcommon.la $(OPENIPMI_SERVICELOG_LIBS) $(SERVICELOG_LIBS) endif CLEANFILES = $(man8_MANS) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index af96909546..41f4fc5015 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,1709 +1,1715 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include bool do_trace = FALSE; bool do_force = FALSE; int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; static int do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { node_t *node = (node_t *) lpc->data; crm_trace("resource %s is running on: %s", rsc, node->details->uname); if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (!pe_rsc_is_clone(the_rsc) && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return found; } int cli_resource_search(const char *rsc, pe_working_set_t * data_set) { int found = 0; resource_t *the_rsc = NULL; resource_t *parent = NULL; if (the_rsc == NULL) { the_rsc = pe_find_resource(data_set->resources, rsc); } if (the_rsc == NULL) { return -ENXIO; } if (pe_rsc_is_clone(the_rsc)) { GListPtr gIter = the_rsc->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } /* The anonymous clone children's common ID is supplied */ } else if ((parent = uber_parent(the_rsc)) != NULL && pe_rsc_is_clone(parent) && is_not_set(the_rsc->flags, pe_rsc_unique) && the_rsc->clone_name && safe_str_eq(rsc, the_rsc->clone_name) && safe_str_neq(rsc, the_rsc->id)) { GListPtr gIter = parent->children; for (; gIter != NULL; gIter = gIter->next) { found += do_find_resource(rsc, gIter->data, data_set); } } else { found += do_find_resource(rsc, the_rsc, data_set); } return found; } resource_t * find_rsc_or_clone(const char *rsc, pe_working_set_t * data_set) { resource_t *the_rsc = pe_find_resource(data_set->resources, rsc); if (the_rsc == NULL) { char *as_clone = crm_concat(rsc, "0", ':'); the_rsc = pe_find_resource(data_set->resources, as_clone); free(as_clone); } return the_rsc; } static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; static int xpath_max = 1024; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, xpath_max); offset += snprintf(xpath_string + offset, xpath_max - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, xpath_max - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, xpath_max - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, xpath_max - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, xpath_max - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, xpath_max - offset, " and "); } offset += snprintf(xpath_string + offset, xpath_max - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, xpath_max - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } static resource_t * find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd) { int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; if(do_force == TRUE) { return rsc; } else if(rsc->parent) { switch(rsc->parent->variant) { case pe_group: if (BE_QUIET == FALSE) { printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } break; case pe_master: case pe_clone: rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_ok) { rsc = rsc->parent; if (BE_QUIET == FALSE) { printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); } } break; default: break; } } else if (rsc->parent && BE_QUIET == FALSE) { printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } else if(rsc->parent == NULL && rsc->children) { resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_ok) { rsc = child; if (BE_QUIET == FALSE) { printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); } } free(local_attr_id); free(lookup_id); } } return rsc; } int cli_resource_update_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t * cib, pe_working_set_t * data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *lookup_id = NULL; char *local_attr_id = NULL; char *local_attr_set = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; bool use_attributes_tag = FALSE; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && pcmk_ok != find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) { printf("\n"); } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { if (do_force == FALSE) { rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", uber_parent(rsc)->id, attr_name, local_attr_id); printf(" Delete '%s' first or use --force to override\n", local_attr_id); } free(local_attr_id); if (rc == pcmk_ok) { return -ENOTUNIQ; } } } else { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "update"); } lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(lookup_id); free(local_attr_id); return rc; } else { const char *value = NULL; xmlNode *cib_top = NULL; const char *tag = crm_element_name(rsc->xml); cib->cmds->query(cib, "/cib", &cib_top, cib_sync_call | cib_scope_local | cib_xpath | cib_no_children); value = crm_element_value(cib_top, "ignore_dtd"); if (value != NULL) { use_attributes_tag = TRUE; } else { value = crm_element_value(cib_top, XML_ATTR_VALIDATION); if (crm_ends_with(value, "-0.6")) { use_attributes_tag = TRUE; } } free_xml(cib_top); if (attr_set == NULL) { local_attr_set = crm_concat(lookup_id, attr_set_type, '-'); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } if (use_attributes_tag && safe_str_eq(tag, XML_CIB_TAG_MASTER)) { tag = "master_slave"; /* use the old name */ } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); if (use_attributes_tag) { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTRS); } } xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (xml_top == NULL) { xml_top = xml_obj; } crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, attr_value); crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id); cli_resource_update_attribute(peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } return rc; } int cli_resource_delete_attribute(const char *rsc_id, const char *attr_set, const char *attr_id, const char *attr_name, cib_t * cib, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; resource_t *rsc = find_rsc_or_clone(rsc_id, data_set); if (rsc == NULL) { return -ENXIO; } if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) { printf("\n"); } if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { rsc = find_matching_attr_resource(rsc, rsc_id, attr_set, attr_id, attr_name, cib, "delete"); } lookup_id = clone_strip(rsc->id); rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { free(lookup_id); return pcmk_ok; } else if (rc != pcmk_ok) { free(lookup_id); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = create_xml_node(NULL, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, attr_id); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, attr_name); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->delete(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); return rc; } static int send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, const char *host_uname, const char *rsc_id, bool only_failed, pe_working_set_t * data_set) { char *our_pid = NULL; char *key = NULL; int rc = -ECOMM; xmlNode *cmd = NULL; xmlNode *xml_rsc = NULL; const char *value = NULL; const char *router_node = host_uname; xmlNode *params = NULL; xmlNode *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); return -ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); return -EINVAL; } else if (host_uname == NULL) { CMD_ERR("Please supply a hostname with -H"); return -EINVAL; } else { node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { if (node->details->remote_rsc == NULL || node->details->remote_rsc->running_on == NULL) { CMD_ERR("No lrmd connection detected to remote node %s", host_uname); return -ENXIO; } node = node->details->remote_rsc->running_on->data; router_node = node->details->uname; } } key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); } else { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); if (value == NULL) { CMD_ERR("%s has no type! Aborting...", rsc_id); return -ENXIO; } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); if (value == NULL) { CMD_ERR("%s has no class! Aborting...", rsc_id); return -ENXIO; } crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); key = crm_meta_name(XML_LRM_ATTR_INTERVAL); crm_xml_add(params, key, "60000"); /* 1 minute */ free(key); our_pid = calloc(1, 11); if (our_pid != NULL) { snprintf(our_pid, 10, "%d", getpid()); our_pid[10] = '\0'; } cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); /* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ free_xml(msg_data); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { rc = 0; } else { CMD_ERR("Could not send %s op to the crmd", op); rc = -ENOTCONN; } free_xml(cmd); return rc; } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *rsc, const char *operation, const char *interval, pe_working_set_t *data_set) { int rc = pcmk_ok; node_t *node = NULL; char *rsc_name = NULL; int attr_options = attrd_opt_none; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; rc = cli_resource_delete(crmd_channel, host_uname, child, operation, interval, data_set); if(rc != pcmk_ok || (pe_rsc_is_clone(rsc) && is_not_set(rsc->flags, pe_rsc_unique))) { return rc; } } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; for (lpc = data_set->nodes; lpc != NULL; lpc = lpc->next) { node = (node_t *) lpc->data; if (node->details->online) { cli_resource_delete(crmd_channel, node->details->uname, rsc, operation, interval, data_set); } } return pcmk_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { printf("Unable to clean up %s because node %s not found\n", rsc->id, host_uname); return -ENODEV; } if (!node->details->rsc_discovery_enabled) { printf("Unable to clean up %s because resource discovery disabled on %s\n", rsc->id, host_uname); return -EOPNOTSUPP; } /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc->id, TRUE, data_set); if (rc != pcmk_ok) { printf("Unable to clean up %s history on %s: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); return rc; } if (node->details->remote_rsc == NULL) { crmd_replies_needed++; } rsc_name = rsc_fail_name(rsc); if (is_remote_node(node)) { attr_options |= attrd_opt_remote; } rc = attrd_clear_delegate(NULL, host_uname, rsc_name, operation, interval, NULL, attr_options); if (rc != pcmk_ok) { printf("Cleaned %s history on %s, but unable to clear failures: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); } else { printf("Cleaned up %s on %s\n", rsc->id, host_uname); } free(rsc_name); return rc; } void cli_resource_check(cib_t * cib_conn, resource_t *rsc) { int need_nl = 0; char *role_s = NULL; char *managed = NULL; resource_t *parent = uber_parent(rsc); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); if(role_s) { enum rsc_role_e role = text2role(role_s); if(role == RSC_ROLE_UNKNOWN) { // Treated as if unset } else if(role == RSC_ROLE_STOPPED) { printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); need_nl++; } else if(parent->variant == pe_master && role == RSC_ROLE_SLAVE) { printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); need_nl++; } } if(managed && crm_is_true(managed) == FALSE) { printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); need_nl++; } if(need_nl) { printf("\n"); } } int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set) { crm_warn("Failing: %s", rsc_id); return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); } static GHashTable * generate_resource_params(resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); meta = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); combined = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } static bool resource_is_running_on(resource_t *rsc, const char *host) { bool found = TRUE; GListPtr hIter = NULL; GListPtr hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { resource_t *rsc = (resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static GList *subtract_lists(GList *from, GList *items) { GList *item = NULL; GList *result = g_list_copy(from); for (item = items; item != NULL; item = item->next) { GList *candidate = NULL; for (candidate = from; candidate != NULL; candidate = candidate->next) { crm_info("Comparing %s with %s", candidate->data, item->data); if(strcmp(candidate->data, item->data) == 0) { result = g_list_remove(result, candidate->data); break; } } } return result; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { fprintf(stdout, "%s%s\n", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return pcmk_ok on success, -ENOKEY if unable to upgrade XML * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns -ENOKEY ("Required key not available") if that fails, * but perhaps -pcmk_err_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return -ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { fprintf(stderr, "Could not upgrade the current CIB XML\n"); free_xml(cib_xml_copy); return rc; } return pcmk_ok; } static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc; cleanup_alloc_calculations(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { return rc; } if(simulate) { pid = crm_itoa(getpid()); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { fprintf(stderr, "Could not create shadow cib: '%s'\n", pid); rc = -ENXIO; goto cleanup; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); if(rc != pcmk_ok) { fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } do_calculations(data_set, data_set->input, NULL); run_simulation(data_set, shadow_cib, NULL, TRUE); rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } cleanup: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); max_delay = crm_int_helper(value, NULL); pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \ (resource_is_running_on((r), (h)) == FALSE)) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB for modifying/checking resource * * \return pcmk_ok on success, -errno on failure (exits on certain failures) */ int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib) { int rc = 0; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool is_clone = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t data_set; if(resource_is_running_on(rsc, host) == FALSE) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { printf("%s is not running on %s and so cannot be restarted\n", id, host); } else { printf("%s is not running anywhere and so cannot be restarted\n", id); } return -ENXIO; } /* We might set the target-role meta-attribute */ attr_set_type = XML_TAG_META_SETS; rsc_id = strdup(rsc->id); if(pe_rsc_is_clone(rsc)) { is_clone = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ set_working_set_defaults(&data_set); rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc); free(rsc_id); return rc; } restart_target_active = get_active_resources(host, data_set.resources); current_active = get_active_resources(host, data_set.resources); dump_list(current_active, "Origin"); if(is_clone && host) { /* Stop the clone instance by banning it from the host */ BE_QUIET = TRUE; rc = cli_resource_ban(rsc_id, host, NULL, cib); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Slave). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); return crm_exit(rc); } rc = update_dataset(cib, &data_set, TRUE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources would be stopped\n"); goto failure; } target_active = get_active_resources(host, data_set.resources); dump_list(target_active, "Target"); list_delta = subtract_lists(current_active, target_active); fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while(g_list_length(list_delta) > 0) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were stopped\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(current_active, target_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } if(is_clone && host) { rc = cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); free(rsc_id); return crm_exit(rc); } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; if (list_delta) { g_list_free(list_delta); } list_delta = subtract_lists(target_active, current_active); fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were started\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(target_active, current_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } rc = pcmk_ok; goto done; failure: if(is_clone && host) { cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { cli_resource_update_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); } else { cli_resource_delete_attribute(rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } cleanup_alloc_calculations(&data_set); free(rsc_id); return rc; } #define action_is_pending(action) \ ((is_set((action)->flags, pe_action_optional) == FALSE) \ && (is_set((action)->flags, pe_action_runnable) == TRUE) \ && (is_set((action)->flags, pe_action_pseudo) == FALSE)) /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GListPtr actions) { GListPtr action; for (action = actions; action != NULL; action = action->next) { if (action_is_pending((action_t *) action->data)) { return TRUE; } } return FALSE; } /*! * \internal * \brief Print pending actions to stderr * * \param[in] actions List of actions to check * * \return void */ static void print_pending_actions(GListPtr actions) { GListPtr action; fprintf(stderr, "Pending actions:\n"); for (action = actions; action != NULL; action = action->next) { action_t *a = (action_t *) action->data; if (action_is_pending(a)) { fprintf(stderr, "\tAction %d: %s", a->id, a->uuid); if (a->node) { fprintf(stderr, "\ton %s", a->node->details->uname); } fprintf(stderr, "\n"); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB * * \return pcmk_ok on success, -errno on failure */ int wait_till_stable(int timeout_ms, cib_t * cib) { pe_working_set_t data_set; int rc = -1; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; set_working_set_defaults(&data_set); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %d seconds for cluster actions to complete", time_diff); } else { print_pending_actions(data_set.actions); cleanup_alloc_calculations(&data_set); return -ETIME; } if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ cleanup_alloc_calculations(&data_set); rc = update_working_set_from_cib(&data_set, cib); if (rc != pcmk_ok) { cleanup_alloc_calculations(&data_set); return rc; } do_calculations(&data_set, data_set.input, NULL); } while (actions_are_pending(data_set.actions)); return pcmk_ok; } int cli_resource_execute(const char *rsc_id, const char *rsc_action, GHashTable *override_hash, cib_t * cib, pe_working_set_t *data_set) { int rc = pcmk_ok; svc_action_t *op = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Must supply a resource id with -r"); return -ENXIO; } if (safe_str_eq(rsc_action, "validate")) { action = "validate-all"; } else if (safe_str_eq(rsc_action, "force-check")) { action = "monitor"; } else if (safe_str_eq(rsc_action, "force-stop")) { action = rsc_action+6; } else if (safe_str_eq(rsc_action, "force-start") || safe_str_eq(rsc_action, "force-demote") || safe_str_eq(rsc_action, "force-promote")) { action = rsc_action+6; if(pe_rsc_is_clone(rsc)) { rc = cli_resource_search(rsc_id, data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active", action, rsc_id); CMD_ERR("Try setting target-role=stopped first or specifying --force"); crm_exit(EPERM); } } } if(pe_rsc_is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { CMD_ERR("Sorry, --%s doesn't support group resources", rsc_action); crm_exit(EOPNOTSUPP); } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { CMD_ERR("Sorry, --%s doesn't support %s resources yet", rsc_action, rclass); crm_exit(EOPNOTSUPP); } params = generate_resource_params(rsc, data_set); /* add crm_feature_set env needed by some resource agents */ g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); + if (op == NULL) { + /* Re-run with stderr enabled so we can display a sane error message */ + crm_enable_stderr(TRUE); + op = resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, + -1, params, 0); + + /* We know op will be NULL, but this makes static analysis happy */ + services_action_free(op); + + return crm_exit(EINVAL); + } + if(do_trace) { setenv("OCF_TRACE_RA", "1", 1); } - if(op && override_hash) { + if (override_hash) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, override_hash); while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n", rsc->id, name, value); g_hash_table_replace(op->params, strdup(name), strdup(value)); } } - if(op == NULL) { - /* Re-run but with stderr enabled so we can display a sane error message */ - crm_enable_stderr(TRUE); - resources_action_create(rsc->id, rclass, rprov, rtype, action, 0, -1, params, 0); - return crm_exit(EINVAL); - - } else if (services_action_sync(op)) { + if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: %d\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, op->status); } /* hide output for validate-all if not in verbose */ if (!do_trace && safe_str_eq(action, "validate-all")) goto done; if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } done: rc = op->rc; services_action_free(op); return rc; } int cli_resource_move(const char *rsc_id, const char *host_name, cib_t * cib, pe_working_set_t *data_set) { int rc = -EINVAL; int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set->nodes, host_name); resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); bool cur_is_dest = FALSE; if (rsc == NULL) { CMD_ERR("Resource '%s' not moved: not found", rsc_id); return -ENXIO; } else if (scope_master && rsc->variant != pe_master) { resource_t *p = uber_parent(rsc); if(p->variant == pe_master) { CMD_ERR("Using parent '%s' for --move command instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring '--master' option: not valid for %s resources.", get_resource_typename(rsc->variant)); scope_master = FALSE; } } if(rsc->variant == pe_master) { GListPtr iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; count++; } } if(scope_master == FALSE && count == 0) { count = g_list_length(rsc->running_on); } } else if (pe_rsc_is_clone(rsc)) { count = g_list_length(rsc->running_on); } else if (g_list_length(rsc->running_on) > 1) { CMD_ERR("Resource '%s' not moved: active on multiple nodes", rsc_id); return rc; } if(dest == NULL) { CMD_ERR("Error performing operation: node '%s' is unknown", host_name); return -ENXIO; } if(g_list_length(rsc->running_on) == 1) { current = rsc->running_on->data; } if(current == NULL) { /* Nothing to check */ } else if(scope_master && rsc->fns->state(rsc, TRUE) != RSC_ROLE_MASTER) { crm_trace("%s is already active on %s but not in correct state", rsc_id, dest->details->uname); } else if (safe_str_eq(current->details->uname, dest->details->uname)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, scope_master?"promoted":"active", dest->details->uname); } else { CMD_ERR("Error performing operation: %s is already %s on %s", rsc_id, scope_master?"promoted":"active", dest->details->uname); return rc; } } /* Clear any previous constraints for 'dest' */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(rsc_id, dest->details->uname, cib); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move one to %s", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } diff --git a/xml/resources-2.8.rng b/xml/resources-2.8.rng index 870e8045d2..3bc74be072 100644 --- a/xml/resources-2.8.rng +++ b/xml/resources-2.8.rng @@ -1,308 +1,303 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ([0-9\-]+) - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ([0-9\-]+) + + + + + + + + + + + + + + + + + + + + + + + + + + + Stopped Started Slave Master nothing quorum fencing unfencing ignore block stop restart standby fence restart-container ocf lsb heartbeat stonith upstart service systemd nagios