diff --git a/cts/cli/crm_verify_invalid_fencing_topology.xml b/cts/cli/crm_verify_invalid_fencing_topology.xml new file mode 100644 index 0000000000..a6647fbd13 --- /dev/null +++ b/cts/cli/crm_verify_invalid_fencing_topology.xml @@ -0,0 +1,260 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp index 83988543ad..d234de7413 100644 --- a/cts/cli/regression.tools.exp +++ b/cts/cli/regression.tools.exp @@ -1,10376 +1,10381 @@ Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Validate CIB =#=#=#= =#=#=#= Current cib after: Validate CIB =#=#=#= =#=#=#= End test: Validate CIB - OK (0) =#=#=#= * Passed: cibadmin - Validate CIB =#=#=#= Begin test: List all available options (invalid type) =#=#=#= crm_attribute: Invalid --list-options value 'asdf'. Allowed values: cluster =#=#=#= End test: List all available options (invalid type) - Incorrect usage (64) =#=#=#= * Passed: crm_attribute - List all available options (invalid type) =#=#=#= Begin test: List all available options (invalid type) (XML) =#=#=#= crm_attribute: Invalid --list-options value 'asdf'. 
Allowed values: cluster =#=#=#= End test: List all available options (invalid type) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_attribute - List all available options (invalid type) (XML) =#=#=#= Begin test: List non-advanced cluster options =#=#=#= Pacemaker cluster options Also known as properties, these are options that affect behavior across the entire cluster. They are configured within cluster_property_set elements inside the crm_config subsection of the CIB configuration section. * dc-version: Pacemaker version on cluster node elected Designated Controller (DC) * Includes a hash which identifies the exact revision the code was built from. Used for diagnostic purposes. * Possible values (generated by Pacemaker): version (no default) * cluster-infrastructure: The messaging layer on which Pacemaker is currently running * Used for informational and diagnostic purposes. * Possible values (generated by Pacemaker): string (no default) * cluster-name: An arbitrary name for the cluster * This optional value is mostly for users' convenience as desired in administration, but may also be used in Pacemaker configuration rules via the #cluster-name node attribute, and by higher-level tools and resource agents. * Possible values: string (no default) * dc-deadtime: How long to wait for a response from other nodes during start-up * The optimal value will depend on the speed and load of your network and the type of switches used. * Possible values: duration (default: ) * cluster-recheck-interval: Polling interval to recheck cluster state and evaluate rules with date specifications * Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure-timeout settings and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. A value of 0 disables polling. 
A positive value sets an interval in seconds, unless other units are specified (for example, "5min"). * Possible values: duration (default: ) * fence-reaction: How a cluster node should react if notified of its own fencing * A cluster node may receive notification of a "succeeded" fencing that targeted it if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. * Possible values: "stop" (default), "panic" * no-quorum-policy: What to do when the cluster does not have quorum * Possible values: "stop" (default), "freeze", "ignore", "demote", "suicide" * shutdown-lock: Whether to lock resources to a cleanly shut down node * When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. * Possible values: boolean (default: ) * shutdown-lock-limit: Do not lock resources to a cleanly shut down node longer than this * If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. 
* Possible values: duration (default: ) * enable-acl: Enable Access Control Lists (ACLs) for the CIB * Possible values: boolean (default: ) * symmetric-cluster: Whether resources can run on any node by default * Possible values: boolean (default: ) * maintenance-mode: Whether the cluster should refrain from monitoring, starting, and stopping resources * Possible values: boolean (default: ) * start-failure-is-fatal: Whether a start failure should prevent a resource from being recovered on the same node * When true, the cluster will immediately ban a resource from a node if it fails to start there. When false, the cluster will instead check the resource's fail count against its migration-threshold. * Possible values: boolean (default: ) * enable-startup-probes: Whether the cluster should check for active resources during start-up * Possible values: boolean (default: ) * stonith-action: Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") * Possible values: "reboot" (default), "off", "poweroff" * stonith-timeout: How long to wait for on, off, and reboot fence actions to complete by default * Possible values: duration (default: ) * have-watchdog: Whether watchdog integration is enabled * This is set automatically by the cluster according to whether SBD is detected to be in use. User-configured values are ignored. The value `true` is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is nonzero. In that case, if fencing is required, watchdog-based self-fencing will be performed via SBD without requiring a fencing resource explicitly configured. * Possible values (generated by Pacemaker): boolean (default: ) * stonith-watchdog-timeout: How long before nodes can be assumed to be safely down when watchdog-based self-fencing via SBD is in use * If this is set to a positive value, lost nodes are assumed to achieve self-fencing using watchdog-based SBD within this much time. 
This does not require a fencing resource to be explicitly configured, though a fence_watchdog resource can be configured, to limit use to specific nodes. If this is set to 0 (the default), the cluster will never assume watchdog-based self-fencing. If this is set to a negative value, the cluster will use twice the local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, or otherwise treat this as 0. WARNING: When used, this timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-based SBD, and Pacemaker will refuse to start on any of those nodes where this is not true for the local value or SBD is not active. When this is set to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes that use SBD, otherwise data corruption or loss could occur. * Possible values: timeout (default: ) * stonith-max-attempts: How many times fencing can fail before it will no longer be immediately re-attempted on a target * Possible values: score (default: ) * concurrent-fencing: Allow performing fencing operations in parallel * Possible values: boolean (default: ) * priority-fencing-delay: Apply fencing delay targeting the lost nodes with the highest total resource priority * Apply specified delay for the fencings that are targeting the lost nodes with the highest total resource priority in case we don't have the majority of the nodes in our cluster partition, so that the more significant nodes potentially win any fencing match, which is especially meaningful under split-brain of 2-node cluster. A promoted resource instance takes the base priority + 1 on calculation if the base priority is not 0. Any static/random delays that are introduced by `pcmk_delay_base/max` configured for the corresponding fencing resources will be added to this delay. This delay should be significantly greater than, safely twice, the maximum `pcmk_delay_base/max`. By default, priority fencing delay is disabled. 
* Possible values: duration (default: ) * node-pending-timeout: How long to wait for a node that has joined the cluster to join the controller process group * Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. Setting the value to 2h means fence nodes after 2 hours. * Possible values: duration (default: ) * cluster-delay: Maximum time for node-to-node communication * The node elected Designated Controller (DC) will consider an action failed if it does not get a response from the node executing the action within this time (after considering the action's own timeout). The "correct" value will depend on the speed and load of your network and cluster nodes. * Possible values: duration (default: ) * load-threshold: Maximum amount of system load that should be used by cluster nodes * The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit * Possible values: percentage (default: ) * node-action-limit: Maximum number of jobs that can be scheduled per node (defaults to 2x cores) * Possible values: integer (default: ) * batch-limit: Maximum number of jobs that the cluster may execute in parallel across all nodes * The "correct" value will depend on the speed and load of your network and cluster nodes. If set to 0, the cluster will impose a dynamically calculated limit when any node has a high load. 
* Possible values: integer (default: ) * migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) * Possible values: integer (default: ) * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). * Possible values: nonnegative_integer (default: ) * stop-all-resources: Whether the cluster should stop all active resources * Possible values: boolean (default: ) * stop-orphan-resources: Whether to stop resources that were removed from the configuration * Possible values: boolean (default: ) * stop-orphan-actions: Whether to cancel recurring actions removed from the configuration * Possible values: boolean (default: ) * pe-error-series-max: The number of scheduler inputs resulting in errors to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * pe-warn-series-max: The number of scheduler inputs resulting in warnings to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * pe-input-series-max: The number of scheduler inputs without errors or warnings to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * node-health-strategy: How cluster should react to node health attributes * Requires external entities to create node attributes (named with the prefix "#health") with values "red", "yellow", or "green". * Possible values: "none" (default), "migrate-on-red", "only-green", "progressive", "custom" * node-health-base: Base health score assigned to a node * Only used when "node-health-strategy" is set to "progressive". 
* Possible values: score (default: ) * node-health-green: The score to use for a node health attribute whose value is "green" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * node-health-yellow: The score to use for a node health attribute whose value is "yellow" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * node-health-red: The score to use for a node health attribute whose value is "red" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * placement-strategy: How the cluster should allocate resources to nodes * Possible values: "default" (default), "utilization", "minimal", "balanced" =#=#=#= End test: List non-advanced cluster options - OK (0) =#=#=#= * Passed: crm_attribute - List non-advanced cluster options =#=#=#= Begin test: List non-advanced cluster options (XML) (shows all) =#=#=#= 1.1 Also known as properties, these are options that affect behavior across the entire cluster. They are configured within cluster_property_set elements inside the crm_config subsection of the CIB configuration section. Pacemaker cluster options Includes a hash which identifies the exact revision the code was built from. Used for diagnostic purposes. Pacemaker version on cluster node elected Designated Controller (DC) Used for informational and diagnostic purposes. The messaging layer on which Pacemaker is currently running This optional value is mostly for users' convenience as desired in administration, but may also be used in Pacemaker configuration rules via the #cluster-name node attribute, and by higher-level tools and resource agents. An arbitrary name for the cluster The optimal value will depend on the speed and load of your network and the type of switches used. 
How long to wait for a response from other nodes during start-up Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure-timeout settings and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. A value of 0 disables polling. A positive value sets an interval in seconds, unless other units are specified (for example, "5min"). Polling interval to recheck cluster state and evaluate rules with date specifications A cluster node may receive notification of a "succeeded" fencing that targeted it if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. How a cluster node should react if notified of its own fencing Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Exit immediately if shutdown does not complete within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Exit immediately if shutdown does not complete within this much time. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. 
Delay cluster recovery for this much time to allow for additional events to occur. Useful if your configuration is sensitive to the order in which ping updates arrive. Enabling this option will slow down cluster recovery under all conditions What to do when the cluster does not have quorum What to do when the cluster does not have quorum When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. Whether to lock resources to a cleanly shut down node If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. Do not lock resources to a cleanly shut down node longer than this Enable Access Control Lists (ACLs) for the CIB Enable Access Control Lists (ACLs) for the CIB Whether resources can run on any node by default Whether resources can run on any node by default Whether the cluster should refrain from monitoring, starting, and stopping resources Whether the cluster should refrain from monitoring, starting, and stopping resources When true, the cluster will immediately ban a resource from a node if it fails to start there. When false, the cluster will instead check the resource's fail count against its migration-threshold. 
Whether a start failure should prevent a resource from being recovered on the same node Whether the cluster should check for active resources during start-up Whether the cluster should check for active resources during start-up If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a "split-brain" situation, potentially leading to data loss and/or service unavailability. Whether nodes may be fenced as part of recovery Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") How long to wait for on, off, and reboot fence actions to complete by default How long to wait for on, off, and reboot fence actions to complete by default This is set automatically by the cluster according to whether SBD is detected to be in use. User-configured values are ignored. The value `true` is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is nonzero. In that case, if fencing is required, watchdog-based self-fencing will be performed via SBD without requiring a fencing resource explicitly configured. Whether watchdog integration is enabled If this is set to a positive value, lost nodes are assumed to achieve self-fencing using watchdog-based SBD within this much time. This does not require a fencing resource to be explicitly configured, though a fence_watchdog resource can be configured, to limit use to specific nodes. If this is set to 0 (the default), the cluster will never assume watchdog-based self-fencing. If this is set to a negative value, the cluster will use twice the local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, or otherwise treat this as 0. 
WARNING: When used, this timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-based SBD, and Pacemaker will refuse to start on any of those nodes where this is not true for the local value or SBD is not active. When this is set to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes that use SBD, otherwise data corruption or loss could occur. How long before nodes can be assumed to be safely down when watchdog-based self-fencing via SBD is in use How many times fencing can fail before it will no longer be immediately re-attempted on a target How many times fencing can fail before it will no longer be immediately re-attempted on a target Allow performing fencing operations in parallel Allow performing fencing operations in parallel Setting this to false may lead to a "split-brain" situation, potentially leading to data loss and/or service unavailability. Whether to fence unseen nodes at start-up Apply specified delay for the fencings that are targeting the lost nodes with the highest total resource priority in case we don't have the majority of the nodes in our cluster partition, so that the more significant nodes potentially win any fencing match, which is especially meaningful under split-brain of 2-node cluster. A promoted resource instance takes the base priority + 1 on calculation if the base priority is not 0. Any static/random delays that are introduced by `pcmk_delay_base/max` configured for the corresponding fencing resources will be added to this delay. This delay should be significantly greater than, safely twice, the maximum `pcmk_delay_base/max`. By default, priority fencing delay is disabled. Apply fencing delay targeting the lost nodes with the highest total resource priority Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. 
Setting the value to 2h means fence nodes after 2 hours. How long to wait for a node that has joined the cluster to join the controller process group The node elected Designated Controller (DC) will consider an action failed if it does not get a response from the node executing the action within this time (after considering the action's own timeout). The "correct" value will depend on the speed and load of your network and cluster nodes. Maximum time for node-to-node communication The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit Maximum amount of system load that should be used by cluster nodes Maximum number of jobs that can be scheduled per node (defaults to 2x cores) Maximum number of jobs that can be scheduled per node (defaults to 2x cores) The "correct" value will depend on the speed and load of your network and cluster nodes. If set to 0, the cluster will impose a dynamically calculated limit when any node has a high load. Maximum number of jobs that the cluster may execute in parallel across all nodes The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). 
Maximum IPC message backlog before disconnecting a cluster daemon Whether the cluster should stop all active resources Whether the cluster should stop all active resources Whether to stop resources that were removed from the configuration Whether to stop resources that were removed from the configuration Whether to cancel recurring actions removed from the configuration Whether to cancel recurring actions removed from the configuration Values other than default are poorly tested and potentially dangerous. Whether to remove stopped resources from the executor Zero to disable, -1 to store unlimited. The number of scheduler inputs resulting in errors to save Zero to disable, -1 to store unlimited. The number of scheduler inputs resulting in warnings to save Zero to disable, -1 to store unlimited. The number of scheduler inputs without errors or warnings to save Requires external entities to create node attributes (named with the prefix "#health") with values "red", "yellow", or "green". How cluster should react to node health attributes Only used when "node-health-strategy" is set to "progressive". Base health score assigned to a node Only used when "node-health-strategy" is set to "custom" or "progressive". The score to use for a node health attribute whose value is "green" Only used when "node-health-strategy" is set to "custom" or "progressive". The score to use for a node health attribute whose value is "yellow" Only used when "node-health-strategy" is set to "custom" or "progressive". 
The score to use for a node health attribute whose value is "red" How the cluster should allocate resources to nodes How the cluster should allocate resources to nodes =#=#=#= End test: List non-advanced cluster options (XML) (shows all) - OK (0) =#=#=#= * Passed: crm_attribute - List non-advanced cluster options (XML) (shows all) =#=#=#= Begin test: List all available cluster options =#=#=#= Pacemaker cluster options Also known as properties, these are options that affect behavior across the entire cluster. They are configured within cluster_property_set elements inside the crm_config subsection of the CIB configuration section. * dc-version: Pacemaker version on cluster node elected Designated Controller (DC) * Includes a hash which identifies the exact revision the code was built from. Used for diagnostic purposes. * Possible values (generated by Pacemaker): version (no default) * cluster-infrastructure: The messaging layer on which Pacemaker is currently running * Used for informational and diagnostic purposes. * Possible values (generated by Pacemaker): string (no default) * cluster-name: An arbitrary name for the cluster * This optional value is mostly for users' convenience as desired in administration, but may also be used in Pacemaker configuration rules via the #cluster-name node attribute, and by higher-level tools and resource agents. * Possible values: string (no default) * dc-deadtime: How long to wait for a response from other nodes during start-up * The optimal value will depend on the speed and load of your network and the type of switches used. * Possible values: duration (default: ) * cluster-recheck-interval: Polling interval to recheck cluster state and evaluate rules with date specifications * Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure-timeout settings and most time-based rules. 
However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. A value of 0 disables polling. A positive value sets an interval in seconds, unless other units are specified (for example, "5min"). * Possible values: duration (default: ) * fence-reaction: How a cluster node should react if notified of its own fencing * A cluster node may receive notification of a "succeeded" fencing that targeted it if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. * Possible values: "stop" (default), "panic" * no-quorum-policy: What to do when the cluster does not have quorum * Possible values: "stop" (default), "freeze", "ignore", "demote", "suicide" * shutdown-lock: Whether to lock resources to a cleanly shut down node * When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. * Possible values: boolean (default: ) * shutdown-lock-limit: Do not lock resources to a cleanly shut down node longer than this * If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. 
* Possible values: duration (default: ) * enable-acl: Enable Access Control Lists (ACLs) for the CIB * Possible values: boolean (default: ) * symmetric-cluster: Whether resources can run on any node by default * Possible values: boolean (default: ) * maintenance-mode: Whether the cluster should refrain from monitoring, starting, and stopping resources * Possible values: boolean (default: ) * start-failure-is-fatal: Whether a start failure should prevent a resource from being recovered on the same node * When true, the cluster will immediately ban a resource from a node if it fails to start there. When false, the cluster will instead check the resource's fail count against its migration-threshold. * Possible values: boolean (default: ) * enable-startup-probes: Whether the cluster should check for active resources during start-up * Possible values: boolean (default: ) * stonith-action: Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") * Possible values: "reboot" (default), "off", "poweroff" * stonith-timeout: How long to wait for on, off, and reboot fence actions to complete by default * Possible values: duration (default: ) * have-watchdog: Whether watchdog integration is enabled * This is set automatically by the cluster according to whether SBD is detected to be in use. User-configured values are ignored. The value `true` is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is nonzero. In that case, if fencing is required, watchdog-based self-fencing will be performed via SBD without requiring a fencing resource explicitly configured. * Possible values (generated by Pacemaker): boolean (default: ) * stonith-watchdog-timeout: How long before nodes can be assumed to be safely down when watchdog-based self-fencing via SBD is in use * If this is set to a positive value, lost nodes are assumed to achieve self-fencing using watchdog-based SBD within this much time. 
This does not require a fencing resource to be explicitly configured, though a fence_watchdog resource can be configured, to limit use to specific nodes. If this is set to 0 (the default), the cluster will never assume watchdog-based self-fencing. If this is set to a negative value, the cluster will use twice the local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, or otherwise treat this as 0. WARNING: When used, this timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-based SBD, and Pacemaker will refuse to start on any of those nodes where this is not true for the local value or SBD is not active. When this is set to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes that use SBD, otherwise data corruption or loss could occur. * Possible values: timeout (default: ) * stonith-max-attempts: How many times fencing can fail before it will no longer be immediately re-attempted on a target * Possible values: score (default: ) * concurrent-fencing: Allow performing fencing operations in parallel * Possible values: boolean (default: ) * priority-fencing-delay: Apply fencing delay targeting the lost nodes with the highest total resource priority * Apply specified delay for the fencings that are targeting the lost nodes with the highest total resource priority in case we don't have the majority of the nodes in our cluster partition, so that the more significant nodes potentially win any fencing match, which is especially meaningful under split-brain of 2-node cluster. A promoted resource instance takes the base priority + 1 on calculation if the base priority is not 0. Any static/random delays that are introduced by `pcmk_delay_base/max` configured for the corresponding fencing resources will be added to this delay. This delay should be significantly greater than, safely twice, the maximum `pcmk_delay_base/max`. By default, priority fencing delay is disabled. 
* Possible values: duration (default: ) * node-pending-timeout: How long to wait for a node that has joined the cluster to join the controller process group * Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. Setting the value to 2h means fence nodes after 2 hours. * Possible values: duration (default: ) * cluster-delay: Maximum time for node-to-node communication * The node elected Designated Controller (DC) will consider an action failed if it does not get a response from the node executing the action within this time (after considering the action's own timeout). The "correct" value will depend on the speed and load of your network and cluster nodes. * Possible values: duration (default: ) * load-threshold: Maximum amount of system load that should be used by cluster nodes * The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit * Possible values: percentage (default: ) * node-action-limit: Maximum number of jobs that can be scheduled per node (defaults to 2x cores) * Possible values: integer (default: ) * batch-limit: Maximum number of jobs that the cluster may execute in parallel across all nodes * The "correct" value will depend on the speed and load of your network and cluster nodes. If set to 0, the cluster will impose a dynamically calculated limit when any node has a high load. 
* Possible values: integer (default: ) * migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) * Possible values: integer (default: ) * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). * Possible values: nonnegative_integer (default: ) * stop-all-resources: Whether the cluster should stop all active resources * Possible values: boolean (default: ) * stop-orphan-resources: Whether to stop resources that were removed from the configuration * Possible values: boolean (default: ) * stop-orphan-actions: Whether to cancel recurring actions removed from the configuration * Possible values: boolean (default: ) * pe-error-series-max: The number of scheduler inputs resulting in errors to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * pe-warn-series-max: The number of scheduler inputs resulting in warnings to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * pe-input-series-max: The number of scheduler inputs without errors or warnings to save * Zero to disable, -1 to store unlimited. * Possible values: integer (default: ) * node-health-strategy: How cluster should react to node health attributes * Requires external entities to create node attributes (named with the prefix "#health") with values "red", "yellow", or "green". * Possible values: "none" (default), "migrate-on-red", "only-green", "progressive", "custom" * node-health-base: Base health score assigned to a node * Only used when "node-health-strategy" is set to "progressive". 
* Possible values: score (default: ) * node-health-green: The score to use for a node health attribute whose value is "green" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * node-health-yellow: The score to use for a node health attribute whose value is "yellow" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * node-health-red: The score to use for a node health attribute whose value is "red" * Only used when "node-health-strategy" is set to "custom" or "progressive". * Possible values: score (default: ) * placement-strategy: How the cluster should allocate resources to nodes * Possible values: "default" (default), "utilization", "minimal", "balanced" * ADVANCED OPTIONS: * election-timeout: Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. * Possible values: duration (default: ) * shutdown-escalation: Exit immediately if shutdown does not complete within this much time. If you need to adjust this value, it probably indicates the presence of a bug. * Possible values: duration (default: ) * join-integration-timeout: If you need to adjust this value, it probably indicates the presence of a bug. * Possible values: duration (default: ) * join-finalization-timeout: If you need to adjust this value, it probably indicates the presence of a bug. * Possible values: duration (default: ) * transition-delay: Enabling this option will slow down cluster recovery under all conditions * Delay cluster recovery for this much time to allow for additional events to occur. Useful if your configuration is sensitive to the order in which ping updates arrive. 
* Possible values: duration (default: ) * stonith-enabled: Whether nodes may be fenced as part of recovery * If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a "split-brain" situation, potentially leading to data loss and/or service unavailability. * Possible values: boolean (default: ) * startup-fencing: Whether to fence unseen nodes at start-up * Setting this to false may lead to a "split-brain" situation, potentially leading to data loss and/or service unavailability. * Possible values: boolean (default: ) * DEPRECATED OPTIONS (will be removed in a future release): * remove-after-stop: Whether to remove stopped resources from the executor * Values other than default are poorly tested and potentially dangerous. * Possible values: boolean (default: ) =#=#=#= End test: List all available cluster options - OK (0) =#=#=#= * Passed: crm_attribute - List all available cluster options =#=#=#= Begin test: List all available cluster options (XML) =#=#=#= 1.1 Also known as properties, these are options that affect behavior across the entire cluster. They are configured within cluster_property_set elements inside the crm_config subsection of the CIB configuration section. Pacemaker cluster options Includes a hash which identifies the exact revision the code was built from. Used for diagnostic purposes. Pacemaker version on cluster node elected Designated Controller (DC) Used for informational and diagnostic purposes. The messaging layer on which Pacemaker is currently running This optional value is mostly for users' convenience as desired in administration, but may also be used in Pacemaker configuration rules via the #cluster-name node attribute, and by higher-level tools and resource agents. An arbitrary name for the cluster The optimal value will depend on the speed and load of your network and the type of switches used. 
How long to wait for a response from other nodes during start-up Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure-timeout settings and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. A value of 0 disables polling. A positive value sets an interval in seconds, unless other units are specified (for example, "5min"). Polling interval to recheck cluster state and evaluate rules with date specifications A cluster node may receive notification of a "succeeded" fencing that targeted it if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. How a cluster node should react if notified of its own fencing Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Exit immediately if shutdown does not complete within this much time. If you need to adjust this value, it probably indicates the presence of a bug. Exit immediately if shutdown does not complete within this much time. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. If you need to adjust this value, it probably indicates the presence of a bug. 
Delay cluster recovery for this much time to allow for additional events to occur. Useful if your configuration is sensitive to the order in which ping updates arrive. Enabling this option will slow down cluster recovery under all conditions What to do when the cluster does not have quorum What to do when the cluster does not have quorum When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. Whether to lock resources to a cleanly shut down node If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. Do not lock resources to a cleanly shut down node longer than this Enable Access Control Lists (ACLs) for the CIB Enable Access Control Lists (ACLs) for the CIB Whether resources can run on any node by default Whether resources can run on any node by default Whether the cluster should refrain from monitoring, starting, and stopping resources Whether the cluster should refrain from monitoring, starting, and stopping resources When true, the cluster will immediately ban a resource from a node if it fails to start there. When false, the cluster will instead check the resource's fail count against its migration-threshold. 
Whether a start failure should prevent a resource from being recovered on the same node Whether the cluster should check for active resources during start-up Whether the cluster should check for active resources during start-up If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a "split-brain" situation, potentially leading to data loss and/or service unavailability. Whether nodes may be fenced as part of recovery Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") Action to send to fence device when a node needs to be fenced ("poweroff" is a deprecated alias for "off") How long to wait for on, off, and reboot fence actions to complete by default How long to wait for on, off, and reboot fence actions to complete by default This is set automatically by the cluster according to whether SBD is detected to be in use. User-configured values are ignored. The value `true` is meaningful if diskless SBD is used and `stonith-watchdog-timeout` is nonzero. In that case, if fencing is required, watchdog-based self-fencing will be performed via SBD without requiring a fencing resource explicitly configured. Whether watchdog integration is enabled If this is set to a positive value, lost nodes are assumed to achieve self-fencing using watchdog-based SBD within this much time. This does not require a fencing resource to be explicitly configured, though a fence_watchdog resource can be configured, to limit use to specific nodes. If this is set to 0 (the default), the cluster will never assume watchdog-based self-fencing. If this is set to a negative value, the cluster will use twice the local value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, or otherwise treat this as 0. 
WARNING: When used, this timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use watchdog-based SBD, and Pacemaker will refuse to start on any of those nodes where this is not true for the local value or SBD is not active. When this is set to a negative value, `SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes that use SBD, otherwise data corruption or loss could occur. How long before nodes can be assumed to be safely down when watchdog-based self-fencing via SBD is in use How many times fencing can fail before it will no longer be immediately re-attempted on a target How many times fencing can fail before it will no longer be immediately re-attempted on a target Allow performing fencing operations in parallel Allow performing fencing operations in parallel Setting this to false may lead to a "split-brain" situation, potentially leading to data loss and/or service unavailability. Whether to fence unseen nodes at start-up Apply specified delay for the fencings that are targeting the lost nodes with the highest total resource priority in case we don't have the majority of the nodes in our cluster partition, so that the more significant nodes potentially win any fencing match, which is especially meaningful under split-brain of 2-node cluster. A promoted resource instance takes the base priority + 1 on calculation if the base priority is not 0. Any static/random delays that are introduced by `pcmk_delay_base/max` configured for the corresponding fencing resources will be added to this delay. This delay should be significantly greater than, safely twice, the maximum `pcmk_delay_base/max`. By default, priority fencing delay is disabled. Apply fencing delay targeting the lost nodes with the highest total resource priority Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. 
Setting the value to 2h means fence nodes after 2 hours. How long to wait for a node that has joined the cluster to join the controller process group The node elected Designated Controller (DC) will consider an action failed if it does not get a response from the node executing the action within this time (after considering the action's own timeout). The "correct" value will depend on the speed and load of your network and cluster nodes. Maximum time for node-to-node communication The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit Maximum amount of system load that should be used by cluster nodes Maximum number of jobs that can be scheduled per node (defaults to 2x cores) Maximum number of jobs that can be scheduled per node (defaults to 2x cores) The "correct" value will depend on the speed and load of your network and cluster nodes. If set to 0, the cluster will impose a dynamically calculated limit when any node has a high load. Maximum number of jobs that the cluster may execute in parallel across all nodes The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). 
Maximum IPC message backlog before disconnecting a cluster daemon Whether the cluster should stop all active resources Whether the cluster should stop all active resources Whether to stop resources that were removed from the configuration Whether to stop resources that were removed from the configuration Whether to cancel recurring actions removed from the configuration Whether to cancel recurring actions removed from the configuration Values other than default are poorly tested and potentially dangerous. Whether to remove stopped resources from the executor Zero to disable, -1 to store unlimited. The number of scheduler inputs resulting in errors to save Zero to disable, -1 to store unlimited. The number of scheduler inputs resulting in warnings to save Zero to disable, -1 to store unlimited. The number of scheduler inputs without errors or warnings to save Requires external entities to create node attributes (named with the prefix "#health") with values "red", "yellow", or "green". How cluster should react to node health attributes Only used when "node-health-strategy" is set to "progressive". Base health score assigned to a node Only used when "node-health-strategy" is set to "custom" or "progressive". The score to use for a node health attribute whose value is "green" Only used when "node-health-strategy" is set to "custom" or "progressive". The score to use for a node health attribute whose value is "yellow" Only used when "node-health-strategy" is set to "custom" or "progressive". 
The score to use for a node health attribute whose value is "red" How the cluster should allocate resources to nodes How the cluster should allocate resources to nodes =#=#=#= End test: List all available cluster options (XML) - OK (0) =#=#=#= * Passed: crm_attribute - List all available cluster options (XML) =#=#=#= Begin test: Query the value of an attribute that does not exist =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query the value of an attribute that does not exist - No such object (105) =#=#=#= * Passed: crm_attribute - Query the value of an attribute that does not exist =#=#=#= Begin test: Configure something before erasing =#=#=#= =#=#=#= Current cib after: Configure something before erasing =#=#=#= =#=#=#= End test: Configure something before erasing - OK (0) =#=#=#= * Passed: crm_attribute - Configure something before erasing =#=#=#= Begin test: Test '++' XML attribute update syntax =#=#=#= =#=#=#= Current cib after: Test '++' XML attribute update syntax =#=#=#= =#=#=#= End test: Test '++' XML attribute update syntax - OK (0) =#=#=#= * Passed: cibadmin - Test '++' XML attribute update syntax =#=#=#= Begin test: Test '+=' XML attribute update syntax =#=#=#= =#=#=#= Current cib after: Test '+=' XML attribute update syntax =#=#=#= =#=#=#= End test: Test '+=' XML attribute update syntax - OK (0) =#=#=#= * Passed: cibadmin - Test '+=' XML attribute update syntax =#=#=#= Begin test: Test '++' nvpair value update syntax =#=#=#= =#=#=#= Current cib after: Test '++' nvpair value update syntax =#=#=#= =#=#=#= End test: Test '++' nvpair value update syntax - OK (0) =#=#=#= * Passed: crm_attribute - Test '++' nvpair value update syntax =#=#=#= Begin test: Test '++' nvpair value update syntax (XML) =#=#=#= =#=#=#= Current cib after: Test '++' nvpair value update syntax (XML) =#=#=#= =#=#=#= End test: Test '++' nvpair value update syntax (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Test '++' nvpair value update 
syntax (XML) =#=#=#= Begin test: Test '+=' nvpair value update syntax =#=#=#= =#=#=#= Current cib after: Test '+=' nvpair value update syntax =#=#=#= =#=#=#= End test: Test '+=' nvpair value update syntax - OK (0) =#=#=#= * Passed: crm_attribute - Test '+=' nvpair value update syntax =#=#=#= Begin test: Test '+=' nvpair value update syntax (XML) =#=#=#= =#=#=#= Current cib after: Test '+=' nvpair value update syntax (XML) =#=#=#= =#=#=#= End test: Test '+=' nvpair value update syntax (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Test '+=' nvpair value update syntax (XML) =#=#=#= Begin test: Test '++' XML attribute update syntax (--score not set) =#=#=#= =#=#=#= Current cib after: Test '++' XML attribute update syntax (--score not set) =#=#=#= =#=#=#= End test: Test '++' XML attribute update syntax (--score not set) - OK (0) =#=#=#= * Passed: cibadmin - Test '++' XML attribute update syntax (--score not set) =#=#=#= Begin test: Test '+=' XML attribute update syntax (--score not set) =#=#=#= =#=#=#= Current cib after: Test '+=' XML attribute update syntax (--score not set) =#=#=#= =#=#=#= End test: Test '+=' XML attribute update syntax (--score not set) - OK (0) =#=#=#= * Passed: cibadmin - Test '+=' XML attribute update syntax (--score not set) =#=#=#= Begin test: Test '++' nvpair value update syntax (--score not set) =#=#=#= =#=#=#= Current cib after: Test '++' nvpair value update syntax (--score not set) =#=#=#= =#=#=#= End test: Test '++' nvpair value update syntax (--score not set) - OK (0) =#=#=#= * Passed: crm_attribute - Test '++' nvpair value update syntax (--score not set) =#=#=#= Begin test: Test '++' nvpair value update syntax (--score not set) (XML) =#=#=#= =#=#=#= Current cib after: Test '++' nvpair value update syntax (--score not set) (XML) =#=#=#= =#=#=#= End test: Test '++' nvpair value update syntax (--score not set) (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Test '++' nvpair value update syntax (--score not set) (XML) =#=#=#= Begin test: 
Test '+=' nvpair value update syntax (--score not set) =#=#=#= =#=#=#= Current cib after: Test '+=' nvpair value update syntax (--score not set) =#=#=#= =#=#=#= End test: Test '+=' nvpair value update syntax (--score not set) - OK (0) =#=#=#= * Passed: crm_attribute - Test '+=' nvpair value update syntax (--score not set) =#=#=#= Begin test: Test '+=' nvpair value update syntax (--score not set) (XML) =#=#=#= =#=#=#= Current cib after: Test '+=' nvpair value update syntax (--score not set) (XML) =#=#=#= =#=#=#= End test: Test '+=' nvpair value update syntax (--score not set) (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Test '+=' nvpair value update syntax (--score not set) (XML) =#=#=#= Begin test: Require --force for CIB erasure =#=#=#= cibadmin: The supplied command is considered dangerous. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. =#=#=#= Current cib after: Require --force for CIB erasure =#=#=#= =#=#=#= End test: Require --force for CIB erasure - Operation not safe (107) =#=#=#= * Passed: cibadmin - Require --force for CIB erasure =#=#=#= Begin test: Allow CIB erasure with --force =#=#=#= =#=#=#= End test: Allow CIB erasure with --force - OK (0) =#=#=#= * Passed: cibadmin - Allow CIB erasure with --force =#=#=#= Begin test: Query CIB =#=#=#= =#=#=#= Current cib after: Query CIB =#=#=#= =#=#=#= End test: Query CIB - OK (0) =#=#=#= * Passed: cibadmin - Query CIB =#=#=#= Begin test: Set cluster option =#=#=#= =#=#=#= Current cib after: Set cluster option =#=#=#= =#=#=#= End test: Set cluster option - OK (0) =#=#=#= * Passed: crm_attribute - Set cluster option =#=#=#= Begin test: Query new cluster option =#=#=#= =#=#=#= Current cib after: Query new cluster option =#=#=#= =#=#=#= End test: Query new cluster option - OK (0) =#=#=#= * Passed: cibadmin - Query new cluster option =#=#=#= Begin test: Query cluster options =#=#=#= =#=#=#= Current cib after: Query cluster options =#=#=#= =#=#=#= End test: Query 
cluster options - OK (0) =#=#=#= * Passed: cibadmin - Query cluster options =#=#=#= Begin test: Set no-quorum policy =#=#=#= =#=#=#= Current cib after: Set no-quorum policy =#=#=#= =#=#=#= End test: Set no-quorum policy - OK (0) =#=#=#= * Passed: crm_attribute - Set no-quorum policy =#=#=#= Begin test: Delete nvpair =#=#=#= =#=#=#= Current cib after: Delete nvpair =#=#=#= =#=#=#= End test: Delete nvpair - OK (0) =#=#=#= * Passed: cibadmin - Delete nvpair =#=#=#= Begin test: Create operation should fail =#=#=#= Call failed: File exists =#=#=#= Current cib after: Create operation should fail =#=#=#= =#=#=#= End test: Create operation should fail - Requested item already exists (108) =#=#=#= * Passed: cibadmin - Create operation should fail =#=#=#= Begin test: Modify cluster options section =#=#=#= =#=#=#= Current cib after: Modify cluster options section =#=#=#= =#=#=#= End test: Modify cluster options section - OK (0) =#=#=#= * Passed: cibadmin - Modify cluster options section =#=#=#= Begin test: Query updated cluster option =#=#=#= =#=#=#= Current cib after: Query updated cluster option =#=#=#= =#=#=#= End test: Query updated cluster option - OK (0) =#=#=#= * Passed: cibadmin - Query updated cluster option =#=#=#= Begin test: Set duplicate cluster option =#=#=#= =#=#=#= Current cib after: Set duplicate cluster option =#=#=#= =#=#=#= End test: Set duplicate cluster option - OK (0) =#=#=#= * Passed: crm_attribute - Set duplicate cluster option =#=#=#= Begin test: Setting multiply defined cluster option should fail =#=#=#= crm_attribute: Please choose from one of the matches below and supply the 'id' with --attr-id Multiple attributes match name=cluster-delay Value: 60s (id=cib-bootstrap-options-cluster-delay) Value: 40s (id=duplicate-cluster-delay) =#=#=#= Current cib after: Setting multiply defined cluster option should fail =#=#=#= =#=#=#= End test: Setting multiply defined cluster option should fail - Multiple items match request (109) =#=#=#= * Passed: 
crm_attribute - Setting multiply defined cluster option should fail =#=#=#= Begin test: Set cluster option with -s =#=#=#= =#=#=#= Current cib after: Set cluster option with -s =#=#=#= =#=#=#= End test: Set cluster option with -s - OK (0) =#=#=#= * Passed: crm_attribute - Set cluster option with -s =#=#=#= Begin test: Delete cluster option with -i =#=#=#= Deleted crm_config option: id=(null) name=cluster-delay =#=#=#= Current cib after: Delete cluster option with -i =#=#=#= =#=#=#= End test: Delete cluster option with -i - OK (0) =#=#=#= * Passed: crm_attribute - Delete cluster option with -i =#=#=#= Begin test: Create node1 and bring it online =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Current cluster status: * Full List of Resources: * No resources Performing Requested Modifications: * Bringing node node1 online Transition Summary: Executing Cluster Transition: Revised Cluster Status: * Node List: * Online: [ node1 ] * Full List of Resources: * No resources =#=#=#= Current cib after: Create node1 and bring it online =#=#=#= =#=#=#= End test: Create node1 and bring it online - OK (0) =#=#=#= * Passed: crm_simulate - Create node1 and bring it online 
=#=#=#= Begin test: Create node attribute =#=#=#= =#=#=#= Current cib after: Create node attribute =#=#=#= =#=#=#= End test: Create node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Create node attribute =#=#=#= Begin test: Query new node attribute =#=#=#= =#=#=#= Current cib after: Query new node attribute =#=#=#= =#=#=#= End test: Query new node attribute - OK (0) =#=#=#= * Passed: cibadmin - Query new node attribute =#=#=#= Begin test: Create second node attribute =#=#=#= =#=#=#= Current cib after: Create second node attribute =#=#=#= =#=#=#= End test: Create second node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Create second node attribute =#=#=#= Begin test: Query node attributes by pattern =#=#=#= scope=nodes name=ram value=1024M scope=nodes name=rattr value=XYZ =#=#=#= End test: Query node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Query node attributes by pattern =#=#=#= Begin test: Update node attributes by pattern =#=#=#= =#=#=#= Current cib after: Update node attributes by pattern =#=#=#= =#=#=#= End test: Update node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Update node attributes by pattern =#=#=#= Begin test: Delete node attributes by pattern =#=#=#= Deleted nodes attribute: id=nodes-node1-rattr name=rattr =#=#=#= Current cib after: Delete node attributes by pattern =#=#=#= =#=#=#= End test: Delete node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Delete node attributes by pattern =#=#=#= Begin test: Set a transient (fail-count) node attribute =#=#=#= =#=#=#= Current cib after: Set a transient (fail-count) node attribute =#=#=#= =#=#=#= End test: Set a transient (fail-count) node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Set a transient (fail-count) node attribute =#=#=#= Begin test: Query a fail count =#=#=#= scope=status name=fail-count-foo value=3 =#=#=#= Current cib after: Query a fail count =#=#=#= =#=#=#= End test: Query a fail count - OK (0) =#=#=#= * 
Passed: crm_failcount - Query a fail count =#=#=#= Begin test: Show node attributes with crm_simulate =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Current cluster status: * Node List: * Online: [ node1 ] * Full List of Resources: * No resources * Node Attributes: * Node: node1: * ram : 1024M =#=#=#= End test: Show node attributes with crm_simulate - OK (0) =#=#=#= * Passed: crm_simulate - Show node attributes with crm_simulate =#=#=#= Begin test: Set a second transient node attribute =#=#=#= =#=#=#= Current cib after: Set a second transient node attribute =#=#=#= =#=#=#= End test: Set a second transient node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Set a second transient node attribute =#=#=#= Begin test: Query transient node attributes by pattern =#=#=#= scope=status name=fail-count-foo value=3 scope=status name=fail-count-bar value=5 =#=#=#= End test: Query transient node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Query transient node attributes by pattern =#=#=#= Begin test: Update transient node attributes by pattern =#=#=#= =#=#=#= Current cib after: Update transient node attributes by pattern =#=#=#= =#=#=#= End test: Update transient node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Update transient node attributes by pattern =#=#=#= Begin test: Delete transient node attributes by pattern =#=#=#= Deleted status attribute: id=status-node1-fail-count-foo name=fail-count-foo Deleted status attribute: id=status-node1-fail-count-bar name=fail-count-bar =#=#=#= Current cib after: Delete transient node attributes by pattern =#=#=#= =#=#=#= End test: Delete transient node attributes by pattern - OK (0) =#=#=#= * Passed: crm_attribute - Delete transient node attributes 
by pattern =#=#=#= Begin test: crm_attribute given invalid delete usage =#=#=#= crm_attribute: Error: must specify attribute name or pattern to delete =#=#=#= End test: crm_attribute given invalid delete usage - Incorrect usage (64) =#=#=#= * Passed: crm_attribute - crm_attribute given invalid delete usage =#=#=#= Begin test: Set a utilization node attribute =#=#=#= =#=#=#= Current cib after: Set a utilization node attribute =#=#=#= =#=#=#= End test: Set a utilization node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Set a utilization node attribute =#=#=#= Begin test: Query utilization node attribute =#=#=#= scope=nodes name=cpu value=1 =#=#=#= End test: Query utilization node attribute - OK (0) =#=#=#= * Passed: crm_attribute - Query utilization node attribute =#=#=#= Begin test: Digest calculation =#=#=#= Digest: =#=#=#= Current cib after: Digest calculation =#=#=#= =#=#=#= End test: Digest calculation - OK (0) =#=#=#= * Passed: cibadmin - Digest calculation =#=#=#= Begin test: Replace operation should fail =#=#=#= Call failed: Update was older than existing configuration =#=#=#= Current cib after: Replace operation should fail =#=#=#= =#=#=#= End test: Replace operation should fail - Update was older than existing configuration (103) =#=#=#= * Passed: cibadmin - Replace operation should fail =#=#=#= Begin test: Default standby value =#=#=#= scope=status name=standby value=off =#=#=#= Current cib after: Default standby value =#=#=#= =#=#=#= End test: Default standby value - OK (0) =#=#=#= * Passed: crm_standby - Default standby value =#=#=#= Begin test: Set standby status =#=#=#= =#=#=#= Current cib after: Set standby status =#=#=#= =#=#=#= End test: Set standby status - OK (0) =#=#=#= * Passed: crm_standby - Set standby status =#=#=#= Begin test: Query standby value =#=#=#= scope=nodes name=standby value=true =#=#=#= Current cib after: Query standby value =#=#=#= =#=#=#= End test: Query standby value - OK (0) =#=#=#= * Passed: crm_standby - Query 
standby value =#=#=#= Begin test: Delete standby value =#=#=#= Deleted nodes attribute: id=nodes-node1-standby name=standby =#=#=#= Current cib after: Delete standby value =#=#=#= =#=#=#= End test: Delete standby value - OK (0) =#=#=#= * Passed: crm_standby - Delete standby value =#=#=#= Begin test: Create a resource =#=#=#= =#=#=#= Current cib after: Create a resource =#=#=#= =#=#=#= End test: Create a resource - OK (0) =#=#=#= * Passed: cibadmin - Create a resource =#=#=#= Begin test: crm_resource run with extra arguments =#=#=#= crm_resource: non-option ARGV-elements: [1 of 2] foo [2 of 2] bar =#=#=#= End test: crm_resource run with extra arguments - Incorrect usage (64) =#=#=#= * Passed: crm_resource - crm_resource run with extra arguments =#=#=#= Begin test: List all available resource options (invalid type) =#=#=#= crm_resource: Error parsing option --list-options =#=#=#= End test: List all available resource options (invalid type) - Incorrect usage (64) =#=#=#= * Passed: crm_resource - List all available resource options (invalid type) =#=#=#= Begin test: List all available resource options (invalid type) (XML) =#=#=#= crm_resource: Error parsing option --list-options =#=#=#= End test: List all available resource options (invalid type) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_resource - List all available resource options (invalid type) (XML) =#=#=#= Begin test: List non-advanced primitive meta-attributes =#=#=#= Primitive meta-attributes Meta-attributes applicable to primitive resources * priority: Resource assignment priority * If not all resources can be active, the cluster will stop lower-priority resources in order to keep higher-priority ones active. * Possible values: score (default: ) * critical: Default value for influence in colocation constraints * Use this value as the default for influence in all colocation constraints involving this resource, as well as in the implicit colocation constraints created if this resource is in a group. 
* Possible values: boolean (default: ) * target-role: State the cluster should attempt to keep this resource in * "Stopped" forces the resource to be stopped. "Started" allows the resource to be started (and in the case of promotable clone resources, promoted if appropriate). "Unpromoted" allows the resource to be started, but only in the unpromoted role if the resource is promotable. "Promoted" is equivalent to "Started". * Possible values: "Stopped", "Started" (default), "Unpromoted", "Promoted" * is-managed: Whether the cluster is allowed to actively change the resource's state * If false, the cluster will not start, stop, promote, or demote the resource on any node. Recurring actions for the resource are unaffected. If true, a true value for the maintenance-mode cluster option, the maintenance node attribute, or the maintenance resource meta-attribute overrides this. * Possible values: boolean (default: ) * maintenance: If true, the cluster will not schedule any actions involving the resource * If true, the cluster will not start, stop, promote, or demote the resource on any node, and will pause any recurring monitors (except those specifying role as "Stopped"). If false, a true value for the maintenance-mode cluster option or maintenance node attribute overrides this. * Possible values: boolean (default: ) * resource-stickiness: Score to add to the current node when a resource is already active * Score to add to the current node when a resource is already active. This allows running resources to stay where they are, even if they would be placed elsewhere if they were being started from a stopped state. The default is 1 for individual clone instances, and 0 for all other resources. * Possible values: score (no default) * requires: Conditions under which the resource can be started * Conditions under which the resource can be started. "nothing" means the cluster can always start this resource. 
"quorum" means the cluster can start this resource only if a majority of the configured nodes are active. "fencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced. "unfencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced, and only on nodes that have been unfenced. The default is "quorum" for resources with a class of stonith; otherwise, "unfencing" if unfencing is active in the cluster; otherwise, "fencing" if the stonith-enabled cluster option is true; otherwise, "quorum". * Possible values: "nothing", "quorum", "fencing", "unfencing" * migration-threshold: Number of failures on a node before the resource becomes ineligible to run there. * Number of failures that may occur for this resource on a node, before that node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible). By contrast, the cluster treats "INFINITY" (the default) as a very large but finite number. This option has an effect only if the failed operation specifies its on-fail attribute as "restart" (the default), and additionally for failed start operations, if the start-failure-is-fatal cluster property is set to false. * Possible values: score (default: ) * failure-timeout: Number of seconds before acting as if a failure had not occurred * Number of seconds after a failed action for this resource before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. * Possible values: duration (default: ) * multiple-active: What to do if the cluster finds the resource active on more than one node * What to do if the cluster finds the resource active on more than one node. 
"block" means to mark the resource as unmanaged. "stop_only" means to stop all active instances of this resource and leave them stopped. "stop_start" means to stop all active instances of this resource and start the resource in one location only. "stop_unexpected" means to stop all active instances of this resource except where the resource should be active. (This should be used only when extra instances are not expected to disrupt existing instances, and the resource agent's monitor of an existing instance is capable of detecting any problems that could be caused. Note that any resources ordered after this one will still need to be restarted.) * Possible values: "block", "stop_only", "stop_start" (default), "stop_unexpected" * allow-migrate: Whether the cluster should try to "live migrate" this resource when it needs to be moved * Whether the cluster should try to "live migrate" this resource when it needs to be moved. The default is true for ocf:pacemaker:remote resources, and false otherwise. * Possible values: boolean (no default) * allow-unhealthy-nodes: Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it * Possible values: boolean (default: ) * container-attribute-target: Where to check user-defined node attributes * Whether to check user-defined node attributes on the physical host where a container is running or on the local node. This is usually set for a bundle resource and inherited by the bundle's primitive resource. A value of "host" means to check user-defined node attributes on the underlying physical host. Any other value means to check user-defined node attributes on the local node (for a bundled primitive resource, this is the bundle node). * Possible values: string (no default) * remote-node: Name of the Pacemaker Remote guest node this resource is associated with, if any * Name of the Pacemaker Remote guest node this resource is associated with, if any. 
If specified, this both enables the resource as a guest node and defines the unique name used to identify the guest node. The guest must be configured to run the Pacemaker Remote daemon when it is started. WARNING: This value cannot overlap with any resource or node IDs. * Possible values: string (no default) * remote-addr: If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote * If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on the guest must be configured to accept connections on this address. The default is the value of the remote-node meta-attribute. * Possible values: string (no default) * remote-port: If remote-node is specified, port on the guest used for its Pacemaker Remote connection * If remote-node is specified, the port on the guest used for its Pacemaker Remote connection. The Pacemaker Remote daemon on the guest must be configured to listen on this port. * Possible values: port (default: ) * remote-connect-timeout: If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. * Possible values: timeout (default: ) * remote-allow-migrate: If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). * Possible values: boolean (default: ) =#=#=#= End test: List non-advanced primitive meta-attributes - OK (0) =#=#=#= * Passed: crm_resource - List non-advanced primitive meta-attributes =#=#=#= Begin test: List non-advanced primitive meta-attributes (XML) (shows all) =#=#=#= 1.1 Meta-attributes applicable to primitive resources Primitive meta-attributes If not all resources can be active, the cluster will stop lower-priority resources in order to keep higher-priority ones active. 
Resource assignment priority Use this value as the default for influence in all colocation constraints involving this resource, as well as in the implicit colocation constraints created if this resource is in a group. Default value for influence in colocation constraints "Stopped" forces the resource to be stopped. "Started" allows the resource to be started (and in the case of promotable clone resources, promoted if appropriate). "Unpromoted" allows the resource to be started, but only in the unpromoted role if the resource is promotable. "Promoted" is equivalent to "Started". State the cluster should attempt to keep this resource in If false, the cluster will not start, stop, promote, or demote the resource on any node. Recurring actions for the resource are unaffected. If true, a true value for the maintenance-mode cluster option, the maintenance node attribute, or the maintenance resource meta-attribute overrides this. Whether the cluster is allowed to actively change the resource's state If true, the cluster will not start, stop, promote, or demote the resource on any node, and will pause any recurring monitors (except those specifying role as "Stopped"). If false, a true value for the maintenance-mode cluster option or maintenance node attribute overrides this. If true, the cluster will not schedule any actions involving the resource Score to add to the current node when a resource is already active. This allows running resources to stay where they are, even if they would be placed elsewhere if they were being started from a stopped state. The default is 1 for individual clone instances, and 0 for all other resources. Score to add to the current node when a resource is already active Conditions under which the resource can be started. "nothing" means the cluster can always start this resource. "quorum" means the cluster can start this resource only if a majority of the configured nodes are active. 
"fencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced. "unfencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced, and only on nodes that have been unfenced. The default is "quorum" for resources with a class of stonith; otherwise, "unfencing" if unfencing is active in the cluster; otherwise, "fencing" if the stonith-enabled cluster option is true; otherwise, "quorum". Conditions under which the resource can be started Number of failures that may occur for this resource on a node, before that node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible). By contrast, the cluster treats "INFINITY" (the default) as a very large but finite number. This option has an effect only if the failed operation specifies its on-fail attribute as "restart" (the default), and additionally for failed start operations, if the start-failure-is-fatal cluster property is set to false. Number of failures on a node before the resource becomes ineligible to run there. Number of seconds after a failed action for this resource before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. Number of seconds before acting as if a failure had not occurred What to do if the cluster finds the resource active on more than one node. "block" means to mark the resource as unmanaged. "stop_only" means to stop all active instances of this resource and leave them stopped. "stop_start" means to stop all active instances of this resource and start the resource in one location only. "stop_unexpected" means to stop all active instances of this resource except where the resource should be active. 
(This should be used only when extra instances are not expected to disrupt existing instances, and the resource agent's monitor of an existing instance is capable of detecting any problems that could be caused. Note that any resources ordered after this one will still need to be restarted.) What to do if the cluster finds the resource active on more than one node Whether the cluster should try to "live migrate" this resource when it needs to be moved. The default is true for ocf:pacemaker:remote resources, and false otherwise. Whether the cluster should try to "live migrate" this resource when it needs to be moved Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it Whether to check user-defined node attributes on the physical host where a container is running or on the local node. This is usually set for a bundle resource and inherited by the bundle's primitive resource. A value of "host" means to check user-defined node attributes on the underlying physical host. Any other value means to check user-defined node attributes on the local node (for a bundled primitive resource, this is the bundle node). Where to check user-defined node attributes Name of the Pacemaker Remote guest node this resource is associated with, if any. If specified, this both enables the resource as a guest node and defines the unique name used to identify the guest node. The guest must be configured to run the Pacemaker Remote daemon when it is started. WARNING: This value cannot overlap with any resource or node IDs. Name of the Pacemaker Remote guest node this resource is associated with, if any If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on the guest must be configured to accept connections on this address. 
The default is the value of the remote-node meta-attribute. If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote If remote-node is specified, the port on the guest used for its Pacemaker Remote connection. The Pacemaker Remote daemon on the guest must be configured to listen on this port. If remote-node is specified, port on the guest used for its Pacemaker Remote connection If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). =#=#=#= End test: List non-advanced primitive meta-attributes (XML) (shows all) - OK (0) =#=#=#= * Passed: crm_resource - List non-advanced primitive meta-attributes (XML) (shows all) =#=#=#= Begin test: List all available primitive meta-attributes =#=#=#= Primitive meta-attributes Meta-attributes applicable to primitive resources * priority: Resource assignment priority * If not all resources can be active, the cluster will stop lower-priority resources in order to keep higher-priority ones active. * Possible values: score (default: ) * critical: Default value for influence in colocation constraints * Use this value as the default for influence in all colocation constraints involving this resource, as well as in the implicit colocation constraints created if this resource is in a group. * Possible values: boolean (default: ) * target-role: State the cluster should attempt to keep this resource in * "Stopped" forces the resource to be stopped. "Started" allows the resource to be started (and in the case of promotable clone resources, promoted if appropriate). 
"Unpromoted" allows the resource to be started, but only in the unpromoted role if the resource is promotable. "Promoted" is equivalent to "Started". * Possible values: "Stopped", "Started" (default), "Unpromoted", "Promoted" * is-managed: Whether the cluster is allowed to actively change the resource's state * If false, the cluster will not start, stop, promote, or demote the resource on any node. Recurring actions for the resource are unaffected. If true, a true value for the maintenance-mode cluster option, the maintenance node attribute, or the maintenance resource meta-attribute overrides this. * Possible values: boolean (default: ) * maintenance: If true, the cluster will not schedule any actions involving the resource * If true, the cluster will not start, stop, promote, or demote the resource on any node, and will pause any recurring monitors (except those specifying role as "Stopped"). If false, a true value for the maintenance-mode cluster option or maintenance node attribute overrides this. * Possible values: boolean (default: ) * resource-stickiness: Score to add to the current node when a resource is already active * Score to add to the current node when a resource is already active. This allows running resources to stay where they are, even if they would be placed elsewhere if they were being started from a stopped state. The default is 1 for individual clone instances, and 0 for all other resources. * Possible values: score (no default) * requires: Conditions under which the resource can be started * Conditions under which the resource can be started. "nothing" means the cluster can always start this resource. "quorum" means the cluster can start this resource only if a majority of the configured nodes are active. "fencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced. 
"unfencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced, and only on nodes that have been unfenced. The default is "quorum" for resources with a class of stonith; otherwise, "unfencing" if unfencing is active in the cluster; otherwise, "fencing" if the stonith-enabled cluster option is true; otherwise, "quorum". * Possible values: "nothing", "quorum", "fencing", "unfencing" * migration-threshold: Number of failures on a node before the resource becomes ineligible to run there. * Number of failures that may occur for this resource on a node, before that node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible). By contrast, the cluster treats "INFINITY" (the default) as a very large but finite number. This option has an effect only if the failed operation specifies its on-fail attribute as "restart" (the default), and additionally for failed start operations, if the start-failure-is-fatal cluster property is set to false. * Possible values: score (default: ) * failure-timeout: Number of seconds before acting as if a failure had not occurred * Number of seconds after a failed action for this resource before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. * Possible values: duration (default: ) * multiple-active: What to do if the cluster finds the resource active on more than one node * What to do if the cluster finds the resource active on more than one node. "block" means to mark the resource as unmanaged. "stop_only" means to stop all active instances of this resource and leave them stopped. "stop_start" means to stop all active instances of this resource and start the resource in one location only. 
"stop_unexpected" means to stop all active instances of this resource except where the resource should be active. (This should be used only when extra instances are not expected to disrupt existing instances, and the resource agent's monitor of an existing instance is capable of detecting any problems that could be caused. Note that any resources ordered after this one will still need to be restarted.) * Possible values: "block", "stop_only", "stop_start" (default), "stop_unexpected" * allow-migrate: Whether the cluster should try to "live migrate" this resource when it needs to be moved * Whether the cluster should try to "live migrate" this resource when it needs to be moved. The default is true for ocf:pacemaker:remote resources, and false otherwise. * Possible values: boolean (no default) * allow-unhealthy-nodes: Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it * Possible values: boolean (default: ) * container-attribute-target: Where to check user-defined node attributes * Whether to check user-defined node attributes on the physical host where a container is running or on the local node. This is usually set for a bundle resource and inherited by the bundle's primitive resource. A value of "host" means to check user-defined node attributes on the underlying physical host. Any other value means to check user-defined node attributes on the local node (for a bundled primitive resource, this is the bundle node). * Possible values: string (no default) * remote-node: Name of the Pacemaker Remote guest node this resource is associated with, if any * Name of the Pacemaker Remote guest node this resource is associated with, if any. If specified, this both enables the resource as a guest node and defines the unique name used to identify the guest node. The guest must be configured to run the Pacemaker Remote daemon when it is started. WARNING: This value cannot overlap with any resource or node IDs. 
* Possible values: string (no default) * remote-addr: If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote * If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on the guest must be configured to accept connections on this address. The default is the value of the remote-node meta-attribute. * Possible values: string (no default) * remote-port: If remote-node is specified, port on the guest used for its Pacemaker Remote connection * If remote-node is specified, the port on the guest used for its Pacemaker Remote connection. The Pacemaker Remote daemon on the guest must be configured to listen on this port. * Possible values: port (default: ) * remote-connect-timeout: If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. * Possible values: timeout (default: ) * remote-allow-migrate: If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). * Possible values: boolean (default: ) =#=#=#= End test: List all available primitive meta-attributes - OK (0) =#=#=#= * Passed: crm_resource - List all available primitive meta-attributes =#=#=#= Begin test: List all available primitive meta-attributes (XML) =#=#=#= 1.1 Meta-attributes applicable to primitive resources Primitive meta-attributes If not all resources can be active, the cluster will stop lower-priority resources in order to keep higher-priority ones active. Resource assignment priority Use this value as the default for influence in all colocation constraints involving this resource, as well as in the implicit colocation constraints created if this resource is in a group. Default value for influence in colocation constraints "Stopped" forces the resource to be stopped. 
"Started" allows the resource to be started (and in the case of promotable clone resources, promoted if appropriate). "Unpromoted" allows the resource to be started, but only in the unpromoted role if the resource is promotable. "Promoted" is equivalent to "Started". State the cluster should attempt to keep this resource in If false, the cluster will not start, stop, promote, or demote the resource on any node. Recurring actions for the resource are unaffected. If true, a true value for the maintenance-mode cluster option, the maintenance node attribute, or the maintenance resource meta-attribute overrides this. Whether the cluster is allowed to actively change the resource's state If true, the cluster will not start, stop, promote, or demote the resource on any node, and will pause any recurring monitors (except those specifying role as "Stopped"). If false, a true value for the maintenance-mode cluster option or maintenance node attribute overrides this. If true, the cluster will not schedule any actions involving the resource Score to add to the current node when a resource is already active. This allows running resources to stay where they are, even if they would be placed elsewhere if they were being started from a stopped state. The default is 1 for individual clone instances, and 0 for all other resources. Score to add to the current node when a resource is already active Conditions under which the resource can be started. "nothing" means the cluster can always start this resource. "quorum" means the cluster can start this resource only if a majority of the configured nodes are active. "fencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced. "unfencing" means the cluster can start this resource only if a majority of the configured nodes are active and any failed or unknown nodes have been fenced, and only on nodes that have been unfenced. 
The default is "quorum" for resources with a class of stonith; otherwise, "unfencing" if unfencing is active in the cluster; otherwise, "fencing" if the stonith-enabled cluster option is true; otherwise, "quorum". Conditions under which the resource can be started Number of failures that may occur for this resource on a node, before that node is marked ineligible to host this resource. A value of 0 indicates that this feature is disabled (the node will never be marked ineligible). By contrast, the cluster treats "INFINITY" (the default) as a very large but finite number. This option has an effect only if the failed operation specifies its on-fail attribute as "restart" (the default), and additionally for failed start operations, if the start-failure-is-fatal cluster property is set to false. Number of failures on a node before the resource becomes ineligible to run there. Number of seconds after a failed action for this resource before acting as if the failure had not occurred, and potentially allowing the resource back to the node on which it failed. A value of 0 indicates that this feature is disabled. Number of seconds before acting as if a failure had not occurred What to do if the cluster finds the resource active on more than one node. "block" means to mark the resource as unmanaged. "stop_only" means to stop all active instances of this resource and leave them stopped. "stop_start" means to stop all active instances of this resource and start the resource in one location only. "stop_unexpected" means to stop all active instances of this resource except where the resource should be active. (This should be used only when extra instances are not expected to disrupt existing instances, and the resource agent's monitor of an existing instance is capable of detecting any problems that could be caused. Note that any resources ordered after this one will still need to be restarted.) 
What to do if the cluster finds the resource active on more than one node Whether the cluster should try to "live migrate" this resource when it needs to be moved. The default is true for ocf:pacemaker:remote resources, and false otherwise. Whether the cluster should try to "live migrate" this resource when it needs to be moved Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it Whether the resource should be allowed to run on a node even if the node's health score would otherwise prevent it Whether to check user-defined node attributes on the physical host where a container is running or on the local node. This is usually set for a bundle resource and inherited by the bundle's primitive resource. A value of "host" means to check user-defined node attributes on the underlying physical host. Any other value means to check user-defined node attributes on the local node (for a bundled primitive resource, this is the bundle node). Where to check user-defined node attributes Name of the Pacemaker Remote guest node this resource is associated with, if any. If specified, this both enables the resource as a guest node and defines the unique name used to identify the guest node. The guest must be configured to run the Pacemaker Remote daemon when it is started. WARNING: This value cannot overlap with any resource or node IDs. Name of the Pacemaker Remote guest node this resource is associated with, if any If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote. The Pacemaker Remote daemon on the guest must be configured to accept connections on this address. The default is the value of the remote-node meta-attribute. If remote-node is specified, the IP address or hostname used to connect to the guest via Pacemaker Remote If remote-node is specified, the port on the guest used for its Pacemaker Remote connection. 
The Pacemaker Remote daemon on the guest must be configured to listen on this port. If remote-node is specified, port on the guest used for its Pacemaker Remote connection If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. If remote-node is specified, how long before a pending Pacemaker Remote guest connection times out. If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). If remote-node is specified, this acts as the allow-migrate meta-attribute for the implicit remote connection resource (ocf:pacemaker:remote). =#=#=#= End test: List all available primitive meta-attributes (XML) - OK (0) =#=#=#= * Passed: crm_resource - List all available primitive meta-attributes (XML) =#=#=#= Begin test: List non-advanced fencing parameters =#=#=#= Fencing resource common parameters Special parameters that are available for all fencing resources, regardless of type. They are processed by Pacemaker, rather than by the fence agent or the fencing library. * pcmk_host_map: A mapping of node names to port numbers for devices that do not support node names. * For example, "node1:1;node2:2,3" would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2. * Possible values: string (no default) * pcmk_host_list: Nodes targeted by this device * Comma-separated list of nodes that can be targeted by this device (for example, "node1,node2,node3"). If pcmk_host_check is "static-list", either this or pcmk_host_map must be set. * Possible values: string (no default) * pcmk_host_check: How to determine which nodes can be targeted by the device * Use "dynamic-list" to query the device via the 'list' command; "static-list" to check the pcmk_host_list attribute; "status" to query the device via the 'status' command; or "none" to assume every device can fence every node. 
The default value is "static-list" if pcmk_host_map or pcmk_host_list is set; otherwise "dynamic-list" if the device supports the list operation; otherwise "status" if the device supports the status operation; otherwise "none" * Possible values: "dynamic-list", "static-list", "status", "none" * pcmk_delay_max: Enable a delay of no more than the time specified before executing fencing actions. * Enable a delay of no more than the time specified before executing fencing actions. Pacemaker derives the overall delay by taking the value of pcmk_delay_base and adding a random delay value such that the sum is kept below this maximum. * Possible values: duration (default: ) * pcmk_delay_base: Enable a base delay for fencing actions and specify base delay value. * This enables a static delay for fencing actions, which can help avoid "death matches" where two nodes try to fence each other at the same time. If pcmk_delay_max is also used, a random delay will be added such that the total delay is kept below that value. This can be set to a single time value to apply to any node targeted by this device (useful if a separate device is configured for each target), or to a node map (for example, "node1:1s;node2:5") to set a different value for each target. * Possible values: string (default: ) * pcmk_action_limit: The maximum number of actions can be performed in parallel on this device * Cluster property concurrent-fencing="true" needs to be configured first. Then use this to specify the maximum number of actions can be performed in parallel on this device. A value of -1 means an unlimited number of actions can be performed in parallel. * Possible values: integer (default: ) =#=#=#= End test: List non-advanced fencing parameters - OK (0) =#=#=#= * Passed: crm_resource - List non-advanced fencing parameters =#=#=#= Begin test: List non-advanced fencing parameters (XML) (shows all) =#=#=#= 1.1 Special parameters that are available for all fencing resources, regardless of type. 
They are processed by Pacemaker, rather than by the fence agent or the fencing library. Fencing resource common parameters Some devices do not support the standard 'port' parameter or may provide additional ones. Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced. A value of "none" can be used to tell the cluster not to supply any additional parameters. An alternate parameter to supply instead of 'port' For example, "node1:1;node2:2,3" would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2. A mapping of node names to port numbers for devices that do not support node names. Comma-separated list of nodes that can be targeted by this device (for example, "node1,node2,node3"). If pcmk_host_check is "static-list", either this or pcmk_host_map must be set. Nodes targeted by this device Use "dynamic-list" to query the device via the 'list' command; "static-list" to check the pcmk_host_list attribute; "status" to query the device via the 'status' command; or "none" to assume every device can fence every node. The default value is "static-list" if pcmk_host_map or pcmk_host_list is set; otherwise "dynamic-list" if the device supports the list operation; otherwise "status" if the device supports the status operation; otherwise "none" How to determine which nodes can be targeted by the device Enable a delay of no more than the time specified before executing fencing actions. Pacemaker derives the overall delay by taking the value of pcmk_delay_base and adding a random delay value such that the sum is kept below this maximum. Enable a delay of no more than the time specified before executing fencing actions. This enables a static delay for fencing actions, which can help avoid "death matches" where two nodes try to fence each other at the same time. If pcmk_delay_max is also used, a random delay will be added such that the total delay is kept below that value. 
This can be set to a single time value to apply to any node targeted by this device (useful if a separate device is configured for each target), or to a node map (for example, "node1:1s;node2:5") to set a different value for each target. Enable a base delay for fencing actions and specify base delay value. Cluster property concurrent-fencing="true" needs to be configured first. Then use this to specify the maximum number of actions can be performed in parallel on this device. A value of -1 means an unlimited number of actions can be performed in parallel. The maximum number of actions can be performed in parallel on this device Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'reboot' action. An alternate command to run instead of 'reboot' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'reboot' actions. Specify an alternate timeout to use for 'reboot' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'reboot' action before giving up. The maximum number of times to try the 'reboot' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'off' action. An alternate command to run instead of 'off' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'off' actions. Specify an alternate timeout to use for 'off' actions instead of stonith-timeout Some devices do not support multiple connections. 
Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'off' action before giving up. The maximum number of times to try the 'off' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'on' action. An alternate command to run instead of 'on' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'on' actions. Specify an alternate timeout to use for 'on' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'on' action before giving up. The maximum number of times to try the 'on' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'list' action. An alternate command to run instead of 'list' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'list' actions. Specify an alternate timeout to use for 'list' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'list' action before giving up. 
The maximum number of times to try the 'list' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'monitor' action. An alternate command to run instead of 'monitor' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'monitor' actions. Specify an alternate timeout to use for 'monitor' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'monitor' action before giving up. The maximum number of times to try the 'monitor' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'status' action. An alternate command to run instead of 'status' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'status' actions. Specify an alternate timeout to use for 'status' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'status' action before giving up. 
The maximum number of times to try the 'status' command within the timeout period =#=#=#= End test: List non-advanced fencing parameters (XML) (shows all) - OK (0) =#=#=#= * Passed: crm_resource - List non-advanced fencing parameters (XML) (shows all) =#=#=#= Begin test: List all available fencing parameters =#=#=#= Fencing resource common parameters Special parameters that are available for all fencing resources, regardless of type. They are processed by Pacemaker, rather than by the fence agent or the fencing library. * pcmk_host_map: A mapping of node names to port numbers for devices that do not support node names. * For example, "node1:1;node2:2,3" would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2. * Possible values: string (no default) * pcmk_host_list: Nodes targeted by this device * Comma-separated list of nodes that can be targeted by this device (for example, "node1,node2,node3"). If pcmk_host_check is "static-list", either this or pcmk_host_map must be set. * Possible values: string (no default) * pcmk_host_check: How to determine which nodes can be targeted by the device * Use "dynamic-list" to query the device via the 'list' command; "static-list" to check the pcmk_host_list attribute; "status" to query the device via the 'status' command; or "none" to assume every device can fence every node. The default value is "static-list" if pcmk_host_map or pcmk_host_list is set; otherwise "dynamic-list" if the device supports the list operation; otherwise "status" if the device supports the status operation; otherwise "none" * Possible values: "dynamic-list", "static-list", "status", "none" * pcmk_delay_max: Enable a delay of no more than the time specified before executing fencing actions. * Enable a delay of no more than the time specified before executing fencing actions. Pacemaker derives the overall delay by taking the value of pcmk_delay_base and adding a random delay value such that the sum is kept below this maximum. 
* Possible values: duration (default: ) * pcmk_delay_base: Enable a base delay for fencing actions and specify base delay value. * This enables a static delay for fencing actions, which can help avoid "death matches" where two nodes try to fence each other at the same time. If pcmk_delay_max is also used, a random delay will be added such that the total delay is kept below that value. This can be set to a single time value to apply to any node targeted by this device (useful if a separate device is configured for each target), or to a node map (for example, "node1:1s;node2:5") to set a different value for each target. * Possible values: string (default: ) * pcmk_action_limit: The maximum number of actions can be performed in parallel on this device * Cluster property concurrent-fencing="true" needs to be configured first. Then use this to specify the maximum number of actions can be performed in parallel on this device. A value of -1 means an unlimited number of actions can be performed in parallel. * Possible values: integer (default: ) * ADVANCED OPTIONS: * pcmk_host_argument: An alternate parameter to supply instead of 'port' * Some devices do not support the standard 'port' parameter or may provide additional ones. Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced. A value of "none" can be used to tell the cluster not to supply any additional parameters. * Possible values: string (default: ) * pcmk_reboot_action: An alternate command to run instead of 'reboot' * Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'reboot' action. * Possible values: string (default: ) * pcmk_reboot_timeout: Specify an alternate timeout to use for 'reboot' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. 
Use this to specify an alternate, device-specific, timeout for 'reboot' actions. * Possible values: timeout (default: ) * pcmk_reboot_retries: The maximum number of times to try the 'reboot' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'reboot' action before giving up. * Possible values: integer (default: ) * pcmk_off_action: An alternate command to run instead of 'off' * Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'off' action. * Possible values: string (default: ) * pcmk_off_timeout: Specify an alternate timeout to use for 'off' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'off' actions. * Possible values: timeout (default: ) * pcmk_off_retries: The maximum number of times to try the 'off' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'off' action before giving up. * Possible values: integer (default: ) * pcmk_on_action: An alternate command to run instead of 'on' * Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'on' action. 
* Possible values: string (default: ) * pcmk_on_timeout: Specify an alternate timeout to use for 'on' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'on' actions. * Possible values: timeout (default: ) * pcmk_on_retries: The maximum number of times to try the 'on' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'on' action before giving up. * Possible values: integer (default: ) * pcmk_list_action: An alternate command to run instead of 'list' * Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'list' action. * Possible values: string (default: ) * pcmk_list_timeout: Specify an alternate timeout to use for 'list' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'list' actions. * Possible values: timeout (default: ) * pcmk_list_retries: The maximum number of times to try the 'list' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'list' action before giving up. * Possible values: integer (default: ) * pcmk_monitor_action: An alternate command to run instead of 'monitor' * Some devices do not support the standard commands or may provide additional ones. 
Use this to specify an alternate, device-specific, command that implements the 'monitor' action. * Possible values: string (default: ) * pcmk_monitor_timeout: Specify an alternate timeout to use for 'monitor' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'monitor' actions. * Possible values: timeout (default: ) * pcmk_monitor_retries: The maximum number of times to try the 'monitor' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'monitor' action before giving up. * Possible values: integer (default: ) * pcmk_status_action: An alternate command to run instead of 'status' * Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'status' action. * Possible values: string (default: ) * pcmk_status_timeout: Specify an alternate timeout to use for 'status' actions instead of stonith-timeout * Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'status' actions. * Possible values: timeout (default: ) * pcmk_status_retries: The maximum number of times to try the 'status' command within the timeout period * Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'status' action before giving up. 
* Possible values: integer (default: ) =#=#=#= End test: List all available fencing parameters - OK (0) =#=#=#= * Passed: crm_resource - List all available fencing parameters =#=#=#= Begin test: List all available fencing parameters (XML) =#=#=#= 1.1 Special parameters that are available for all fencing resources, regardless of type. They are processed by Pacemaker, rather than by the fence agent or the fencing library. Fencing resource common parameters Some devices do not support the standard 'port' parameter or may provide additional ones. Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced. A value of "none" can be used to tell the cluster not to supply any additional parameters. An alternate parameter to supply instead of 'port' For example, "node1:1;node2:2,3" would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2. A mapping of node names to port numbers for devices that do not support node names. Comma-separated list of nodes that can be targeted by this device (for example, "node1,node2,node3"). If pcmk_host_check is "static-list", either this or pcmk_host_map must be set. Nodes targeted by this device Use "dynamic-list" to query the device via the 'list' command; "static-list" to check the pcmk_host_list attribute; "status" to query the device via the 'status' command; or "none" to assume every device can fence every node. The default value is "static-list" if pcmk_host_map or pcmk_host_list is set; otherwise "dynamic-list" if the device supports the list operation; otherwise "status" if the device supports the status operation; otherwise "none" How to determine which nodes can be targeted by the device Enable a delay of no more than the time specified before executing fencing actions. Pacemaker derives the overall delay by taking the value of pcmk_delay_base and adding a random delay value such that the sum is kept below this maximum. 
Enable a delay of no more than the time specified before executing fencing actions. This enables a static delay for fencing actions, which can help avoid "death matches" where two nodes try to fence each other at the same time. If pcmk_delay_max is also used, a random delay will be added such that the total delay is kept below that value. This can be set to a single time value to apply to any node targeted by this device (useful if a separate device is configured for each target), or to a node map (for example, "node1:1s;node2:5") to set a different value for each target. Enable a base delay for fencing actions and specify base delay value. Cluster property concurrent-fencing="true" needs to be configured first. Then use this to specify the maximum number of actions can be performed in parallel on this device. A value of -1 means an unlimited number of actions can be performed in parallel. The maximum number of actions can be performed in parallel on this device Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'reboot' action. An alternate command to run instead of 'reboot' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'reboot' actions. Specify an alternate timeout to use for 'reboot' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'reboot' action before giving up. The maximum number of times to try the 'reboot' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'off' action. 
An alternate command to run instead of 'off' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'off' actions. Specify an alternate timeout to use for 'off' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'off' action before giving up. The maximum number of times to try the 'off' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'on' action. An alternate command to run instead of 'on' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'on' actions. Specify an alternate timeout to use for 'on' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'on' action before giving up. The maximum number of times to try the 'on' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'list' action. An alternate command to run instead of 'list' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'list' actions. Specify an alternate timeout to use for 'list' actions instead of stonith-timeout Some devices do not support multiple connections. 
Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'list' action before giving up. The maximum number of times to try the 'list' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'monitor' action. An alternate command to run instead of 'monitor' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'monitor' actions. Specify an alternate timeout to use for 'monitor' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'monitor' action before giving up. The maximum number of times to try the 'monitor' command within the timeout period Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific, command that implements the 'status' action. An alternate command to run instead of 'status' Some devices need much more/less time to complete than normal. Use this to specify an alternate, device-specific, timeout for 'status' actions. Specify an alternate timeout to use for 'status' actions instead of stonith-timeout Some devices do not support multiple connections. Operations may "fail" if the device is busy with another task. In that case, Pacemaker will automatically retry the operation if there is time remaining. Use this option to alter the number of times Pacemaker tries a 'status' action before giving up. 
The maximum number of times to try the 'status' command within the timeout period =#=#=#= End test: List all available fencing parameters (XML) - OK (0) =#=#=#= * Passed: crm_resource - List all available fencing parameters (XML) =#=#=#= Begin test: crm_resource given both -r and resource config =#=#=#= crm_resource: --resource cannot be used with --class, --agent, and --provider =#=#=#= End test: crm_resource given both -r and resource config - Incorrect usage (64) =#=#=#= * Passed: crm_resource - crm_resource given both -r and resource config =#=#=#= Begin test: crm_resource given resource config with invalid action =#=#=#= crm_resource: --class, --agent, and --provider can only be used with --validate and --force-* =#=#=#= End test: crm_resource given resource config with invalid action - Incorrect usage (64) =#=#=#= * Passed: crm_resource - crm_resource given resource config with invalid action =#=#=#= Begin test: Create a resource meta attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Set 'dummy' option: id=dummy-meta_attributes-is-managed set=dummy-meta_attributes name=is-managed value=false =#=#=#= Current cib after: Create a resource meta attribute =#=#=#= =#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute =#=#=#= Begin test: Query a resource meta attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity false =#=#=#= Current cib after: Query a resource meta attribute =#=#=#= =#=#=#= End 
test: Query a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Query a resource meta attribute =#=#=#= Begin test: Remove a resource meta attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Deleted 'dummy' option: id=dummy-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Remove a resource meta attribute =#=#=#= =#=#=#= End test: Remove a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Remove a resource meta attribute =#=#=#= Begin test: Create another resource meta attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= End test: Create another resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Create another resource meta attribute =#=#=#= Begin test: Show why a resource is not running =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= End test: Show why a resource is not running - OK (0) =#=#=#= * Passed: crm_resource - Show why a resource is not running =#=#=#= Begin test: Remove another resource meta attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: 
Clusters with shared data need STONITH to ensure data integrity =#=#=#= End test: Remove another resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Remove another resource meta attribute =#=#=#= Begin test: Get a non-existent attribute from a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Attribute 'nonexistent' not found for 'dummy' =#=#=#= End test: Get a non-existent attribute from a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Get a non-existent attribute from a resource element with output-as=xml =#=#=#= Begin test: Get a non-existent attribute from a resource element without output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Attribute 'nonexistent' not found for 'dummy' =#=#=#= Current cib after: Get a non-existent attribute from a resource element without output-as=xml =#=#=#= =#=#=#= End test: Get a non-existent attribute from a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Get a non-existent attribute from a resource element without output-as=xml =#=#=#= Begin test: Get an existent attribute from a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity ocf =#=#=#= End 
test: Get an existent attribute from a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Get an existent attribute from a resource element with output-as=xml =#=#=#= Begin test: Get an existent attribute from a resource element without output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity ocf =#=#=#= Current cib after: Get an existent attribute from a resource element without output-as=xml =#=#=#= =#=#=#= End test: Get an existent attribute from a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Get an existent attribute from a resource element without output-as=xml =#=#=#= Begin test: Set a non-existent attribute for a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= Current cib after: Set a non-existent attribute for a resource element with output-as=xml =#=#=#= =#=#=#= End test: Set a non-existent attribute for a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Set a non-existent attribute for a resource element with output-as=xml =#=#=#= Begin test: Set an existent attribute for a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= Current cib after: Set an 
existent attribute for a resource element with output-as=xml =#=#=#= =#=#=#= End test: Set an existent attribute for a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Set an existent attribute for a resource element with output-as=xml =#=#=#= Begin test: Delete an existent attribute for a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= Current cib after: Delete an existent attribute for a resource element with output-as=xml =#=#=#= =#=#=#= End test: Delete an existent attribute for a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Delete an existent attribute for a resource element with output-as=xml =#=#=#= Begin test: Delete a non-existent attribute for a resource element with output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= Current cib after: Delete a non-existent attribute for a resource element with output-as=xml =#=#=#= =#=#=#= End test: Delete a non-existent attribute for a resource element with output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Delete a non-existent attribute for a resource element with output-as=xml =#=#=#= Begin test: Set a non-existent attribute for a resource element without output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters 
with shared data need STONITH to ensure data integrity Set attribute: name=description value=test_description =#=#=#= Current cib after: Set a non-existent attribute for a resource element without output-as=xml =#=#=#= =#=#=#= End test: Set a non-existent attribute for a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Set a non-existent attribute for a resource element without output-as=xml =#=#=#= Begin test: Set an existent attribute for a resource element without output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Set attribute: name=description value=test_description =#=#=#= Current cib after: Set an existent attribute for a resource element without output-as=xml =#=#=#= =#=#=#= End test: Set an existent attribute for a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Set an existent attribute for a resource element without output-as=xml =#=#=#= Begin test: Delete an existent attribute for a resource element without output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Deleted attribute: description =#=#=#= Current cib after: Delete an existent attribute for a resource element without output-as=xml =#=#=#= =#=#=#= End test: Delete an existent attribute for a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Delete an existent attribute for a resource element without output-as=xml =#=#=#= Begin test: Delete a non-existent attribute for a resource element without 
output-as=xml =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Deleted attribute: description =#=#=#= Current cib after: Delete a non-existent attribute for a resource element without output-as=xml =#=#=#= =#=#=#= End test: Delete a non-existent attribute for a resource element without output-as=xml - OK (0) =#=#=#= * Passed: crm_resource - Delete a non-existent attribute for a resource element without output-as=xml =#=#=#= Begin test: Create a resource attribute =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Set 'dummy' option: id=dummy-instance_attributes-delay set=dummy-instance_attributes name=delay value=10s =#=#=#= Current cib after: Create a resource attribute =#=#=#= =#=#=#= End test: Create a resource attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource attribute =#=#=#= Begin test: List the configured resources =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Full List of Resources: * dummy (ocf:pacemaker:Dummy): Stopped =#=#=#= Current cib after: List the configured resources =#=#=#= =#=#=#= End test: List the configured resources - OK (0) =#=#=#= * Passed: crm_resource - List the configured resources =#=#=#= Begin test: List the configured resources in XML =#=#=#= unpack_resources error: Resource start-up 
disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity =#=#=#= End test: List the configured resources in XML - OK (0) =#=#=#= * Passed: crm_resource - List the configured resources in XML =#=#=#= Begin test: Implicitly list the configured resources =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity Full List of Resources: * dummy (ocf:pacemaker:Dummy): Stopped =#=#=#= End test: Implicitly list the configured resources - OK (0) =#=#=#= * Passed: crm_resource - Implicitly list the configured resources =#=#=#= Begin test: List IDs of instantiated resources =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity dummy =#=#=#= End test: List IDs of instantiated resources - OK (0) =#=#=#= * Passed: crm_resource - List IDs of instantiated resources =#=#=#= Begin test: Show XML configuration of resource =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity dummy (ocf:pacemaker:Dummy): Stopped Resource XML: =#=#=#= End test: Show XML configuration of resource - OK (0) =#=#=#= * Passed: crm_resource - Show XML configuration of resource =#=#=#= Begin test: Show XML configuration 
of resource, output as XML =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity ]]> =#=#=#= End test: Show XML configuration of resource, output as XML - OK (0) =#=#=#= * Passed: crm_resource - Show XML configuration of resource, output as XML =#=#=#= Begin test: Require a destination when migrating a resource that is stopped =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity crm_resource: Resource 'dummy' not moved: active in 0 locations. To prevent 'dummy' from running on a specific location, specify a node. 
=#=#=#= Current cib after: Require a destination when migrating a resource that is stopped =#=#=#= =#=#=#= End test: Require a destination when migrating a resource that is stopped - Incorrect usage (64) =#=#=#= * Passed: crm_resource - Require a destination when migrating a resource that is stopped =#=#=#= Begin test: Don't support migration to non-existent locations =#=#=#= unpack_resources error: Resource start-up disabled since no STONITH resources have been defined unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity crm_resource: Node 'i.do.not.exist' not found Error performing operation: No such object =#=#=#= Current cib after: Don't support migration to non-existent locations =#=#=#= =#=#=#= End test: Don't support migration to non-existent locations - No such object (105) =#=#=#= * Passed: crm_resource - Don't support migration to non-existent locations =#=#=#= Begin test: Create a fencing resource =#=#=#= =#=#=#= Current cib after: Create a fencing resource =#=#=#= =#=#=#= End test: Create a fencing resource - OK (0) =#=#=#= * Passed: cibadmin - Create a fencing resource =#=#=#= Begin test: Bring resources online =#=#=#= Current cluster status: * Node List: * Online: [ node1 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Stopped * Fence (stonith:fence_true): Stopped Transition Summary: * Start dummy ( node1 ) * Start Fence ( node1 ) Executing Cluster Transition: * Resource action: dummy monitor on node1 * Resource action: Fence monitor on node1 * Resource action: dummy start on node1 * Resource action: Fence start on node1 Revised Cluster Status: * Node List: * Online: [ node1 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Started node1 * Fence (stonith:fence_true): Started node1 =#=#=#= Current cib after: Bring resources online =#=#=#= =#=#=#= End test: Bring resources online - OK (0) =#=#=#= * 
Passed: crm_simulate - Bring resources online =#=#=#= Begin test: Try to move a resource to its existing location =#=#=#= crm_resource: Error performing operation: Requested item already exists =#=#=#= Current cib after: Try to move a resource to its existing location =#=#=#= =#=#=#= End test: Try to move a resource to its existing location - Requested item already exists (108) =#=#=#= * Passed: crm_resource - Try to move a resource to its existing location =#=#=#= Begin test: Try to move a resource that doesn't exist =#=#=#= crm_resource: Resource 'xyz' not found Error performing operation: No such object =#=#=#= End test: Try to move a resource that doesn't exist - No such object (105) =#=#=#= * Passed: crm_resource - Try to move a resource that doesn't exist =#=#=#= Begin test: Move a resource from its existing location =#=#=#= WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. This will prevent dummy from running on node1 until the constraint is removed using the clear option or by editing the CIB with an appropriate tool. 
This will be the case even if node1 is the last node in the cluster =#=#=#= Current cib after: Move a resource from its existing location =#=#=#= =#=#=#= End test: Move a resource from its existing location - OK (0) =#=#=#= * Passed: crm_resource - Move a resource from its existing location =#=#=#= Begin test: Clear out constraints generated by --move =#=#=#= Removing constraint: cli-ban-dummy-on-node1 =#=#=#= Current cib after: Clear out constraints generated by --move =#=#=#= =#=#=#= End test: Clear out constraints generated by --move - OK (0) =#=#=#= * Passed: crm_resource - Clear out constraints generated by --move =#=#=#= Begin test: Default ticket granted state =#=#=#= false =#=#=#= Current cib after: Default ticket granted state =#=#=#= =#=#=#= End test: Default ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Default ticket granted state =#=#=#= Begin test: Set ticket granted state =#=#=#= =#=#=#= Current cib after: Set ticket granted state =#=#=#= =#=#=#= End test: Set ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Set ticket granted state =#=#=#= Begin test: List ticket IDs =#=#=#= ticketA =#=#=#= End test: List ticket IDs - OK (0) =#=#=#= * Passed: crm_ticket - List ticket IDs =#=#=#= Begin test: List ticket IDs, outputting in XML =#=#=#= =#=#=#= End test: List ticket IDs, outputting in XML - OK (0) =#=#=#= * Passed: crm_ticket - List ticket IDs, outputting in XML =#=#=#= Begin test: Query ticket state =#=#=#= State XML: =#=#=#= End test: Query ticket state - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket state =#=#=#= Begin test: Query ticket state, outputting as xml =#=#=#= =#=#=#= End test: Query ticket state, outputting as xml - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket state, outputting as xml =#=#=#= Begin test: Query ticket granted state =#=#=#= false =#=#=#= Current cib after: Query ticket granted state =#=#=#= =#=#=#= End test: Query ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket 
granted state =#=#=#= Begin test: Query ticket granted state, outputting as xml =#=#=#= =#=#=#= End test: Query ticket granted state, outputting as xml - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket granted state, outputting as xml =#=#=#= Begin test: Delete ticket granted state =#=#=#= =#=#=#= Current cib after: Delete ticket granted state =#=#=#= =#=#=#= End test: Delete ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Delete ticket granted state =#=#=#= Begin test: Make a ticket standby =#=#=#= =#=#=#= Current cib after: Make a ticket standby =#=#=#= =#=#=#= End test: Make a ticket standby - OK (0) =#=#=#= * Passed: crm_ticket - Make a ticket standby =#=#=#= Begin test: Query ticket standby state =#=#=#= true =#=#=#= Current cib after: Query ticket standby state =#=#=#= =#=#=#= End test: Query ticket standby state - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket standby state =#=#=#= Begin test: Activate a ticket =#=#=#= =#=#=#= Current cib after: Activate a ticket =#=#=#= =#=#=#= End test: Activate a ticket - OK (0) =#=#=#= * Passed: crm_ticket - Activate a ticket =#=#=#= Begin test: List ticket details =#=#=#= ticketA revoked (standby=false) =#=#=#= End test: List ticket details - OK (0) =#=#=#= * Passed: crm_ticket - List ticket details =#=#=#= Begin test: List ticket details, outputting as XML =#=#=#= =#=#=#= End test: List ticket details, outputting as XML - OK (0) =#=#=#= * Passed: crm_ticket - List ticket details, outputting as XML =#=#=#= Begin test: Add a second ticket =#=#=#= false =#=#=#= Current cib after: Add a second ticket =#=#=#= =#=#=#= End test: Add a second ticket - OK (0) =#=#=#= * Passed: crm_ticket - Add a second ticket =#=#=#= Begin test: Set second ticket granted state =#=#=#= =#=#=#= Current cib after: Set second ticket granted state =#=#=#= =#=#=#= End test: Set second ticket granted state - OK (0) =#=#=#= * Passed: crm_ticket - Set second ticket granted state =#=#=#= Begin test: List tickets =#=#=#= ticketA 
revoked ticketB revoked =#=#=#= End test: List tickets - OK (0) =#=#=#= * Passed: crm_ticket - List tickets =#=#=#= Begin test: List tickets, outputting as XML =#=#=#= =#=#=#= End test: List tickets, outputting as XML - OK (0) =#=#=#= * Passed: crm_ticket - List tickets, outputting as XML =#=#=#= Begin test: Delete second ticket =#=#=#= =#=#=#= Current cib after: Delete second ticket =#=#=#= =#=#=#= End test: Delete second ticket - OK (0) =#=#=#= * Passed: cibadmin - Delete second ticket =#=#=#= Begin test: Delete ticket standby state =#=#=#= =#=#=#= Current cib after: Delete ticket standby state =#=#=#= =#=#=#= End test: Delete ticket standby state - OK (0) =#=#=#= * Passed: crm_ticket - Delete ticket standby state =#=#=#= Begin test: Delete ticket standby state =#=#=#= =#=#=#= Current cib after: Delete ticket standby state =#=#=#= =#=#=#= End test: Delete ticket standby state - OK (0) =#=#=#= * Passed: cibadmin - Delete ticket standby state =#=#=#= Begin test: Query ticket constraints =#=#=#= Constraints XML: =#=#=#= End test: Query ticket constraints - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket constraints =#=#=#= Begin test: Query ticket constraints, outputting as xml =#=#=#= =#=#=#= End test: Query ticket constraints, outputting as xml - OK (0) =#=#=#= * Passed: crm_ticket - Query ticket constraints, outputting as xml =#=#=#= Begin test: Delete ticket constraint =#=#=#= =#=#=#= Current cib after: Delete ticket constraint =#=#=#= =#=#=#= End test: Delete ticket constraint - OK (0) =#=#=#= * Passed: cibadmin - Delete ticket constraint =#=#=#= Begin test: Ban a resource on unknown node =#=#=#= crm_resource: Node 'host1' not found Error performing operation: No such object =#=#=#= Current cib after: Ban a resource on unknown node =#=#=#= =#=#=#= End test: Ban a resource on unknown node - No such object (105) =#=#=#= * Passed: crm_resource - Ban a resource on unknown node =#=#=#= Begin test: Create two more nodes and bring them online =#=#=#= Current 
cluster status: * Node List: * Online: [ node1 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Started node1 * Fence (stonith:fence_true): Started node1 Performing Requested Modifications: * Bringing node node2 online * Bringing node node3 online Transition Summary: * Move Fence ( node1 -> node2 ) Executing Cluster Transition: * Resource action: dummy monitor on node3 * Resource action: dummy monitor on node2 * Resource action: Fence stop on node1 * Resource action: Fence monitor on node3 * Resource action: Fence monitor on node2 * Resource action: Fence start on node2 Revised Cluster Status: * Node List: * Online: [ node1 node2 node3 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Started node1 * Fence (stonith:fence_true): Started node2 =#=#=#= Current cib after: Create two more nodes and bring them online =#=#=#= =#=#=#= End test: Create two more nodes and bring them online - OK (0) =#=#=#= * Passed: crm_simulate - Create two more nodes and bring them online =#=#=#= Begin test: Ban dummy from node1 =#=#=#= WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. This will prevent dummy from running on node1 until the constraint is removed using the clear option or by editing the CIB with an appropriate tool. 
This will be the case even if node1 is the last node in the cluster =#=#=#= Current cib after: Ban dummy from node1 =#=#=#= =#=#=#= End test: Ban dummy from node1 - OK (0) =#=#=#= * Passed: crm_resource - Ban dummy from node1 =#=#=#= Begin test: Show where a resource is running =#=#=#= resource dummy is running on: node1 =#=#=#= End test: Show where a resource is running - OK (0) =#=#=#= * Passed: crm_resource - Show where a resource is running =#=#=#= Begin test: Show constraints on a resource =#=#=#= Locations: * Node node1 (score=-INFINITY, id=cli-ban-dummy-on-node1, rsc=dummy) =#=#=#= End test: Show constraints on a resource - OK (0) =#=#=#= * Passed: crm_resource - Show constraints on a resource =#=#=#= Begin test: Ban dummy from node2 =#=#=#= =#=#=#= Current cib after: Ban dummy from node2 =#=#=#= =#=#=#= End test: Ban dummy from node2 - OK (0) =#=#=#= * Passed: crm_resource - Ban dummy from node2 =#=#=#= Begin test: Relocate resources due to ban =#=#=#= Current cluster status: * Node List: * Online: [ node1 node2 node3 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Started node1 * Fence (stonith:fence_true): Started node2 Transition Summary: * Move dummy ( node1 -> node3 ) Executing Cluster Transition: * Resource action: dummy stop on node1 * Resource action: dummy start on node3 Revised Cluster Status: * Node List: * Online: [ node1 node2 node3 ] * Full List of Resources: * dummy (ocf:pacemaker:Dummy): Started node3 * Fence (stonith:fence_true): Started node2 =#=#=#= Current cib after: Relocate resources due to ban =#=#=#= =#=#=#= End test: Relocate resources due to ban - OK (0) =#=#=#= * Passed: crm_simulate - Relocate resources due to ban =#=#=#= Begin test: Move dummy to node1 =#=#=#= =#=#=#= Current cib after: Move dummy to node1 =#=#=#= =#=#=#= End test: Move dummy to node1 - OK (0) =#=#=#= * Passed: crm_resource - Move dummy to node1 =#=#=#= Begin test: Clear implicit constraints for dummy on node2 =#=#=#= Removing constraint: 
cli-ban-dummy-on-node2 =#=#=#= Current cib after: Clear implicit constraints for dummy on node2 =#=#=#= =#=#=#= End test: Clear implicit constraints for dummy on node2 - OK (0) =#=#=#= * Passed: crm_resource - Clear implicit constraints for dummy on node2 =#=#=#= Begin test: Drop the status section =#=#=#= =#=#=#= End test: Drop the status section - OK (0) =#=#=#= * Passed: cibadmin - Drop the status section =#=#=#= Begin test: Create a clone =#=#=#= =#=#=#= End test: Create a clone - OK (0) =#=#=#= * Passed: cibadmin - Create a clone =#=#=#= Begin test: Create a resource meta attribute =#=#=#= Performing update of 'is-managed' on 'test-clone', the parent of 'test-primitive' Set 'test-clone' option: id=test-clone-meta_attributes-is-managed set=test-clone-meta_attributes name=is-managed value=false =#=#=#= Current cib after: Create a resource meta attribute =#=#=#= =#=#=#= End test: Create a resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute =#=#=#= Begin test: Create a resource meta attribute in the primitive =#=#=#= Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed set=test-primitive-meta_attributes name=is-managed value=false =#=#=#= Current cib after: Create a resource meta attribute in the primitive =#=#=#= =#=#=#= End test: Create a resource meta attribute in the primitive - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the primitive =#=#=#= Begin test: Update resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: false (id=test-primitive-meta_attributes-is-managed) Value: false (id=test-clone-meta_attributes-is-managed) A value for 'is-managed' already exists in child 'test-primitive', performing update on that instead of 'test-clone' Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed value=true =#=#=#= Current cib after: Update resource meta attribute with duplicates =#=#=#= 
=#=#=#= End test: Update resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Update resource meta attribute with duplicates =#=#=#= Begin test: Update resource meta attribute with duplicates (force clone) =#=#=#= Set 'test-clone' option: id=test-clone-meta_attributes-is-managed name=is-managed value=true =#=#=#= Current cib after: Update resource meta attribute with duplicates (force clone) =#=#=#= =#=#=#= End test: Update resource meta attribute with duplicates (force clone) - OK (0) =#=#=#= * Passed: crm_resource - Update resource meta attribute with duplicates (force clone) =#=#=#= Begin test: Update child resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: true (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed value=false =#=#=#= Current cib after: Update child resource meta attribute with duplicates =#=#=#= =#=#=#= End test: Update child resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Update child resource meta attribute with duplicates =#=#=#= Begin test: Delete resource meta attribute with duplicates =#=#=#= Multiple attributes match name=is-managed Value: false (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) A value for 'is-managed' already exists in child 'test-primitive', performing delete on that instead of 'test-clone' Deleted 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource meta attribute with duplicates =#=#=#= =#=#=#= End test: Delete resource meta attribute with duplicates - OK (0) =#=#=#= * Passed: crm_resource - Delete resource meta attribute with duplicates =#=#=#= Begin test: Delete resource meta attribute in parent =#=#=#= Performing delete of 'is-managed' on 
'test-clone', the parent of 'test-primitive' Deleted 'test-clone' option: id=test-clone-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource meta attribute in parent =#=#=#= =#=#=#= End test: Delete resource meta attribute in parent - OK (0) =#=#=#= * Passed: crm_resource - Delete resource meta attribute in parent =#=#=#= Begin test: Create a resource meta attribute in the primitive =#=#=#= Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed set=test-primitive-meta_attributes name=is-managed value=false =#=#=#= Current cib after: Create a resource meta attribute in the primitive =#=#=#= =#=#=#= End test: Create a resource meta attribute in the primitive - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the primitive =#=#=#= Begin test: Update existing resource meta attribute =#=#=#= A value for 'is-managed' already exists in child 'test-primitive', performing update on that instead of 'test-clone' Set 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed value=true =#=#=#= Current cib after: Update existing resource meta attribute =#=#=#= =#=#=#= End test: Update existing resource meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Update existing resource meta attribute =#=#=#= Begin test: Create a resource meta attribute in the parent =#=#=#= Set 'test-clone' option: id=test-clone-meta_attributes-is-managed set=test-clone-meta_attributes name=is-managed value=true =#=#=#= Current cib after: Create a resource meta attribute in the parent =#=#=#= =#=#=#= End test: Create a resource meta attribute in the parent - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in the parent =#=#=#= Begin test: Copy resources =#=#=#= =#=#=#= End test: Copy resources - OK (0) =#=#=#= * Passed: cibadmin - Copy resources =#=#=#= Begin test: Delete resource parent meta attribute (force) =#=#=#= Deleted 'test-clone' option: 
id=test-clone-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource parent meta attribute (force) =#=#=#= =#=#=#= End test: Delete resource parent meta attribute (force) - OK (0) =#=#=#= * Passed: crm_resource - Delete resource parent meta attribute (force) =#=#=#= Begin test: Restore duplicates =#=#=#= =#=#=#= Current cib after: Restore duplicates =#=#=#= =#=#=#= End test: Restore duplicates - OK (0) =#=#=#= * Passed: cibadmin - Restore duplicates =#=#=#= Begin test: Delete resource child meta attribute =#=#=#= Multiple attributes match name=is-managed Value: true (id=test-primitive-meta_attributes-is-managed) Value: true (id=test-clone-meta_attributes-is-managed) Deleted 'test-primitive' option: id=test-primitive-meta_attributes-is-managed name=is-managed =#=#=#= Current cib after: Delete resource child meta attribute =#=#=#= =#=#=#= End test: Delete resource child meta attribute - OK (0) =#=#=#= * Passed: crm_resource - Delete resource child meta attribute =#=#=#= Begin test: Create the dummy-group resource group =#=#=#= =#=#=#= Current cib after: Create the dummy-group resource group =#=#=#= =#=#=#= End test: Create the dummy-group resource group - OK (0) =#=#=#= * Passed: cibadmin - Create the dummy-group resource group =#=#=#= Begin test: Create a resource meta attribute in dummy1 =#=#=#= Set 'dummy1' option: id=dummy1-meta_attributes-is-managed set=dummy1-meta_attributes name=is-managed value=true =#=#=#= Current cib after: Create a resource meta attribute in dummy1 =#=#=#= =#=#=#= End test: Create a resource meta attribute in dummy1 - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in dummy1 =#=#=#= Begin test: Create a resource meta attribute in dummy-group =#=#=#= Set 'dummy1' option: id=dummy1-meta_attributes-is-managed name=is-managed value=false Set 'dummy-group' option: id=dummy-group-meta_attributes-is-managed set=dummy-group-meta_attributes name=is-managed value=false =#=#=#= Current cib 
after: Create a resource meta attribute in dummy-group =#=#=#= =#=#=#= End test: Create a resource meta attribute in dummy-group - OK (0) =#=#=#= * Passed: crm_resource - Create a resource meta attribute in dummy-group =#=#=#= Begin test: Delete the dummy-group resource group =#=#=#= =#=#=#= Current cib after: Delete the dummy-group resource group =#=#=#= =#=#=#= End test: Delete the dummy-group resource group - OK (0) =#=#=#= * Passed: cibadmin - Delete the dummy-group resource group =#=#=#= Begin test: Specify a lifetime when moving a resource =#=#=#= Migration will take effect until: =#=#=#= Current cib after: Specify a lifetime when moving a resource =#=#=#= =#=#=#= End test: Specify a lifetime when moving a resource - OK (0) =#=#=#= * Passed: crm_resource - Specify a lifetime when moving a resource =#=#=#= Begin test: Try to move a resource previously moved with a lifetime =#=#=#= =#=#=#= Current cib after: Try to move a resource previously moved with a lifetime =#=#=#= =#=#=#= End test: Try to move a resource previously moved with a lifetime - OK (0) =#=#=#= * Passed: crm_resource - Try to move a resource previously moved with a lifetime =#=#=#= Begin test: Ban dummy from node1 for a short time =#=#=#= Migration will take effect until: WARNING: Creating rsc_location constraint 'cli-ban-dummy-on-node1' with a score of -INFINITY for resource dummy on node1. This will prevent dummy from running on node1 until the constraint is removed using the clear option or by editing the CIB with an appropriate tool. 
This will be the case even if node1 is the last node in the cluster =#=#=#= Current cib after: Ban dummy from node1 for a short time =#=#=#= =#=#=#= End test: Ban dummy from node1 for a short time - OK (0) =#=#=#= * Passed: crm_resource - Ban dummy from node1 for a short time =#=#=#= Begin test: Remove expired constraints =#=#=#= Removing constraint: cli-ban-dummy-on-node1 =#=#=#= Current cib after: Remove expired constraints =#=#=#= =#=#=#= End test: Remove expired constraints - OK (0) =#=#=#= * Passed: crm_resource - Remove expired constraints =#=#=#= Begin test: Clear all implicit constraints for dummy =#=#=#= Removing constraint: cli-prefer-dummy =#=#=#= Current cib after: Clear all implicit constraints for dummy =#=#=#= =#=#=#= End test: Clear all implicit constraints for dummy - OK (0) =#=#=#= * Passed: crm_resource - Clear all implicit constraints for dummy =#=#=#= Begin test: Set a node health strategy =#=#=#= =#=#=#= Current cib after: Set a node health strategy =#=#=#= =#=#=#= End test: Set a node health strategy - OK (0) =#=#=#= * Passed: crm_attribute - Set a node health strategy =#=#=#= Begin test: Set a node health attribute =#=#=#= =#=#=#= Current cib after: Set a node health attribute =#=#=#= =#=#=#= End test: Set a node health attribute - OK (0) =#=#=#= * Passed: crm_attribute - Set a node health attribute =#=#=#= Begin test: Show why a resource is not running on an unhealthy node =#=#=#= =#=#=#= End test: Show why a resource is not running on an unhealthy node - OK (0) =#=#=#= * Passed: crm_resource - Show why a resource is not running on an unhealthy node =#=#=#= Begin test: Delete a resource =#=#=#= =#=#=#= Current cib after: Delete a resource =#=#=#= =#=#=#= End test: Delete a resource - OK (0) =#=#=#= * Passed: crm_resource - Delete a resource =#=#=#= Begin test: Create an XML patchset =#=#=#= =#=#=#= End test: Create an XML patchset - Error occurred (1) =#=#=#= * Passed: crm_diff - Create an XML patchset =#=#=#= Begin test: Check locations 
and constraints for prim1 =#=#=#= =#=#=#= End test: Check locations and constraints for prim1 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim1 =#=#=#= Begin test: Recursively check locations and constraints for prim1 =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim1 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim1 =#=#=#= Begin test: Check locations and constraints for prim1 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim1 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim1 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim1 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim1 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim1 in XML =#=#=#= Begin test: Check locations and constraints for prim2 =#=#=#= Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) Resources prim2 is colocated with: * prim3 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) =#=#=#= End test: Check locations and constraints for prim2 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim2 =#=#=#= Begin test: Recursively check locations and constraints for prim2 =#=#=#= Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) Resources prim2 is colocated with: * prim3 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) * Resources prim3 is colocated with: * prim4 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) * Resources prim4 is colocated with: * prim5 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim2 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and 
constraints for prim2 =#=#=#= Begin test: Check locations and constraints for prim2 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim2 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim2 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim2 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim2 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim2 in XML =#=#=#= Begin test: Check locations and constraints for prim3 =#=#=#= Resources colocated with prim3: * prim2 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) * Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) Resources prim3 is colocated with: * prim4 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) =#=#=#= End test: Check locations and constraints for prim3 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim3 =#=#=#= Begin test: Recursively check locations and constraints for prim3 =#=#=#= Resources colocated with prim3: * prim2 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) * Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) Resources prim3 is colocated with: * prim4 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) * Resources prim4 is colocated with: * prim5 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim3 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim3 =#=#=#= Begin test: Check locations and constraints for prim3 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim3 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and 
constraints for prim3 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim3 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim3 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim3 in XML =#=#=#= Begin test: Check locations and constraints for prim4 =#=#=#= Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) Resources colocated with prim4: * prim10 (score=INFINITY, id=colocation-prim10-prim4-INFINITY) * prim3 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) Resources prim4 is colocated with: * prim5 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) =#=#=#= End test: Check locations and constraints for prim4 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim4 =#=#=#= Begin test: Recursively check locations and constraints for prim4 =#=#=#= Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) Resources colocated with prim4: * prim10 (score=INFINITY, id=colocation-prim10-prim4-INFINITY) * prim3 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) * Resources colocated with prim3: * prim2 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) * Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) Resources prim4 is colocated with: * prim5 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim4 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim4 =#=#=#= Begin test: Check locations and constraints for prim4 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim4 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim4 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim4 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim4 in 
XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim4 in XML =#=#=#= Begin test: Check locations and constraints for prim5 =#=#=#= Resources colocated with prim5: * prim4 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) =#=#=#= End test: Check locations and constraints for prim5 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim5 =#=#=#= Begin test: Recursively check locations and constraints for prim5 =#=#=#= Resources colocated with prim5: * prim4 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) * Resources colocated with prim4: * prim10 (score=INFINITY, id=colocation-prim10-prim4-INFINITY) * prim3 (score=INFINITY, id=colocation-prim3-prim4-INFINITY) * Resources colocated with prim3: * prim2 (score=INFINITY, id=colocation-prim2-prim3-INFINITY) * Locations: * Node cluster01 (score=INFINITY, id=prim2-on-cluster1, rsc=prim2) =#=#=#= End test: Recursively check locations and constraints for prim5 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim5 =#=#=#= Begin test: Check locations and constraints for prim5 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim5 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim5 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim5 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim5 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim5 in XML =#=#=#= Begin test: Check locations and constraints for prim6 =#=#=#= Locations: * Node cluster02 (score=-INFINITY, id=prim6-not-on-cluster2, rsc=prim6) =#=#=#= End test: Check locations and constraints for prim6 - OK (0) =#=#=#= * Passed: 
crm_resource - Check locations and constraints for prim6 =#=#=#= Begin test: Recursively check locations and constraints for prim6 =#=#=#= Locations: * Node cluster02 (score=-INFINITY, id=prim6-not-on-cluster2, rsc=prim6) =#=#=#= End test: Recursively check locations and constraints for prim6 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim6 =#=#=#= Begin test: Check locations and constraints for prim6 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim6 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim6 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim6 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim6 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim6 in XML =#=#=#= Begin test: Check locations and constraints for prim7 =#=#=#= Resources prim7 is colocated with: * group (score=INFINITY, id=colocation-prim7-group-INFINITY) =#=#=#= End test: Check locations and constraints for prim7 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim7 =#=#=#= Begin test: Recursively check locations and constraints for prim7 =#=#=#= Resources prim7 is colocated with: * group (score=INFINITY, id=colocation-prim7-group-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim7 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim7 =#=#=#= Begin test: Check locations and constraints for prim7 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim7 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim7 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim7 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim7 in XML - OK (0) =#=#=#= * Passed: crm_resource - 
Recursively check locations and constraints for prim7 in XML =#=#=#= Begin test: Check locations and constraints for prim8 =#=#=#= Resources prim8 is colocated with: * gr2 (score=INFINITY, id=colocation-prim8-gr2-INFINITY) =#=#=#= End test: Check locations and constraints for prim8 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim8 =#=#=#= Begin test: Recursively check locations and constraints for prim8 =#=#=#= Resources prim8 is colocated with: * gr2 (score=INFINITY, id=colocation-prim8-gr2-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim8 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim8 =#=#=#= Begin test: Check locations and constraints for prim8 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim8 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim8 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim8 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim8 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim8 in XML =#=#=#= Begin test: Check locations and constraints for prim9 =#=#=#= Resources prim9 is colocated with: * clone (score=INFINITY, id=colocation-prim9-clone-INFINITY) =#=#=#= End test: Check locations and constraints for prim9 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim9 =#=#=#= Begin test: Recursively check locations and constraints for prim9 =#=#=#= Resources prim9 is colocated with: * clone (score=INFINITY, id=colocation-prim9-clone-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim9 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim9 =#=#=#= Begin test: Check locations and constraints for prim9 in XML =#=#=#= =#=#=#= End test: Check locations and constraints 
for prim9 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim9 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim9 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim9 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim9 in XML =#=#=#= Begin test: Check locations and constraints for prim10 =#=#=#= Resources prim10 is colocated with: * prim4 (score=INFINITY, id=colocation-prim10-prim4-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) =#=#=#= End test: Check locations and constraints for prim10 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim10 =#=#=#= Begin test: Recursively check locations and constraints for prim10 =#=#=#= Resources prim10 is colocated with: * prim4 (score=INFINITY, id=colocation-prim10-prim4-INFINITY) * Locations: * Node cluster02 (score=INFINITY, id=prim4-on-cluster2, rsc=prim4) * Resources prim4 is colocated with: * prim5 (score=INFINITY, id=colocation-prim4-prim5-INFINITY) =#=#=#= End test: Recursively check locations and constraints for prim10 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim10 =#=#=#= Begin test: Check locations and constraints for prim10 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim10 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim10 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim10 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim10 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim10 in XML =#=#=#= Begin test: Check locations and constraints for prim11 =#=#=#= Resources colocated with prim11: * prim13 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) Resources prim11 
is colocated with: * prim12 (score=INFINITY, id=colocation-prim11-prim12-INFINITY) =#=#=#= End test: Check locations and constraints for prim11 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim11 =#=#=#= Begin test: Recursively check locations and constraints for prim11 =#=#=#= Resources colocated with prim11: * prim13 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) * Resources colocated with prim13: * prim12 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) * Resources colocated with prim12: * prim11 (id=colocation-prim11-prim12-INFINITY - loop) Resources prim11 is colocated with: * prim12 (score=INFINITY, id=colocation-prim11-prim12-INFINITY) * Resources prim12 is colocated with: * prim13 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) * Resources prim13 is colocated with: * prim11 (id=colocation-prim13-prim11-INFINITY - loop) =#=#=#= End test: Recursively check locations and constraints for prim11 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim11 =#=#=#= Begin test: Check locations and constraints for prim11 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim11 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim11 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim11 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim11 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim11 in XML =#=#=#= Begin test: Check locations and constraints for prim12 =#=#=#= Resources colocated with prim12: * prim11 (score=INFINITY, id=colocation-prim11-prim12-INFINITY) Resources prim12 is colocated with: * prim13 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) =#=#=#= End test: Check locations and constraints for prim12 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim12 =#=#=#= 
Begin test: Recursively check locations and constraints for prim12 =#=#=#= Resources colocated with prim12: * prim11 (score=INFINITY, id=colocation-prim11-prim12-INFINITY) * Resources colocated with prim11: * prim13 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) * Resources colocated with prim13: * prim12 (id=colocation-prim12-prim13-INFINITY - loop) Resources prim12 is colocated with: * prim13 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) * Resources prim13 is colocated with: * prim11 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) * Resources prim11 is colocated with: * prim12 (id=colocation-prim11-prim12-INFINITY - loop) =#=#=#= End test: Recursively check locations and constraints for prim12 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim12 =#=#=#= Begin test: Check locations and constraints for prim12 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim12 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim12 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim12 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim12 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim12 in XML =#=#=#= Begin test: Check locations and constraints for prim13 =#=#=#= Resources colocated with prim13: * prim12 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) Resources prim13 is colocated with: * prim11 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) =#=#=#= End test: Check locations and constraints for prim13 - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim13 =#=#=#= Begin test: Recursively check locations and constraints for prim13 =#=#=#= Resources colocated with prim13: * prim12 (score=INFINITY, id=colocation-prim12-prim13-INFINITY) * Resources colocated with prim12: * prim11 (score=INFINITY, 
id=colocation-prim11-prim12-INFINITY) * Resources colocated with prim11: * prim13 (id=colocation-prim13-prim11-INFINITY - loop) Resources prim13 is colocated with: * prim11 (score=INFINITY, id=colocation-prim13-prim11-INFINITY) * Resources prim11 is colocated with: * prim12 (score=INFINITY, id=colocation-prim11-prim12-INFINITY) * Resources prim12 is colocated with: * prim13 (id=colocation-prim12-prim13-INFINITY - loop) =#=#=#= End test: Recursively check locations and constraints for prim13 - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim13 =#=#=#= Begin test: Check locations and constraints for prim13 in XML =#=#=#= =#=#=#= End test: Check locations and constraints for prim13 in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for prim13 in XML =#=#=#= Begin test: Recursively check locations and constraints for prim13 in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for prim13 in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for prim13 in XML =#=#=#= Begin test: Check locations and constraints for group =#=#=#= Resources colocated with group: * prim7 (score=INFINITY, id=colocation-prim7-group-INFINITY) =#=#=#= End test: Check locations and constraints for group - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for group =#=#=#= Begin test: Recursively check locations and constraints for group =#=#=#= Resources colocated with group: * prim7 (score=INFINITY, id=colocation-prim7-group-INFINITY) =#=#=#= End test: Recursively check locations and constraints for group - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for group =#=#=#= Begin test: Check locations and constraints for group in XML =#=#=#= =#=#=#= End test: Check locations and constraints for group in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for group in XML =#=#=#= 
Begin test: Recursively check locations and constraints for group in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for group in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for group in XML =#=#=#= Begin test: Check locations and constraints for clone =#=#=#= Resources colocated with clone: * prim9 (score=INFINITY, id=colocation-prim9-clone-INFINITY) =#=#=#= End test: Check locations and constraints for clone - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for clone =#=#=#= Begin test: Recursively check locations and constraints for clone =#=#=#= Resources colocated with clone: * prim9 (score=INFINITY, id=colocation-prim9-clone-INFINITY) =#=#=#= End test: Recursively check locations and constraints for clone - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for clone =#=#=#= Begin test: Check locations and constraints for clone in XML =#=#=#= =#=#=#= End test: Check locations and constraints for clone in XML - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for clone in XML =#=#=#= Begin test: Recursively check locations and constraints for clone in XML =#=#=#= =#=#=#= End test: Recursively check locations and constraints for clone in XML - OK (0) =#=#=#= * Passed: crm_resource - Recursively check locations and constraints for clone in XML =#=#=#= Begin test: Check locations and constraints for group member (referring to group) =#=#=#= Resources colocated with group: * prim7 (score=INFINITY, id=colocation-prim7-group-INFINITY) =#=#=#= End test: Check locations and constraints for group member (referring to group) - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for group member (referring to group) =#=#=#= Begin test: Check locations and constraints for group member (without referring to group) =#=#=#= Resources colocated with gr2: * prim8 (score=INFINITY, 
id=colocation-prim8-gr2-INFINITY) =#=#=#= End test: Check locations and constraints for group member (without referring to group) - OK (0) =#=#=#= * Passed: crm_resource - Check locations and constraints for group member (without referring to group) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Set a meta-attribute for primitive and resources colocated with it =#=#=#= =#=#=#= End test: Set a meta-attribute for primitive and resources colocated with it - OK (0) =#=#=#= * Passed: crm_resource - Set a meta-attribute for primitive and resources colocated with it =#=#=#= Begin test: Set a meta-attribute for group and resource colocated with it =#=#=#= Set 'group' option: id=group-meta_attributes-target-role set=group-meta_attributes name=target-role value=Stopped Set 'prim7' option: id=prim7-meta_attributes-target-role set=prim7-meta_attributes name=target-role value=Stopped =#=#=#= End test: Set a meta-attribute for group and resource colocated with it - OK (0) =#=#=#= * Passed: crm_resource - Set a meta-attribute for group and resource colocated with it =#=#=#= Begin test: Set a meta-attribute for clone and resource colocated with it =#=#=#= =#=#=#= End test: Set a meta-attribute for clone and resource colocated with it - OK (0) =#=#=#= * Passed: crm_resource - Set a meta-attribute for clone and resource colocated with it =#=#=#= Begin test: Show resource digests =#=#=#= =#=#=#= End test: Show resource digests - OK (0) =#=#=#= * Passed: crm_resource - Show resource digests =#=#=#= Begin test: Show resource digests with overrides =#=#=#= =#=#=#= End test: Show resource digests with overrides - OK (0) =#=#=#= * Passed: crm_resource - Show resource digests with overrides =#=#=#= Begin test: Show resource operations =#=#=#= rsc1 (ocf:pacemaker:Dummy): Started: rsc1_monitor_0 (node=node4, call=136, rc=7, exec=28ms): complete Fencing (stonith:fence_xvm): Started: Fencing_monitor_0 
(node=node4, call=5, rc=7, exec=2ms): complete rsc1 (ocf:pacemaker:Dummy): Started: rsc1_monitor_0 (node=node2, call=101, rc=7, exec=45ms): complete Fencing (stonith:fence_xvm): Started: Fencing_monitor_0 (node=node2, call=5, rc=7, exec=4ms): complete Fencing (stonith:fence_xvm): Started: Fencing_monitor_0 (node=node3, call=5, rc=7, exec=24ms): complete rsc1 (ocf:pacemaker:Dummy): Started: rsc1_monitor_0 (node=node5, call=99, rc=193, exec=27ms): pending Fencing (stonith:fence_xvm): Started: Fencing_monitor_0 (node=node5, call=5, rc=7, exec=14ms): complete rsc1 (ocf:pacemaker:Dummy): Started: rsc1_start_0 (node=node1, call=104, rc=0, exec=22ms): complete rsc1 (ocf:pacemaker:Dummy): Started: rsc1_monitor_10000 (node=node1, call=106, rc=0, exec=20ms): complete Fencing (stonith:fence_xvm): Started: Fencing_start_0 (node=node1, call=10, rc=0, exec=59ms): complete Fencing (stonith:fence_xvm): Started: Fencing_monitor_120000 (node=node1, call=12, rc=0, exec=70ms): complete =#=#=#= End test: Show resource operations - OK (0) =#=#=#= * Passed: crm_resource - Show resource operations =#=#=#= Begin test: Show resource operations (XML) =#=#=#= =#=#=#= End test: Show resource operations (XML) - OK (0) =#=#=#= * Passed: crm_resource - Show resource operations (XML) =#=#=#= Begin test: List all nodes =#=#=#= cluster node: overcloud-controller-0 (1) cluster node: overcloud-controller-1 (2) cluster node: overcloud-controller-2 (3) cluster node: overcloud-galera-0 (4) cluster node: overcloud-galera-1 (5) cluster node: overcloud-galera-2 (6) guest node: lxc1 (lxc1) guest node: lxc2 (lxc2) remote node: overcloud-rabbit-0 (overcloud-rabbit-0) remote node: overcloud-rabbit-1 (overcloud-rabbit-1) remote node: overcloud-rabbit-2 (overcloud-rabbit-2) =#=#=#= End test: List all nodes - OK (0) =#=#=#= * Passed: crmadmin - List all nodes =#=#=#= Begin test: Minimally list all nodes =#=#=#= overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 overcloud-galera-0 
overcloud-galera-1 overcloud-galera-2 lxc1 lxc2 overcloud-rabbit-0 overcloud-rabbit-1 overcloud-rabbit-2 =#=#=#= End test: Minimally list all nodes - OK (0) =#=#=#= * Passed: crmadmin - Minimally list all nodes =#=#=#= Begin test: List all nodes as bash exports =#=#=#= export overcloud-controller-0=1 export overcloud-controller-1=2 export overcloud-controller-2=3 export overcloud-galera-0=4 export overcloud-galera-1=5 export overcloud-galera-2=6 export lxc1=lxc1 export lxc2=lxc2 export overcloud-rabbit-0=overcloud-rabbit-0 export overcloud-rabbit-1=overcloud-rabbit-1 export overcloud-rabbit-2=overcloud-rabbit-2 =#=#=#= End test: List all nodes as bash exports - OK (0) =#=#=#= * Passed: crmadmin - List all nodes as bash exports =#=#=#= Begin test: List cluster nodes =#=#=#= 6 =#=#=#= End test: List cluster nodes - OK (0) =#=#=#= * Passed: crmadmin - List cluster nodes =#=#=#= Begin test: List guest nodes =#=#=#= 2 =#=#=#= End test: List guest nodes - OK (0) =#=#=#= * Passed: crmadmin - List guest nodes =#=#=#= Begin test: List remote nodes =#=#=#= 3 =#=#=#= End test: List remote nodes - OK (0) =#=#=#= * Passed: crmadmin - List remote nodes =#=#=#= Begin test: List cluster,remote nodes =#=#=#= 9 =#=#=#= End test: List cluster,remote nodes - OK (0) =#=#=#= * Passed: crmadmin - List cluster,remote nodes =#=#=#= Begin test: List guest,remote nodes =#=#=#= 5 =#=#=#= End test: List guest,remote nodes - OK (0) =#=#=#= * Passed: crmadmin - List guest,remote nodes =#=#=#= Begin test: Show allocation scores with crm_simulate =#=#=#= =#=#=#= End test: Show allocation scores with crm_simulate - OK (0) =#=#=#= * Passed: crm_simulate - Show allocation scores with crm_simulate =#=#=#= Begin test: Show utilization with crm_simulate =#=#=#= 4 of 32 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] * Full List of Resources: * Clone Set: 
ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Utilization Information: Only 'private' parameters to 1m-interval monitor for dummy on cluster02 changed: 0:0;16:2:0:4a9e64d6-e1dd-4395-917c-1596312eafe4 * Original: cluster01 capacity: * Original: cluster02 capacity: * Original: httpd-bundle-0 capacity: * Original: httpd-bundle-1 capacity: * Original: httpd-bundle-2 capacity: * pcmk__assign_resource: ping:0 utilization on cluster02: * pcmk__assign_resource: ping:1 utilization on cluster01: * pcmk__assign_resource: Fencing utilization on cluster01: * pcmk__assign_resource: dummy utilization on cluster02: * pcmk__assign_resource: httpd-bundle-docker-0 utilization on cluster01: * pcmk__assign_resource: httpd-bundle-docker-1 utilization on cluster02: * pcmk__assign_resource: httpd-bundle-ip-192.168.122.131 utilization on cluster01: * pcmk__assign_resource: httpd-bundle-0 utilization on cluster01: * pcmk__assign_resource: httpd:0 utilization on httpd-bundle-0: * pcmk__assign_resource: httpd-bundle-ip-192.168.122.132 utilization 
on cluster02: * pcmk__assign_resource: httpd-bundle-1 utilization on cluster02: * pcmk__assign_resource: httpd:1 utilization on httpd-bundle-1: * pcmk__assign_resource: httpd-bundle-2 utilization on cluster01: * pcmk__assign_resource: httpd:2 utilization on httpd-bundle-2: * pcmk__assign_resource: Public-IP utilization on cluster02: * pcmk__assign_resource: Email utilization on cluster02: * pcmk__assign_resource: mysql-proxy:0 utilization on cluster02: * pcmk__assign_resource: mysql-proxy:1 utilization on cluster01: * pcmk__assign_resource: promotable-rsc:0 utilization on cluster02: * pcmk__assign_resource: promotable-rsc:1 utilization on cluster01: * Remaining: cluster01 capacity: * Remaining: cluster02 capacity: * Remaining: httpd-bundle-0 capacity: * Remaining: httpd-bundle-1 capacity: * Remaining: httpd-bundle-2 capacity: Transition Summary: * Start httpd-bundle-2 ( cluster01 ) due to unrunnable httpd-bundle-docker-2 start (blocked) * Start httpd:2 ( httpd-bundle-2 ) due to unrunnable httpd-bundle-docker-2 start (blocked) =#=#=#= End test: Show utilization with crm_simulate - OK (0) =#=#=#= * Passed: crm_simulate - Show utilization with crm_simulate =#=#=#= Begin test: Simulate injecting a failure =#=#=#= 4 of 32 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) 
(ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Performing Requested Modifications: * Injecting ping_monitor_10000@cluster02=1 into the configuration * Injecting attribute fail-count-ping#monitor_10000=1 into /node_state '2' * Injecting attribute last-failure-ping#monitor_10000= into /node_state '2' Transition Summary: * Recover ping:0 ( cluster02 ) * Start httpd-bundle-2 ( cluster01 ) due to unrunnable httpd-bundle-docker-2 start (blocked) * Start httpd:2 ( httpd-bundle-2 ) due to unrunnable httpd-bundle-docker-2 start (blocked) Executing Cluster Transition: * Cluster action: clear_failcount for ping on cluster02 * Pseudo action: ping-clone_stop_0 * Pseudo action: httpd-bundle_start_0 * Resource action: ping stop on cluster02 * Pseudo action: ping-clone_stopped_0 * Pseudo action: ping-clone_start_0 * Pseudo action: httpd-bundle-clone_start_0 * Resource action: ping start on cluster02 * Resource action: ping monitor=10000 on cluster02 * Pseudo action: ping-clone_running_0 * Pseudo action: httpd-bundle-clone_running_0 * Pseudo action: httpd-bundle_running_0 Revised Cluster Status: * Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * 
inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] =#=#=#= End test: Simulate injecting a failure - OK (0) =#=#=#= * Passed: crm_simulate - Simulate injecting a failure =#=#=#= Begin test: Simulate bringing a node down =#=#=#= 4 of 32 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * 
Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Performing Requested Modifications: * Taking node cluster01 offline Transition Summary: * Fence (off) httpd-bundle-0 (resource: httpd-bundle-docker-0) 'guest is unclean' * Start Fencing ( cluster02 ) * Start httpd-bundle-0 ( cluster02 ) due to unrunnable httpd-bundle-docker-0 start (blocked) * Stop httpd:0 ( httpd-bundle-0 ) due to unrunnable httpd-bundle-docker-0 start * Start httpd-bundle-2 ( cluster02 ) due to unrunnable httpd-bundle-docker-2 start (blocked) * Start httpd:2 ( httpd-bundle-2 ) due to unrunnable httpd-bundle-docker-2 start (blocked) Executing Cluster Transition: * Resource action: Fencing start on cluster02 * Pseudo action: stonith-httpd-bundle-0-off on httpd-bundle-0 * Pseudo action: httpd-bundle_stop_0 * Pseudo action: httpd-bundle_start_0 * Resource action: Fencing monitor=60000 on cluster02 * Pseudo action: httpd-bundle-clone_stop_0 * Pseudo action: httpd_stop_0 * Pseudo action: httpd-bundle-clone_stopped_0 * Pseudo action: httpd-bundle-clone_start_0 * Pseudo action: httpd-bundle_stopped_0 * Pseudo action: httpd-bundle-clone_running_0 * Pseudo action: httpd-bundle_running_0 Revised Cluster Status: * Node List: * Online: [ cluster02 ] * OFFLINE: [ cluster01 ] * GuestOnline: [ httpd-bundle-1 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster02 ] * Stopped: [ cluster01 ] * Fencing (stonith:fence_xvm): Started cluster02 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): FAILED * httpd-bundle-1 
(192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster02 ] * Stopped: [ cluster01 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Stopped: [ cluster01 ] =#=#=#= End test: Simulate bringing a node down - OK (0) =#=#=#= * Passed: crm_simulate - Simulate bringing a node down =#=#=#= Begin test: Simulate a node failing =#=#=#= 4 of 32 resource instances DISABLED and 0 BLOCKED from further action due to failure Current cluster status: * Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] Performing Requested Modifications: * Failing node cluster02 Transition Summary: * Fence (off) httpd-bundle-1 
(resource: httpd-bundle-docker-1) 'guest is unclean' * Fence (reboot) cluster02 'peer is no longer part of the cluster' * Stop ping:0 ( cluster02 ) due to node availability * Stop dummy ( cluster02 ) due to node availability * Stop httpd-bundle-ip-192.168.122.132 ( cluster02 ) due to node availability * Stop httpd-bundle-docker-1 ( cluster02 ) due to node availability * Stop httpd-bundle-1 ( cluster02 ) due to unrunnable httpd-bundle-docker-1 start * Stop httpd:1 ( httpd-bundle-1 ) due to unrunnable httpd-bundle-docker-1 start * Start httpd-bundle-2 ( cluster01 ) due to unrunnable httpd-bundle-docker-2 start (blocked) * Start httpd:2 ( httpd-bundle-2 ) due to unrunnable httpd-bundle-docker-2 start (blocked) * Move Public-IP ( cluster02 -> cluster01 ) * Move Email ( cluster02 -> cluster01 ) * Stop mysql-proxy:0 ( cluster02 ) due to node availability * Stop promotable-rsc:0 ( Promoted cluster02 ) due to node availability Executing Cluster Transition: * Pseudo action: httpd-bundle-1_stop_0 * Pseudo action: promotable-clone_demote_0 * Pseudo action: httpd-bundle_stop_0 * Pseudo action: httpd-bundle_start_0 * Fencing cluster02 (reboot) * Pseudo action: ping-clone_stop_0 * Pseudo action: dummy_stop_0 * Pseudo action: httpd-bundle-docker-1_stop_0 * Pseudo action: exim-group_stop_0 * Pseudo action: Email_stop_0 * Pseudo action: mysql-clone-group_stop_0 * Pseudo action: promotable-rsc_demote_0 * Pseudo action: promotable-clone_demoted_0 * Pseudo action: promotable-clone_stop_0 * Pseudo action: stonith-httpd-bundle-1-off on httpd-bundle-1 * Pseudo action: ping_stop_0 * Pseudo action: ping-clone_stopped_0 * Pseudo action: httpd-bundle-clone_stop_0 * Pseudo action: httpd-bundle-ip-192.168.122.132_stop_0 * Pseudo action: Public-IP_stop_0 * Pseudo action: mysql-group:0_stop_0 * Pseudo action: mysql-proxy_stop_0 * Pseudo action: promotable-rsc_stop_0 * Pseudo action: promotable-clone_stopped_0 * Pseudo action: httpd_stop_0 * Pseudo action: httpd-bundle-clone_stopped_0 * Pseudo 
action: httpd-bundle-clone_start_0 * Pseudo action: exim-group_stopped_0 * Pseudo action: exim-group_start_0 * Resource action: Public-IP start on cluster01 * Resource action: Email start on cluster01 * Pseudo action: mysql-group:0_stopped_0 * Pseudo action: mysql-clone-group_stopped_0 * Pseudo action: httpd-bundle_stopped_0 * Pseudo action: httpd-bundle-clone_running_0 * Pseudo action: exim-group_running_0 * Pseudo action: httpd-bundle_running_0 Revised Cluster Status: * Node List: * Online: [ cluster01 ] * OFFLINE: [ cluster02 ] * GuestOnline: [ httpd-bundle-0 ] * Full List of Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 ] * Stopped: [ cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Stopped * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf:pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf:pacemaker:Dummy): Stopped (disabled) * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): FAILED * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster01 * Email (lsb:exim): Started cluster01 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 ] * Stopped: [ cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Unpromoted: [ cluster01 ] * Stopped: [ cluster02 ] =#=#=#= End test: Simulate a node failing - OK (0) =#=#=#= * Passed: crm_simulate - Simulate a node failing =#=#=#= Begin test: List a promotable clone resource =#=#=#= resource promotable-clone is running on: cluster01 resource promotable-clone is running on: cluster02 Promoted =#=#=#= End test: List a promotable clone resource - OK (0) =#=#=#= * Passed: crm_resource 
- List a promotable clone resource =#=#=#= Begin test: List the primitive of a promotable clone resource =#=#=#= resource promotable-rsc is running on: cluster01 resource promotable-rsc is running on: cluster02 Promoted =#=#=#= End test: List the primitive of a promotable clone resource - OK (0) =#=#=#= * Passed: crm_resource - List the primitive of a promotable clone resource =#=#=#= Begin test: List a single instance of a promotable clone resource =#=#=#= resource promotable-rsc:0 is running on: cluster02 Promoted =#=#=#= End test: List a single instance of a promotable clone resource - OK (0) =#=#=#= * Passed: crm_resource - List a single instance of a promotable clone resource =#=#=#= Begin test: List another instance of a promotable clone resource =#=#=#= resource promotable-rsc:1 is running on: cluster01 =#=#=#= End test: List another instance of a promotable clone resource - OK (0) =#=#=#= * Passed: crm_resource - List another instance of a promotable clone resource =#=#=#= Begin test: List a promotable clone resource in XML =#=#=#= cluster01 cluster02 =#=#=#= End test: List a promotable clone resource in XML - OK (0) =#=#=#= * Passed: crm_resource - List a promotable clone resource in XML =#=#=#= Begin test: List the primitive of a promotable clone resource in XML =#=#=#= cluster01 cluster02 =#=#=#= End test: List the primitive of a promotable clone resource in XML - OK (0) =#=#=#= * Passed: crm_resource - List the primitive of a promotable clone resource in XML =#=#=#= Begin test: List a single instance of a promotable clone resource in XML =#=#=#= cluster02 =#=#=#= End test: List a single instance of a promotable clone resource in XML - OK (0) =#=#=#= * Passed: crm_resource - List a single instance of a promotable clone resource in XML =#=#=#= Begin test: List another instance of a promotable clone resource in XML =#=#=#= cluster01 =#=#=#= End test: List another instance of a promotable clone resource in XML - OK (0) =#=#=#= * Passed: crm_resource - List 
another instance of a promotable clone resource in XML =#=#=#= Begin test: Try to move an instance of a cloned resource =#=#=#= crm_resource: Cannot operate on clone resource instance 'promotable-rsc:0' Error performing operation: Invalid parameter =#=#=#= End test: Try to move an instance of a cloned resource - Invalid parameter (2) =#=#=#= * Passed: crm_resource - Try to move an instance of a cloned resource =#=#=#= Begin test: Query a nonexistent promotable score attribute =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query a nonexistent promotable score attribute - No such object (105) =#=#=#= * Passed: crm_attribute - Query a nonexistent promotable score attribute =#=#=#= Begin test: Query a nonexistent promotable score attribute (XML) =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query a nonexistent promotable score attribute (XML) - No such object (105) =#=#=#= * Passed: crm_attribute - Query a nonexistent promotable score attribute (XML) =#=#=#= Begin test: Delete a nonexistent promotable score attribute =#=#=#= =#=#=#= End test: Delete a nonexistent promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Delete a nonexistent promotable score attribute =#=#=#= Begin test: Delete a nonexistent promotable score attribute (XML) =#=#=#= =#=#=#= End test: Delete a nonexistent promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Delete a nonexistent promotable score attribute (XML) =#=#=#= Begin test: Query after deleting a nonexistent promotable score attribute =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query after deleting a nonexistent promotable score attribute - No such object (105) =#=#=#= * Passed: crm_attribute - Query after deleting a nonexistent promotable score attribute =#=#=#= Begin test: Query after deleting a nonexistent promotable score attribute (XML) =#=#=#= 
crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query after deleting a nonexistent promotable score attribute (XML) - No such object (105) =#=#=#= * Passed: crm_attribute - Query after deleting a nonexistent promotable score attribute (XML) =#=#=#= Begin test: Update a nonexistent promotable score attribute =#=#=#= =#=#=#= End test: Update a nonexistent promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Update a nonexistent promotable score attribute =#=#=#= Begin test: Update a nonexistent promotable score attribute (XML) =#=#=#= =#=#=#= End test: Update a nonexistent promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Update a nonexistent promotable score attribute (XML) =#=#=#= Begin test: Query after updating a nonexistent promotable score attribute =#=#=#= scope=status name=master-promotable-rsc value=1 =#=#=#= End test: Query after updating a nonexistent promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating a nonexistent promotable score attribute =#=#=#= Begin test: Query after updating a nonexistent promotable score attribute (XML) =#=#=#= =#=#=#= End test: Query after updating a nonexistent promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating a nonexistent promotable score attribute (XML) =#=#=#= Begin test: Update an existing promotable score attribute =#=#=#= =#=#=#= End test: Update an existing promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Update an existing promotable score attribute =#=#=#= Begin test: Update an existing promotable score attribute (XML) =#=#=#= =#=#=#= End test: Update an existing promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Update an existing promotable score attribute (XML) =#=#=#= Begin test: Query after updating an existing promotable score attribute =#=#=#= scope=status name=master-promotable-rsc value=5 =#=#=#= End 
test: Query after updating an existing promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating an existing promotable score attribute =#=#=#= Begin test: Query after updating an existing promotable score attribute (XML) =#=#=#= =#=#=#= End test: Query after updating an existing promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating an existing promotable score attribute (XML) =#=#=#= Begin test: Delete an existing promotable score attribute =#=#=#= Deleted status attribute: id=status-1-master-promotable-rsc name=master-promotable-rsc =#=#=#= End test: Delete an existing promotable score attribute - OK (0) =#=#=#= * Passed: crm_attribute - Delete an existing promotable score attribute =#=#=#= Begin test: Delete an existing promotable score attribute (XML) =#=#=#= =#=#=#= End test: Delete an existing promotable score attribute (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Delete an existing promotable score attribute (XML) =#=#=#= Begin test: Query after deleting an existing promotable score attribute =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query after deleting an existing promotable score attribute - No such object (105) =#=#=#= * Passed: crm_attribute - Query after deleting an existing promotable score attribute =#=#=#= Begin test: Query after deleting an existing promotable score attribute (XML) =#=#=#= crm_attribute: Error performing operation: No such device or address =#=#=#= End test: Query after deleting an existing promotable score attribute (XML) - No such object (105) =#=#=#= * Passed: crm_attribute - Query after deleting an existing promotable score attribute (XML) =#=#=#= Begin test: Update a promotable score attribute to -INFINITY =#=#=#= =#=#=#= End test: Update a promotable score attribute to -INFINITY - OK (0) =#=#=#= * Passed: crm_attribute - Update a promotable score attribute to -INFINITY =#=#=#= Begin test: Update 
a promotable score attribute to -INFINITY (XML) =#=#=#= =#=#=#= End test: Update a promotable score attribute to -INFINITY (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Update a promotable score attribute to -INFINITY (XML) =#=#=#= Begin test: Query after updating a promotable score attribute to -INFINITY =#=#=#= scope=status name=master-promotable-rsc value=-INFINITY =#=#=#= End test: Query after updating a promotable score attribute to -INFINITY - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating a promotable score attribute to -INFINITY =#=#=#= Begin test: Query after updating a promotable score attribute to -INFINITY (XML) =#=#=#= =#=#=#= End test: Query after updating a promotable score attribute to -INFINITY (XML) - OK (0) =#=#=#= * Passed: crm_attribute - Query after updating a promotable score attribute to -INFINITY (XML) =#=#=#= Begin test: Try OCF_RESOURCE_INSTANCE if -p is specified with an empty string =#=#=#= scope=status name=master-promotable-rsc value=-INFINITY =#=#=#= End test: Try OCF_RESOURCE_INSTANCE if -p is specified with an empty string - OK (0) =#=#=#= * Passed: crm_attribute - Try OCF_RESOURCE_INSTANCE if -p is specified with an empty string =#=#=#= Begin test: Return usage error if both -p and OCF_RESOURCE_INSTANCE are empty strings =#=#=#= crm_attribute: -p/--promotion must be called from an OCF resource agent or with a resource ID specified =#=#=#= End test: Return usage error if both -p and OCF_RESOURCE_INSTANCE are empty strings - Incorrect usage (64) =#=#=#= * Passed: crm_attribute - Return usage error if both -p and OCF_RESOURCE_INSTANCE are empty strings =#=#=#= Begin test: Check that CIB_file="-" works - crm_mon =#=#=#= Cluster Summary: * Stack: corosync * Current DC: cluster02 (version) - partition with quorum * Last updated: * Last change: * 5 nodes configured * 32 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] * GuestOnline: [ httpd-bundle-0 httpd-bundle-1 ] Active 
Resources: * Clone Set: ping-clone [ping]: * Started: [ cluster01 cluster02 ] * Fencing (stonith:fence_xvm): Started cluster01 * dummy (ocf:pacemaker:Dummy): Started cluster02 * Container bundle set: httpd-bundle [pcmk:http]: * httpd-bundle-0 (192.168.122.131) (ocf:heartbeat:apache): Started cluster01 * httpd-bundle-1 (192.168.122.132) (ocf:heartbeat:apache): Started cluster02 * httpd-bundle-2 (192.168.122.133) (ocf:heartbeat:apache): Stopped * Resource Group: exim-group: * Public-IP (ocf:heartbeat:IPaddr): Started cluster02 * Email (lsb:exim): Started cluster02 * Clone Set: mysql-clone-group [mysql-group]: * Started: [ cluster01 cluster02 ] * Clone Set: promotable-clone [promotable-rsc] (promotable): * Promoted: [ cluster02 ] * Unpromoted: [ cluster01 ] =#=#=#= End test: Check that CIB_file="-" works - crm_mon - OK (0) =#=#=#= * Passed: cat - Check that CIB_file="-" works - crm_mon =#=#=#= Begin test: Check that CIB_file="-" works - crm_resource =#=#=#= =#=#=#= End test: Check that CIB_file="-" works - crm_resource - OK (0) =#=#=#= * Passed: cat - Check that CIB_file="-" works - crm_resource =#=#=#= Begin test: Check that CIB_file="-" works - crmadmin =#=#=#= 11 =#=#=#= End test: Check that CIB_file="-" works - crmadmin - OK (0) =#=#=#= * Passed: cat - Check that CIB_file="-" works - crmadmin =#=#=#= Begin test: Get active shadow instance (no active instance) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance (no active instance) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance (no active instance) =#=#=#= Begin test: Get active shadow instance (no active instance) (XML) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance (no active instance) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance (no active instance) (XML) =#=#=#= Begin test: Get active shadow instance's file name (no active 
instance) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's file name (no active instance) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's file name (no active instance) =#=#=#= Begin test: Get active shadow instance's file name (no active instance) (XML) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's file name (no active instance) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's file name (no active instance) (XML) =#=#=#= Begin test: Get active shadow instance's contents (no active instance) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's contents (no active instance) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (no active instance) =#=#=#= Begin test: Get active shadow instance's contents (no active instance) (XML) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's contents (no active instance) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (no active instance) (XML) =#=#=#= Begin test: Get active shadow instance's diff (no active instance) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's diff (no active instance) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (no active instance) =#=#=#= Begin test: Get active shadow instance's diff (no active instance) (XML) =#=#=#= crm_shadow: No active shadow configuration defined =#=#=#= End test: Get active shadow instance's diff (no active instance) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (no active instance) (XML) =#=#=#= Begin test: Create copied shadow instance =#=#=#= A new 
shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance =#=#=#= Begin test: Create copied shadow instance (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (XML) =#=#=#= Begin test: Get active shadow instance (copied) =#=#=#= cts-cli =#=#=#= End test: Get active shadow instance (copied) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance (copied) =#=#=#= Begin test: Get active shadow instance (copied) (XML) =#=#=#= =#=#=#= End test: Get active shadow instance (copied) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance (copied) (XML) =#=#=#= Begin test: Get active shadow instance's file name (copied) =#=#=#= /tmp/cts-cli.shadow/shadow.cts-cli =#=#=#= End test: Get active shadow instance's file name (copied) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's file name (copied) =#=#=#= Begin test: Get active shadow instance's file name (copied) (XML) =#=#=#= =#=#=#= End test: Get active shadow instance's file name (copied) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's file name (copied) (XML) =#=#=#= Begin test: Get active shadow instance's contents (copied) =#=#=#= =#=#=#= End test: Get active shadow instance's contents (copied) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (copied) =#=#=#= Begin test: Get active shadow instance's contents (copied) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's contents (copied) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (copied) (XML) =#=#=#= Begin test: Get active shadow instance's diff 
(copied) =#=#=#= =#=#=#= End test: Get active shadow instance's diff (copied) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (copied) =#=#=#= Begin test: Get active shadow instance's diff (copied) (XML) =#=#=#= =#=#=#= End test: Get active shadow instance's diff (copied) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (copied) (XML) =#=#=#= Begin test: Get active shadow instance's diff (after changes) =#=#=#= Diff: --- 1.1.173 2 Diff: +++ 1.4.1 (null) -- /cib/configuration/op_defaults + /cib: @epoch=4, @num_updates=1 + /cib/configuration/resources/primitive[@id='dummy']: @description=desc ++ /cib/configuration/resources: ++ /cib/status: =#=#=#= End test: Get active shadow instance's diff (after changes) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after changes) =#=#=#= Begin test: Get active shadow instance's diff (after changes) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's diff (after changes) (XML) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after changes) (XML) =#=#=#= Begin test: Commit shadow instance =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance =#=#=#= Begin test: Commit shadow instance (force) =#=#=#= =#=#=#= End test: Commit shadow instance (force) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (force) =#=#=#= Begin test: Get active shadow instance's diff (after commit) =#=#=#= Diff: --- 1.2.0 2 Diff: +++ 1.4.1 (null) + /cib: @epoch=4, @num_updates=1 ++ /cib/status: =#=#=#= End test: Get active shadow instance's diff (after commit) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after commit) =#=#=#= Begin test: Commit shadow instance (force) (all) =#=#=#= =#=#=#= End test: Commit shadow instance (force) (all) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (force) (all) =#=#=#= Begin test: Get active shadow instance's diff (after commit all) =#=#=#= Diff: --- 1.4.2 2 Diff: +++ 1.4.1 (null) + /cib: @num_updates=1 =#=#=#= End test: Get active shadow instance's diff (after commit all) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after commit all) =#=#=#= Begin test: Commit shadow instance (XML) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (XML) =#=#=#= Begin test: Commit shadow instance (force) (XML) =#=#=#= =#=#=#= End test: Commit shadow instance (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (force) (XML) =#=#=#= Begin test: Get active shadow instance's diff (after commit) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's diff (after commit) (XML) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after commit) (XML) =#=#=#= Begin test: Commit shadow instance (force) (all) (XML) =#=#=#= =#=#=#= End test: Commit shadow instance (force) (all) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (force) (all) (XML) =#=#=#= Begin test: Get active shadow instance's diff (after commit all) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's diff (after commit all) (XML) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after commit all) (XML) =#=#=#= Begin test: Commit shadow instance (no active instance) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. =#=#=#= End test: Commit shadow instance (no active instance) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (no active instance) =#=#=#= Begin test: Commit shadow instance (no active instance) (force) =#=#=#= =#=#=#= End test: Commit shadow instance (no active instance) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (no active instance) (force) =#=#=#= Begin test: Commit shadow instance (no active instance) (XML) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (no active instance) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (no active instance) (XML) =#=#=#= Begin test: Commit shadow instance (no active instance) (force) (XML) =#=#=#= =#=#=#= End test: Commit shadow instance (no active instance) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (no active instance) (force) (XML) =#=#=#= Begin test: Commit shadow instance (mismatch) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. Additionally, the supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. =#=#=#= End test: Commit shadow instance (mismatch) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (mismatch) =#=#=#= Begin test: Commit shadow instance (mismatch) (force) =#=#=#= =#=#=#= End test: Commit shadow instance (mismatch) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (mismatch) (force) =#=#=#= Begin test: Commit shadow instance (mismatch) (XML) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. Additionally, the supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (mismatch) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (mismatch) (XML) =#=#=#= Begin test: Commit shadow instance (mismatch) (force) (XML) =#=#=#= =#=#=#= End test: Commit shadow instance (mismatch) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Commit shadow instance (mismatch) (force) (XML) =#=#=#= Begin test: Commit shadow instance (nonexistent shadow file) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. =#=#=#= End test: Commit shadow instance (nonexistent shadow file) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent shadow file) =#=#=#= Begin test: Commit shadow instance (nonexistent shadow file) (force) =#=#=#= crm_shadow: Could not access shadow instance 'nonexistent_shadow': No such file or directory =#=#=#= End test: Commit shadow instance (nonexistent shadow file) (force) - No such object (105) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent shadow file) (force) =#=#=#= Begin test: Get active shadow instance's diff (nonexistent shadow file) =#=#=#= crm_shadow: Could not access shadow instance 'nonexistent_shadow': No such file or directory =#=#=#= End test: Get active shadow instance's diff (nonexistent shadow file) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (nonexistent shadow file) =#=#=#= Begin test: Commit shadow instance (nonexistent shadow file) (XML) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (nonexistent shadow file) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent shadow file) (XML) =#=#=#= Begin test: Commit shadow instance (nonexistent shadow file) (force) (XML) =#=#=#= crm_shadow: Could not access shadow instance 'nonexistent_shadow': No such file or directory =#=#=#= End test: Commit shadow instance (nonexistent shadow file) (force) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent shadow file) (force) (XML) =#=#=#= Begin test: Get active shadow instance's diff (nonexistent shadow file) (XML) =#=#=#= crm_shadow: Could not access shadow instance 'nonexistent_shadow': No such file or directory =#=#=#= End test: Get active shadow instance's diff (nonexistent shadow file) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (nonexistent shadow file) (XML) =#=#=#= Begin test: Commit shadow instance (nonexistent CIB file) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (nonexistent CIB file) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent CIB file) =#=#=#= Begin test: Commit shadow instance (nonexistent CIB file) (force) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Commit shadow instance (nonexistent CIB file) (force) - No such object (105) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent CIB file) (force) =#=#=#= Begin test: Get active shadow instance's diff (nonexistent CIB file) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Get active shadow instance's diff (nonexistent CIB file) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (nonexistent CIB file) =#=#=#= Begin test: Commit shadow instance (nonexistent CIB file) (XML) =#=#=#= crm_shadow: The commit command overwrites the active cluster configuration. To prevent accidental destruction of the cluster, the --force flag is required in order to proceed. 
=#=#=#= End test: Commit shadow instance (nonexistent CIB file) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent CIB file) (XML) =#=#=#= Begin test: Commit shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Commit shadow instance (nonexistent CIB file) (force) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Commit shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= Begin test: Get active shadow instance's diff (nonexistent CIB file) (XML) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Get active shadow instance's diff (nonexistent CIB file) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (nonexistent CIB file) (XML) =#=#=#= Begin test: Delete shadow instance =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Delete shadow instance - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance =#=#=#= Begin test: Delete shadow instance (force) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (force) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (force) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (XML) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Delete shadow instance (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (XML) =#=#=#= Begin test: Delete shadow instance (force) (XML) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (force) (XML) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (no active instance) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Delete shadow instance (no active instance) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (no active instance) =#=#=#= Begin test: Delete shadow instance (no active instance) (force) =#=#=#= =#=#=#= End test: Delete shadow instance (no active instance) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (no active instance) (force) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (no active instance) (XML) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Delete shadow instance (no active instance) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (no active instance) (XML) =#=#=#= Begin test: Delete shadow instance (no active instance) (force) (XML) =#=#=#= =#=#=#= End test: Delete shadow instance (no active instance) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (no active instance) (force) (XML) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (mismatch) =#=#=#= crm_shadow: The delete command removes the specified shadow file. Additionally, the supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Delete shadow instance (mismatch) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (mismatch) =#=#=#= Begin test: Delete shadow instance (mismatch) (force) =#=#=#= =#=#=#= End test: Delete shadow instance (mismatch) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (mismatch) (force) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (mismatch) (XML) =#=#=#= crm_shadow: The delete command removes the specified shadow file. Additionally, the supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Delete shadow instance (mismatch) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (mismatch) (XML) =#=#=#= Begin test: Delete shadow instance (mismatch) (force) (XML) =#=#=#= =#=#=#= End test: Delete shadow instance (mismatch) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (mismatch) (force) (XML) =#=#=#= Begin test: Delete shadow instance (nonexistent shadow file) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Delete shadow instance (nonexistent shadow file) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent shadow file) =#=#=#= Begin test: Delete shadow instance (nonexistent shadow file) (force) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (nonexistent shadow file) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent shadow file) (force) =#=#=#= Begin test: Delete shadow instance (nonexistent shadow file) (XML) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Delete shadow instance (nonexistent shadow file) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent shadow file) (XML) =#=#=#= Begin test: Delete shadow instance (nonexistent shadow file) (force) (XML) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (nonexistent shadow file) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent shadow file) (force) (XML) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (nonexistent CIB file) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Delete shadow instance (nonexistent CIB file) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent CIB file) =#=#=#= Begin test: Delete shadow instance (nonexistent CIB file) (force) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (nonexistent CIB file) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent CIB file) (force) A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Delete shadow instance (nonexistent CIB file) (XML) =#=#=#= crm_shadow: The delete command removes the specified shadow file. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Delete shadow instance (nonexistent CIB file) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent CIB file) (XML) =#=#=#= Begin test: Delete shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= Remember to unset the CIB_shadow variable by entering the following into your shell: unset CIB_shadow =#=#=#= End test: Delete shadow instance (nonexistent CIB file) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Delete shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= Begin test: Create copied shadow instance (no active instance) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (no active instance) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (no active instance) =#=#=#= Begin test: Create copied shadow instance (no active instance) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (no active instance) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (no active instance) (XML) =#=#=#= Begin test: Create copied shadow instance (mismatch) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (mismatch) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (mismatch) =#=#=#= Begin test: Create copied shadow instance (mismatch) (XML) =#=#=#= A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (mismatch) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (mismatch) (XML) =#=#=#= Begin test: Create copied shadow instance (file already exists) =#=#=#= crm_shadow: A shadow instance 'cts-cli' already exists. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Create copied shadow instance (file already exists) - Cannot create output file (73) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (file already exists) =#=#=#= Begin test: Create copied shadow instance (file already exists) (force) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (file already exists) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (file already exists) (force) =#=#=#= Begin test: Create copied shadow instance (file already exists) (XML) =#=#=#= crm_shadow: A shadow instance 'cts-cli' already exists. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Create copied shadow instance (file already exists) (XML) - Cannot create output file (73) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (file already exists) (XML) =#=#=#= Begin test: Create copied shadow instance (file already exists) (force) (XML) =#=#=#= A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create copied shadow instance (file already exists) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (file already exists) (force) (XML) =#=#=#= Begin test: Create copied shadow instance (nonexistent CIB file) (force) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Create copied shadow instance (nonexistent CIB file) (force) - No such object (105) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (nonexistent CIB file) (force) =#=#=#= Begin test: Create copied shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Create copied shadow instance (nonexistent CIB file) (force) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Create copied shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= Begin test: Create empty shadow instance =#=#=#= Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance =#=#=#= Begin test: Create empty shadow instance (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (XML) =#=#=#= Begin test: Create empty shadow instance (no active instance) =#=#=#= Created new pacemaker configuration A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (no active instance) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (no active instance) =#=#=#= Begin test: Create empty shadow instance (no active instance) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (no active instance) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (no active instance) (XML) =#=#=#= Begin test: Create empty shadow instance (mismatch) =#=#=#= Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (mismatch) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (mismatch) =#=#=#= Begin test: Create empty shadow instance (mismatch) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (mismatch) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (mismatch) (XML) =#=#=#= Begin test: Create empty shadow instance (nonexistent CIB file) =#=#=#= Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (nonexistent CIB file) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (nonexistent CIB file) =#=#=#= Begin test: Create empty shadow instance (nonexistent CIB file) (XML) =#=#=#= A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (nonexistent CIB file) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (nonexistent CIB file) (XML) =#=#=#= Begin test: Create empty shadow instance (file already exists) =#=#=#= crm_shadow: A shadow instance 'cts-cli' already exists. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Create empty shadow instance (file already exists) - Cannot create output file (73) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (file already exists) =#=#=#= Begin test: Create empty shadow instance (file already exists) (force) =#=#=#= Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (file already exists) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (file already exists) (force) =#=#=#= Begin test: Create empty shadow instance (file already exists) (XML) =#=#=#= crm_shadow: A shadow instance 'cts-cli' already exists. To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Create empty shadow instance (file already exists) (XML) - Cannot create output file (73) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (file already exists) (XML) =#=#=#= Begin test: Create empty shadow instance (file already exists) (force) (XML) =#=#=#= A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Create empty shadow instance (file already exists) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Create empty shadow instance (file already exists) (force) (XML) =#=#=#= Begin test: Get active shadow instance's contents (empty CIB) =#=#=#= =#=#=#= End test: Get active shadow instance's contents (empty CIB) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (empty CIB) =#=#=#= Begin test: Get active shadow instance's contents (empty CIB) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's contents (empty CIB) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's contents (empty CIB) (XML) =#=#=#= Begin test: Get active shadow instance's diff (empty CIB) =#=#=#= Diff: --- 1.1.173 2 Diff: +++ 0.1.0 (null) -- /cib/configuration/crm_config/cluster_property_set[@id='cib-bootstrap-options'] -- /cib/configuration/nodes/node[@id='1'] -- /cib/configuration/nodes/node[@id='2'] -- /cib/configuration/resources/clone[@id='ping-clone'] -- /cib/configuration/resources/primitive[@id='Fencing'] -- /cib/configuration/resources/primitive[@id='dummy'] -- /cib/configuration/resources/clone[@id='inactive-clone'] -- /cib/configuration/resources/group[@id='inactive-group'] -- /cib/configuration/resources/bundle[@id='httpd-bundle'] -- /cib/configuration/resources/group[@id='exim-group'] -- /cib/configuration/resources/clone[@id='mysql-clone-group'] -- /cib/configuration/resources/clone[@id='promotable-clone'] -- /cib/configuration/constraints/rsc_location[@id='not-on-cluster1'] -- /cib/configuration/constraints/rsc_location[@id='loc-promotable-clone'] -- /cib/configuration/tags -- /cib/configuration/op_defaults -- /cib/status/node_state[@id='2'] -- /cib/status/node_state[@id='1'] -- /cib/status/node_state[@id='httpd-bundle-0'] -- /cib/status/node_state[@id='httpd-bundle-1'] + /cib: @validate-with=pacemaker-X, @num_updates=0, 
@admin_epoch=0 -- /cib: @cib-last-written, @update-origin, @update-client, @update-user, @have-quorum, @dc-uuid =#=#=#= End test: Get active shadow instance's diff (empty CIB) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (empty CIB) =#=#=#= Begin test: Get active shadow instance's diff (empty CIB) (XML) =#=#=#= ]]> =#=#=#= End test: Get active shadow instance's diff (empty CIB) (XML) - Error occurred (1) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (empty CIB) (XML) =#=#=#= Begin test: Reset shadow instance =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance =#=#=#= Begin test: Get active shadow instance's diff (after reset) =#=#=#= =#=#=#= End test: Get active shadow instance's diff (after reset) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after reset) Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Reset shadow instance (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (XML) =#=#=#= Begin test: Get active shadow instance's diff (after reset) (XML) =#=#=#= =#=#=#= End test: Get active shadow instance's diff (after reset) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Get active shadow instance's diff (after reset) (XML) =#=#=#= Begin test: Reset shadow instance (no active instance) =#=#=#= A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (no active instance) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (no active instance) Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Reset shadow instance (no active instance) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (no active instance) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (no active instance) (XML) =#=#=#= Begin test: Reset shadow instance (mismatch) =#=#=#= crm_shadow: The supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. =#=#=#= End test: Reset shadow instance (mismatch) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Reset shadow instance (mismatch) =#=#=#= Begin test: Reset shadow instance (mismatch) (force) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (mismatch) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (mismatch) (force) Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Reset shadow instance (mismatch) (XML) =#=#=#= crm_shadow: The supplied shadow instance (cts-cli) is not the same as the active one (nonexistent_shadow). To prevent accidental destruction of the shadow file, the --force flag is required in order to proceed. 
=#=#=#= End test: Reset shadow instance (mismatch) (XML) - Incorrect usage (64) =#=#=#= * Passed: crm_shadow - Reset shadow instance (mismatch) (XML) =#=#=#= Begin test: Reset shadow instance (mismatch) (force) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (mismatch) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (mismatch) (force) (XML) Created new pacemaker configuration A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Reset shadow instance (nonexistent CIB file) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Reset shadow instance (nonexistent CIB file) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent CIB file) =#=#=#= Begin test: Reset shadow instance (nonexistent CIB file) (XML) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Reset shadow instance (nonexistent CIB file) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent CIB file) (XML) =#=#=#= Begin test: Reset shadow instance (nonexistent CIB file) (force) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Reset shadow instance (nonexistent CIB file) (force) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent CIB file) (force) =#=#=#= Begin test: Reset shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= crm_shadow: Could not connect to CIB: No such device or address =#=#=#= End test: Reset shadow instance (nonexistent CIB file) (force) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent CIB file) (force) (XML) =#=#=#= Begin test: Reset shadow instance (nonexistent shadow file) 
=#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Reset shadow instance (nonexistent shadow file) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent shadow file) =#=#=#= Begin test: Reset shadow instance (nonexistent shadow file) (force) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (nonexistent shadow file) (force) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent shadow file) (force) =#=#=#= Begin test: Reset shadow instance (nonexistent shadow file) (XML) =#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Reset shadow instance (nonexistent shadow file) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent shadow file) (XML) =#=#=#= Begin test: Reset shadow instance (nonexistent shadow file) (force) (XML) =#=#=#= A new shadow instance was created. To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Reset shadow instance (nonexistent shadow file) (force) (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Reset shadow instance (nonexistent shadow file) (force) (XML) Created new pacemaker configuration A new shadow instance was created. 
To begin using it, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= Begin test: Switch to new shadow instance =#=#=#= To switch to the named shadow instance, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Switch to new shadow instance - OK (0) =#=#=#= * Passed: crm_shadow - Switch to new shadow instance =#=#=#= Begin test: Switch to new shadow instance (XML) =#=#=#= To switch to the named shadow instance, enter the following into your shell: export CIB_shadow=cts-cli =#=#=#= End test: Switch to new shadow instance (XML) - OK (0) =#=#=#= * Passed: crm_shadow - Switch to new shadow instance (XML) =#=#=#= Begin test: Switch to nonexistent shadow instance =#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Switch to nonexistent shadow instance - No such object (105) =#=#=#= * Passed: crm_shadow - Switch to nonexistent shadow instance =#=#=#= Begin test: Switch to nonexistent shadow instance (force) =#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Switch to nonexistent shadow instance (force) - No such object (105) =#=#=#= * Passed: crm_shadow - Switch to nonexistent shadow instance (force) =#=#=#= Begin test: Switch to nonexistent shadow instance (XML) =#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Switch to nonexistent shadow instance (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Switch to nonexistent shadow instance (XML) =#=#=#= Begin test: Switch to nonexistent shadow instance (force) (XML) =#=#=#= crm_shadow: Could not access shadow instance 'cts-cli': No such file or directory =#=#=#= End test: Switch to nonexistent shadow instance (force) (XML) - No such object (105) =#=#=#= * Passed: crm_shadow - Switch to nonexistent shadow instance (force) (XML) +=#=#=#= Begin test: Verbosely verify a file-specified configuration with 
an unallowed fencing level ID =#=#=#= +warning: Ignoring topology registration with invalid level 10 +Warnings found during check: config not valid +=#=#=#= End test: Verbosely verify a file-specified configuration with an unallowed fencing level ID - Invalid configuration (78) =#=#=#= +* Passed: crm_verify - Verbosely verify a file-specified configuration with an unallowed fencing level ID =#=#=#= Begin test: Verify a file-specified invalid configuration (text output) =#=#=#= Errors found during check: config not valid -V may provide more details =#=#=#= End test: Verify a file-specified invalid configuration (text output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (text output) =#=#=#= Begin test: Verify a file-specified invalid configuration (verbose text output) =#=#=#= unpack_config warning: Blind faith: not fencing unseen nodes error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation Errors found during check: config not valid =#=#=#= End test: Verify a file-specified invalid configuration (verbose text output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (verbose text output) =#=#=#= Begin test: Verify a file-specified invalid configuration (quiet text output) =#=#=#= =#=#=#= End test: Verify a file-specified invalid configuration (quiet text output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (quiet text output) =#=#=#= Begin test: Verify a file-specified invalid configuration (XML output) =#=#=#= error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation 
Errors found during check: config not valid =#=#=#= End test: Verify a file-specified invalid configuration (XML output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (XML output) =#=#=#= Begin test: Verify a file-specified invalid configuration (verbose XML output) =#=#=#= unpack_config warning: Blind faith: not fencing unseen nodes error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation Errors found during check: config not valid =#=#=#= End test: Verify a file-specified invalid configuration (verbose XML output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (verbose XML output) =#=#=#= Begin test: Verify a file-specified invalid configuration (quiet XML output) =#=#=#= error: Resource test2:0 is of type systemd and therefore cannot be used as a promotable clone resource error: Ignoring <clone> resource 'test2-clone' because configuration is invalid error: CIB did not pass schema validation =#=#=#= End test: Verify a file-specified invalid configuration (quiet XML output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify a file-specified invalid configuration (quiet XML output) =#=#=#= Begin test: Verify another file-specified invalid configuration (XML output) =#=#=#= error: Resource start-up disabled since no STONITH resources have been defined error: Either configure some or disable STONITH with the stonith-enabled option error: NOTE: Clusters with shared data need STONITH to ensure data integrity warning: Node pcmk-1 is unclean but cannot be fenced warning: Node pcmk-2 is unclean but cannot be fenced error: CIB did not pass schema validation Errors found during check: config not valid =#=#=#= End test: Verify another file-specified invalid configuration (XML 
output) - Invalid configuration (78) =#=#=#= * Passed: crm_verify - Verify another file-specified invalid configuration (XML output) =#=#=#= Begin test: Verify a file-specified valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verify a file-specified valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: crm_verify - Verify a file-specified valid configuration, outputting as xml =#=#=#= Begin test: Verify a piped-in valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verify a piped-in valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: cat - Verify a piped-in valid configuration, outputting as xml =#=#=#= Begin test: Verbosely verify a file-specified valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verbosely verify a file-specified valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: crm_verify - Verbosely verify a file-specified valid configuration, outputting as xml =#=#=#= Begin test: Verbosely verify a piped-in valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verbosely verify a piped-in valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: cat - Verbosely verify a piped-in valid configuration, outputting as xml =#=#=#= Begin test: Verify a string-supplied valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verify a string-supplied valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: crm_verify - Verify a string-supplied valid configuration, outputting as xml =#=#=#= Begin test: Verbosely verify a string-supplied valid configuration, outputting as xml =#=#=#= =#=#=#= End test: Verbosely verify a string-supplied valid configuration, outputting as xml - OK (0) =#=#=#= * Passed: crm_verify - Verbosely verify a string-supplied valid configuration, outputting as xml diff --git a/cts/cts-cli.in b/cts/cts-cli.in index 10e4d5cc4d..3ab03cbd36 100755 --- a/cts/cts-cli.in +++ b/cts/cts-cli.in @@ -1,3722 +1,3727 @@ #!@BASH_PATH@ # # 
Copyright 2008-2024 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # # Set the exit status of a command to the exit code of the last program to # exit non-zero. This is bash-specific. set -o pipefail # # Note on portable usage of sed: GNU/POSIX/*BSD sed have a limited subset of # compatible functionality. Do not use the -i option, alternation (\|), # \0, or character sequences such as \n or \s. # USAGE_TEXT="Usage: cts-cli [] Options: --help Display this text, then exit -V, --verbose Display any differences from expected output -t 'TEST [...]' Run only specified tests (default: 'access_render daemons dates error_codes tools crm_mon acls validity upgrade rules feature_set'). Other tests: agents (must be run in an installed environment). -p DIR Look for executables in DIR (may be specified multiple times) -v, --valgrind Run all commands under valgrind -s Save actual output as expected output" # If readlink supports -e (i.e. GNU), use it readlink -e / >/dev/null 2>/dev/null if [ $? 
-eq 0 ]; then test_home="$(dirname "$(readlink -e "$0")")" else test_home="$(dirname "$0")" fi : ${shadow=cts-cli} shadow_dir=$(mktemp -d ${TMPDIR:-/tmp}/cts-cli.shadow.XXXXXXXXXX) num_errors=0 num_passed=0 verbose=0 tests="access_render daemons dates error_codes tools crm_mon acls validity" tests="$tests upgrade rules feature_set" do_save=0 XMLLINT_CMD= VALGRIND_CMD= VALGRIND_OPTS=" -q --gen-suppressions=all --show-reachable=no --leak-check=full --trace-children=no --time-stamp=yes --num-callers=20 --suppressions=$test_home/valgrind-pcmk.suppressions " # Temp files for saving a command's stdout/stderr in _test_assert() test_assert_outfile=$(mktemp ${TMPDIR:-/tmp}/cts-cli.ta_outfile.XXXXXXXXXX) test_assert_errfile=$(mktemp ${TMPDIR:-/tmp}/cts-cli.ta_errfile.XXXXXXXXXX) xmllint_outfile=$(mktemp ${TMPDIR:-/tmp}/cts-cli.xmllint_outfile.XXXXXXXXXX) # Log test errors to stderr export PCMK_stderr=1 # Output when PCMK_trace_functions is undefined is different from when it's # empty. Later we save the value of PCMK_trace_functions, do work, and restore # the original value. Getting back to the initial state is simplest if we assume # the variable is defined. 
: ${PCMK_trace_functions=""} export PCMK_trace_functions # These constants must track crm_exit_t values CRM_EX_OK=0 CRM_EX_ERROR=1 CRM_EX_INVALID_PARAM=2 CRM_EX_UNIMPLEMENT_FEATURE=3 CRM_EX_INSUFFICIENT_PRIV=4 CRM_EX_NOT_CONFIGURED=6 CRM_EX_USAGE=64 CRM_EX_DATAERR=65 CRM_EX_CANTCREAT=73 CRM_EX_CONFIG=78 CRM_EX_OLD=103 CRM_EX_DIGEST=104 CRM_EX_NOSUCH=105 CRM_EX_UNSAFE=107 CRM_EX_EXISTS=108 CRM_EX_MULTIPLE=109 CRM_EX_EXPIRED=110 CRM_EX_NOT_YET_IN_EFFECT=111 reset_shadow_cib_version() { local SHADOWPATH SHADOWPATH="$(crm_shadow --file)" # sed -i isn't portable :-( cp -p "$SHADOWPATH" "${SHADOWPATH}.$$" # preserve permissions sed -e 's/epoch="[0-9]*"/epoch="1"/g' \ -e 's/num_updates="[0-9]*"/num_updates="0"/g' \ -e 's/admin_epoch="[0-9]*"/admin_epoch="0"/g' \ "$SHADOWPATH" > "${SHADOWPATH}.$$" mv -- "${SHADOWPATH}.$$" "$SHADOWPATH" } # A newly created empty CIB might or might not have a rsc_defaults section # depending on whether the --with-resource-stickiness-default configure # option was used. To ensure regression tests behave the same either way, # delete any rsc_defaults after creating or erasing a CIB. delete_shadow_resource_defaults() { cibadmin --delete --xml-text '' # The above command might or might not bump the CIB version, so reset it # to ensure future changes result in the same version for comparison. 
reset_shadow_cib_version } create_shadow_cib() { local VALIDATE_WITH local SHADOW_CMD CREATE_ARG="$1" VALIDATE_WITH="$2" export CIB_shadow_dir="${shadow_dir}" SHADOW_CMD="$VALGRIND_CMD crm_shadow --batch --force $CREATE_ARG" if [ -z "$VALIDATE_WITH" ]; then $SHADOW_CMD "$shadow" 2>&1 else $SHADOW_CMD "$shadow" --validate-with="${VALIDATE_WITH}" 2>&1 fi export CIB_shadow="$shadow" delete_shadow_resource_defaults } function _test_assert() { target=$1; shift validate=$1; shift cib=$1; shift app=$(echo "$cmd" | head -n 1 | sed 's/\ .*//') printf "* Running: $app - $desc\n" 1>&2 printf "=#=#=#= Begin test: $desc =#=#=#=\n" # Capture stderr and stdout separately, then print them consecutively eval $VALGRIND_CMD $cmd > "$test_assert_outfile" 2> "$test_assert_errfile" rc=$? cat "$test_assert_errfile" cat "$test_assert_outfile" if [ x$cib != x0 ]; then printf "=#=#=#= Current cib after: $desc =#=#=#=\n" CIB_user=root cibadmin -Q fi # Do not validate if running under valgrind, even if told to do so. Valgrind # will output a lot more stuff that is not XML, so it wouldn't validate anyway. if [ "$validate" = "1" ] && [ "$VALGRIND_CMD" = "" ] && [ $rc = 0 ] && [ "$XMLLINT_CMD" != "" ]; then # The sed command filters out the "- validates" line that xmllint will output # on success. grep cannot be used here because "grep -v 'validates$'" will # return an exit code of 1 if its input consists entirely of "- validates". $XMLLINT_CMD --noout --relaxng \ "$PCMK_schema_directory/api/api-result.rng" "$test_assert_outfile" \ > "$xmllint_outfile" 2>&1 rc=$? 
sed -n '/validates$/ !p' "$xmllint_outfile" if [ $rc = 0 ]; then printf "=#=#=#= End test: %s - $(crm_error --exit $rc) (%d) =#=#=#=\n" "$desc" $rc else printf "=#=#=#= End test: %s - Failed to validate (%d) =#=#=#=\n" "$desc" $rc fi else printf "=#=#=#= End test: %s - $(crm_error --exit $rc) (%d) =#=#=#=\n" "$desc" $rc fi if [ $rc -ne $target ]; then num_errors=$(( $num_errors + 1 )) printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc" printf "* Failed (rc=%.3d): %-14s - %s\n" $rc $app "$desc (`which $app`)" 1>&2 return exit $CRM_EX_ERROR else printf "* Passed: %-14s - %s\n" $app "$desc" num_passed=$(( $num_passed + 1 )) fi } function test_assert() { _test_assert $1 0 $2 } function test_assert_validate() { _test_assert $1 1 $2 } # Tests that depend on resource agents and must be run in an installed # environment function test_agents() { desc="Validate a valid resource configuration" cmd="crm_resource --validate --class ocf --provider pacemaker --agent Dummy" test_assert $CRM_EX_OK 0 desc="Validate a valid resource configuration (XML)" cmd="crm_resource --validate --class ocf --provider pacemaker --agent Dummy" cmd="$cmd --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Make the Dummy configuration invalid (op_sleep can't be a generic string) export OCF_RESKEY_op_sleep=asdf desc="Validate an invalid resource configuration" cmd="crm_resource --validate --class ocf --provider pacemaker --agent Dummy" test_assert $CRM_EX_NOT_CONFIGURED 0 desc="Validate an invalid resource configuration (XML)" cmd="crm_resource --validate --class ocf --provider pacemaker --agent Dummy" cmd="$cmd --output-as=xml" test_assert_validate $CRM_EX_NOT_CONFIGURED 0 unset OCF_RESKEY_op_sleep export OCF_RESKEY_op_sleep } function test_daemons() { desc="Get CIB manager metadata" cmd="pacemaker-based metadata" test_assert $CRM_EX_OK 0 desc="Get controller metadata" cmd="pacemaker-controld metadata" test_assert $CRM_EX_OK 0 desc="Get fencer metadata" cmd="pacemaker-fenced metadata" 
test_assert $CRM_EX_OK 0 desc="Get scheduler metadata" cmd="pacemaker-schedulerd metadata" test_assert $CRM_EX_OK 0 } function test_crm_mon() { local TMPXML export CIB_file="$test_home/cli/crm_mon.xml" desc="Basic text output" cmd="crm_mon -1" test_assert $CRM_EX_OK 0 desc="XML output" cmd="crm_mon --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Basic text output without node section" cmd="crm_mon -1 --exclude=nodes" test_assert $CRM_EX_OK 0 desc="XML output without the node section" cmd="crm_mon --output-as=xml --exclude=nodes" test_assert_validate $CRM_EX_OK 0 desc="Text output with only the node section" cmd="crm_mon -1 --exclude=all --include=nodes" test_assert $CRM_EX_OK 0 # The above test doesn't need to be performed for other output formats. It's # really just a test to make sure that blank lines are correct. desc="Complete text output" cmd="crm_mon -1 --include=all" test_assert $CRM_EX_OK 0 # XML includes everything already so there's no need for a complete test desc="Complete text output with detail" cmd="crm_mon -1R --include=all" test_assert $CRM_EX_OK 0 # XML includes detailed output already desc="Complete brief text output" cmd="crm_mon -1 --include=all --brief" test_assert $CRM_EX_OK 0 desc="Complete text output grouped by node" cmd="crm_mon -1 --include=all --group-by-node" test_assert $CRM_EX_OK 0 # XML does not have a brief output option desc="Complete brief text output grouped by node" cmd="crm_mon -1 --include=all --group-by-node --brief" test_assert $CRM_EX_OK 0 desc="XML output grouped by node" cmd="crm_mon -1 --output-as=xml --group-by-node" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by node" cmd="crm_mon -1 --include=all --node=cluster01" test_assert $CRM_EX_OK 0 desc="XML output filtered by node" cmd="crm_mon --output-as xml --include=all --node=cluster01" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by tag" cmd="crm_mon -1 --include=all --node=even-nodes" test_assert $CRM_EX_OK 0 
desc="XML output filtered by tag" cmd="crm_mon --output-as=xml --include=all --node=even-nodes" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by resource tag" cmd="crm_mon -1 --include=all --resource=fencing-rscs" test_assert $CRM_EX_OK 0 desc="XML output filtered by resource tag" cmd="crm_mon --output-as=xml --include=all --resource=fencing-rscs" test_assert_validate $CRM_EX_OK 0 desc="Basic text output filtered by node that doesn't exist" cmd="crm_mon -1 --node=blah" test_assert $CRM_EX_OK 0 desc="XML output filtered by node that doesn't exist" cmd="crm_mon --output-as=xml --node=blah" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources" cmd="crm_mon -1 -r" test_assert $CRM_EX_OK 0 # XML already includes inactive resources desc="Basic text output with inactive resources, filtered by node" cmd="crm_mon -1 -r --node=cluster02" test_assert $CRM_EX_OK 0 # XML already includes inactive resources desc="Complete text output filtered by primitive resource" cmd="crm_mon -1 --include=all --resource=Fencing" test_assert $CRM_EX_OK 0 desc="XML output filtered by primitive resource" cmd="crm_mon --output-as=xml --resource=Fencing" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by group resource" cmd="crm_mon -1 --include=all --resource=exim-group" test_assert $CRM_EX_OK 0 desc="XML output filtered by group resource" cmd="crm_mon --output-as=xml --resource=exim-group" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by group resource member" cmd="crm_mon -1 --include=all --resource=Public-IP" test_assert $CRM_EX_OK 0 desc="XML output filtered by group resource member" cmd="crm_mon --output-as=xml --resource=Email" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by clone resource" cmd="crm_mon -1 --include=all --resource=ping-clone" test_assert $CRM_EX_OK 0 desc="XML output filtered by clone resource" cmd="crm_mon --output-as=xml --resource=ping-clone" 
test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by clone resource instance" cmd="crm_mon -1 --include=all --resource=ping" test_assert $CRM_EX_OK 0 desc="XML output filtered by clone resource instance" cmd="crm_mon --output-as=xml --resource=ping" test_assert_validate $CRM_EX_OK 0 desc="Complete text output filtered by exact clone resource instance" cmd="crm_mon -1 --include=all --show-detail --resource=ping:0" test_assert $CRM_EX_OK 0 desc="XML output filtered by exact clone resource instance" cmd="crm_mon --output-as=xml --resource=ping:1" test_assert_validate $CRM_EX_OK 0 desc="Basic text output filtered by resource that doesn't exist" cmd="crm_mon -1 --resource=blah" test_assert $CRM_EX_OK 0 desc="XML output filtered by resource that doesn't exist" cmd="crm_mon --output-as=xml --resource=blah" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by tag" cmd="crm_mon -1 -r --resource=inactive-rscs" test_assert $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by bundle resource" cmd="crm_mon -1 -r --resource=httpd-bundle" test_assert $CRM_EX_OK 0 desc="XML output filtered by inactive bundle resource" cmd="crm_mon --output-as=xml --resource=httpd-bundle" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by bundled IP address resource" cmd="crm_mon -1 -r --resource=httpd-bundle-ip-192.168.122.131" test_assert $CRM_EX_OK 0 desc="XML output filtered by bundled IP address resource" cmd="crm_mon --output-as=xml --resource=httpd-bundle-ip-192.168.122.132" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by bundled container" cmd="crm_mon -1 -r --resource=httpd-bundle-docker-1" test_assert $CRM_EX_OK 0 desc="XML output filtered by bundled container" cmd="crm_mon --output-as=xml --resource=httpd-bundle-docker-2" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by 
bundle connection" cmd="crm_mon -1 -r --resource=httpd-bundle-0" test_assert $CRM_EX_OK 0 desc="XML output filtered by bundle connection" cmd="crm_mon --output-as=xml --resource=httpd-bundle-0" test_assert_validate $CRM_EX_OK 0 desc="Basic text output with inactive resources, filtered by bundled primitive resource" cmd="crm_mon -1 -r --resource=httpd" test_assert $CRM_EX_OK 0 desc="XML output filtered by bundled primitive resource" cmd="crm_mon --output-as=xml --resource=httpd" test_assert_validate $CRM_EX_OK 0 desc="Complete text output, filtered by clone name in cloned group" cmd="crm_mon -1 --include=all --show-detail --resource=mysql-clone-group" test_assert $CRM_EX_OK 0 desc="XML output, filtered by clone name in cloned group" cmd="crm_mon --output-as=xml --resource=mysql-clone-group" test_assert_validate $CRM_EX_OK 0 desc="Complete text output, filtered by group name in cloned group" cmd="crm_mon -1 --include=all --show-detail --resource=mysql-group" test_assert $CRM_EX_OK 0 desc="XML output, filtered by group name in cloned group" cmd="crm_mon --output-as=xml --resource=mysql-group" test_assert_validate $CRM_EX_OK 0 desc="Complete text output, filtered by exact group instance name in cloned group" cmd="crm_mon -1 --include=all --show-detail --resource=mysql-group:1" test_assert $CRM_EX_OK 0 desc="XML output, filtered by exact group instance name in cloned group" cmd="crm_mon --output-as=xml --resource=mysql-group:1" test_assert_validate $CRM_EX_OK 0 desc="Complete text output, filtered by primitive name in cloned group" cmd="crm_mon -1 --include=all --show-detail --resource=mysql-proxy" test_assert $CRM_EX_OK 0 desc="XML output, filtered by primitive name in cloned group" cmd="crm_mon --output-as=xml --resource=mysql-proxy" test_assert_validate $CRM_EX_OK 0 desc="Complete text output, filtered by exact primitive instance name in cloned group" cmd="crm_mon -1 --include=all --show-detail --resource=mysql-proxy:1" test_assert $CRM_EX_OK 0 desc="XML output, 
filtered by exact primitive instance name in cloned group" cmd="crm_mon --output-as=xml --resource=mysql-proxy:1" test_assert_validate $CRM_EX_OK 0 unset CIB_file export CIB_file="$test_home/cli/crm_mon-partial.xml" desc="Text output of partially active resources" cmd="crm_mon -1 --show-detail" test_assert $CRM_EX_OK 0 desc="XML output of partially active resources" cmd="crm_mon -1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Text output of partially active resources, with inactive resources" cmd="crm_mon -1 -r --show-detail" test_assert $CRM_EX_OK 0 # XML already includes inactive resources desc="Complete brief text output, with inactive resources" cmd="crm_mon -1 -r --include=all --brief --show-detail" test_assert $CRM_EX_OK 0 # XML does not have a brief output option desc="Text output of partially active group" cmd="crm_mon -1 --resource=partially-active-group" test_assert $CRM_EX_OK 0 desc="Text output of partially active group, with inactive resources" cmd="crm_mon -1 --resource=partially-active-group -r" test_assert $CRM_EX_OK 0 desc="Text output of active member of partially active group" cmd="crm_mon -1 --resource=dummy-1" test_assert $CRM_EX_OK 0 desc="Text output of inactive member of partially active group" cmd="crm_mon -1 --resource=dummy-2 --show-detail" test_assert $CRM_EX_OK 0 desc="Complete brief text output grouped by node, with inactive resources" cmd="crm_mon -1 -r --include=all --group-by-node --brief --show-detail" test_assert $CRM_EX_OK 0 desc="Text output of partially active resources, with inactive resources, filtered by node" cmd="crm_mon -1 -r --node=cluster01" test_assert $CRM_EX_OK 0 desc="Text output of partially active resources, filtered by node" cmd="crm_mon -1 --output-as=xml --node=cluster01" test_assert_validate $CRM_EX_OK 0 unset CIB_file export CIB_file="$test_home/cli/crm_mon-unmanaged.xml" desc="Text output of active unmanaged resource on offline node" cmd="crm_mon -1" test_assert $CRM_EX_OK 0 desc="XML output of 
active unmanaged resource on offline node" cmd="crm_mon -1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Brief text output of active unmanaged resource on offline node" cmd="crm_mon -1 --brief" test_assert $CRM_EX_OK 0 desc="Brief text output of active unmanaged resource on offline node, grouped by node" cmd="crm_mon -1 --brief --group-by-node" test_assert $CRM_EX_OK 0 # Maintenance mode tests export CIB_file=$(mktemp ${TMPDIR:-/tmp}/cts-cli.crm_mon.xml.XXXXXXXXXX) cp "$test_home/cli/crm_mon.xml" "$CIB_file" crm_attribute -n maintenance-mode -v true desc="Text output of all resources with maintenance-mode enabled" cmd="crm_mon -1 -r" test_assert $CRM_EX_OK 0 desc="XML output of all resources with maintenance-mode enabled" cmd="crm_mon -1 -r --output-as=xml" test_assert_validate $CRM_EX_OK 0 crm_attribute -n maintenance-mode -v false crm_attribute -n maintenance -N cluster02 -v true desc="Text output of all resources with maintenance enabled for a node" cmd="crm_mon -1 -r" test_assert $CRM_EX_OK 0 desc="XML output of all resources with maintenance enabled for a node" cmd="crm_mon -1 -r --output-as=xml" test_assert_validate $CRM_EX_OK 0 rm -f "$CIB_file" unset CIB_file export CIB_file="$test_home/cli/crm_mon-rsc-maint.xml" # The fence resource is excluded, for comparison desc="Text output of all resources with maintenance meta attribute true" cmd="crm_mon -1 -r" test_assert $CRM_EX_OK 0 desc="XML output of all resources with maintenance meta attribute true" cmd="crm_mon -1 -r --output-as=xml" test_assert_validate $CRM_EX_OK 0 unset CIB_file export CIB_file="$test_home/cli/crm_mon-T180.xml" desc="Text output of guest node's container on different node from its" desc="$desc remote resource" cmd="crm_mon -1" test_assert $CRM_EX_OK 0 desc="Complete text output of guest node's container on different node from" desc="$desc its remote resource" cmd="crm_mon -1 --show-detail" test_assert $CRM_EX_OK 0 unset CIB_file } function test_error_codes() { # Note: At the 
time of this writing, crm_error returns success even for # unknown error codes. We don't want to cause a regression by changing that. # Due to the way _test_assert() formats output, we need "crm_error" to be # the first token of cmd. We can't start with a parenthesis or variable # assignment. However, in the "list result codes" tests, we also need to # save some output for later processing. We'll use a temp file for this. local TMPFILE TMPFILE=$(mktemp ${TMPDIR:-/tmp}/cts-cli.crm_error_out.XXXXXXXXXX) # Legacy return codes # # Don't test unknown legacy code. FreeBSD includes a colon in strerror(), # while other distros do not. desc="Get legacy return code" cmd="crm_error -- 201" test_assert $CRM_EX_OK 0 desc="Get legacy return code (XML)" cmd="crm_error --output-as=xml -- 201" test_assert_validate $CRM_EX_OK 0 desc="Get legacy return code (with name)" cmd="crm_error -n -- 201" test_assert $CRM_EX_OK 0 desc="Get legacy return code (with name) (XML)" cmd="crm_error -n --output-as=xml -- 201" test_assert_validate $CRM_EX_OK 0 desc="Get multiple legacy return codes" cmd="crm_error -- 201 202" test_assert $CRM_EX_OK 0 desc="Get multiple legacy return codes (XML)" cmd="crm_error --output-as=xml -- 201 202" test_assert_validate $CRM_EX_OK 0 desc="Get multiple legacy return codes (with names)" cmd="crm_error -n -- 201 202" test_assert $CRM_EX_OK 0 desc="Get multiple legacy return codes (with names) (XML)" cmd="crm_error -n --output-as=xml -- 201 202" test_assert_validate $CRM_EX_OK 0 # We can only rely on our custom codes, so we'll spot-check codes 201-209 desc="List legacy return codes (spot check)" cmd="crm_error -l | grep 20[1-9]" test_assert $CRM_EX_OK 0 desc="List legacy return codes (spot check) (XML)" cmd="crm_error -l --output-as=xml > $TMPFILE; rc=$?" 
cmd="$cmd; grep -Ev ''" test_assert $CRM_EX_OK desc="Test '+=' XML attribute update syntax" cmd="cibadmin -M --score --xml-text=''" test_assert $CRM_EX_OK desc="Test '++' nvpair value update syntax" cmd="crm_attribute -n test_attr -v 'value++' --score" test_assert $CRM_EX_OK desc="Test '++' nvpair value update syntax (XML)" cmd="crm_attribute -n test_attr -v 'value++' --score --output-as=xml" test_assert $CRM_EX_OK desc="Test '+=' nvpair value update syntax" cmd="crm_attribute -n test_attr -v 'value+=2' --score" test_assert $CRM_EX_OK desc="Test '+=' nvpair value update syntax (XML)" cmd="crm_attribute -n test_attr -v 'value+=2' --score --output-as=xml" test_assert $CRM_EX_OK desc="Test '++' XML attribute update syntax (--score not set)" cmd="cibadmin -M --xml-text=''" test_assert $CRM_EX_OK desc="Test '+=' XML attribute update syntax (--score not set)" cmd="cibadmin -M --xml-text=''" test_assert $CRM_EX_OK desc="Test '++' nvpair value update syntax (--score not set)" cmd="crm_attribute -n test_attr -v 'value++'" test_assert $CRM_EX_OK desc="Test '++' nvpair value update syntax (--score not set) (XML)" cmd="crm_attribute -n test_attr -v 'value++' --output-as=xml" test_assert $CRM_EX_OK desc="Test '+=' nvpair value update syntax (--score not set)" cmd="crm_attribute -n test_attr -v 'value+=2'" test_assert $CRM_EX_OK desc="Test '+=' nvpair value update syntax (--score not set) (XML)" cmd="crm_attribute -n test_attr -v 'value+=2' --output-as=xml" test_assert $CRM_EX_OK desc="Require --force for CIB erasure" cmd="cibadmin -E" test_assert $CRM_EX_UNSAFE desc="Allow CIB erasure with --force" cmd="cibadmin -E --force" test_assert $CRM_EX_OK 0 # Skip outputting the resulting CIB in the previous command, and delete # rsc_defaults now, so tests behave the same regardless of build options. 
delete_shadow_resource_defaults # Verify the output after erasure desc="Query CIB" cmd="cibadmin -Q" test_assert $CRM_EX_OK # Save a copy of the CIB for a later test cibadmin -Q > "$TMPORIG" desc="Set cluster option" cmd="crm_attribute -n cluster-delay -v 60s" test_assert $CRM_EX_OK desc="Query new cluster option" cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" test_assert $CRM_EX_OK desc="Query cluster options" cmd="cibadmin -Q -o crm_config > $TMPXML" test_assert $CRM_EX_OK desc="Set no-quorum policy" cmd="crm_attribute -n no-quorum-policy -v ignore" test_assert $CRM_EX_OK desc="Delete nvpair" cmd="cibadmin -D -o crm_config --xml-text ''" test_assert $CRM_EX_OK desc="Create operation should fail" cmd="cibadmin -C -o crm_config --xml-file $TMPXML" test_assert $CRM_EX_EXISTS desc="Modify cluster options section" cmd="cibadmin -M -o crm_config --xml-file $TMPXML" test_assert $CRM_EX_OK desc="Query updated cluster option" cmd="cibadmin -Q -o crm_config | grep cib-bootstrap-options-cluster-delay" test_assert $CRM_EX_OK desc="Set duplicate cluster option" cmd="crm_attribute -n cluster-delay -v 40s -s duplicate" test_assert $CRM_EX_OK desc="Setting multiply defined cluster option should fail" cmd="crm_attribute -n cluster-delay -v 30s" test_assert $CRM_EX_MULTIPLE desc="Set cluster option with -s" cmd="crm_attribute -n cluster-delay -v 30s -s duplicate" test_assert $CRM_EX_OK desc="Delete cluster option with -i" cmd="crm_attribute -n cluster-delay -D -i cib-bootstrap-options-cluster-delay" test_assert $CRM_EX_OK desc="Create node1 and bring it online" cmd="crm_simulate --live-check --in-place --node-up=node1" test_assert $CRM_EX_OK desc="Create node attribute" cmd="crm_attribute -n ram -v 1024M -N node1 -t nodes" test_assert $CRM_EX_OK desc="Query new node attribute" cmd="cibadmin -Q -o nodes | grep node1-ram" test_assert $CRM_EX_OK desc="Create second node attribute" cmd="crm_attribute -n rattr -v XYZ -N node1 -t nodes" test_assert $CRM_EX_OK 
desc="Query node attributes by pattern" cmd="crm_attribute -t nodes -P 'ra.*' -N node1 --query" test_assert $CRM_EX_OK 0 desc="Update node attributes by pattern" cmd="crm_attribute -t nodes -P 'rat.*' -N node1 -v 10" test_assert $CRM_EX_OK desc="Delete node attributes by pattern" cmd="crm_attribute -t nodes -P 'rat.*' -N node1 -D" test_assert $CRM_EX_OK desc="Set a transient (fail-count) node attribute" cmd="crm_attribute -n fail-count-foo -v 3 -N node1 -t status" test_assert $CRM_EX_OK desc="Query a fail count" cmd="crm_failcount --query -r foo -N node1" test_assert $CRM_EX_OK desc="Show node attributes with crm_simulate" cmd="crm_simulate --live-check --show-attrs" test_assert $CRM_EX_OK 0 desc="Set a second transient node attribute" cmd="crm_attribute -n fail-count-bar -v 5 -N node1 -t status" test_assert $CRM_EX_OK desc="Query transient node attributes by pattern" cmd="crm_attribute -t status -P fail-count -N node1 --query" test_assert $CRM_EX_OK 0 desc="Update transient node attributes by pattern" cmd="crm_attribute -t status -P fail-count -N node1 -v 10" test_assert $CRM_EX_OK desc="Delete transient node attributes by pattern" cmd="crm_attribute -t status -P fail-count -N node1 -D" test_assert $CRM_EX_OK desc="crm_attribute given invalid delete usage" cmd="crm_attribute -t nodes -N node1 -D" test_assert $CRM_EX_USAGE 0 desc="Set a utilization node attribute" cmd="crm_attribute -n cpu -v 1 -N node1 -z" test_assert $CRM_EX_OK desc="Query utilization node attribute" cmd="crm_attribute --query -n cpu -N node1 -z" test_assert $CRM_EX_OK 0 desc="Digest calculation" cmd="cibadmin -Q | cibadmin -5 -p 2>&1 > /dev/null" test_assert $CRM_EX_OK # This update will fail because it has version numbers desc="Replace operation should fail" cmd="cibadmin -R --xml-file $TMPORIG" test_assert $CRM_EX_OLD desc="Default standby value" cmd="crm_standby -N node1 -G" test_assert $CRM_EX_OK desc="Set standby status" cmd="crm_standby -N node1 -v true" test_assert $CRM_EX_OK desc="Query 
standby value" cmd="crm_standby -N node1 -G" test_assert $CRM_EX_OK desc="Delete standby value" cmd="crm_standby -N node1 -D" test_assert $CRM_EX_OK desc="Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_OK desc="crm_resource run with extra arguments" cmd="crm_resource foo bar" test_assert $CRM_EX_USAGE 0 desc="List all available resource options (invalid type)" cmd="crm_resource --list-options=asdf" test_assert $CRM_EX_USAGE 0 desc="List all available resource options (invalid type) (XML)" cmd="crm_resource --list-options=asdf --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="List non-advanced primitive meta-attributes" cmd="crm_resource --list-options=primitive" test_assert $CRM_EX_OK 0 desc="List non-advanced primitive meta-attributes (XML) (shows all)" cmd="crm_resource --list-options=primitive --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List all available primitive meta-attributes" cmd="crm_resource --list-options=primitive --all" test_assert $CRM_EX_OK 0 desc="List all available primitive meta-attributes (XML)" cmd="crm_resource --list-options=primitive --all --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List non-advanced fencing parameters" cmd="crm_resource --list-options=fencing" test_assert $CRM_EX_OK 0 desc="List non-advanced fencing parameters (XML) (shows all)" cmd="crm_resource --list-options=fencing --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List all available fencing parameters" cmd="crm_resource --list-options=fencing --all" test_assert $CRM_EX_OK 0 desc="List all available fencing parameters (XML)" cmd="crm_resource --list-options=fencing --all --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="crm_resource given both -r and resource config" cmd="crm_resource -r xyz --class ocf --provider pacemaker --agent Dummy" test_assert $CRM_EX_USAGE 0 desc="crm_resource given resource config with invalid action" cmd="crm_resource --class ocf --provider pacemaker --agent 
Dummy -D" test_assert $CRM_EX_USAGE 0 desc="Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p is-managed -v false" test_assert $CRM_EX_OK desc="Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g is-managed" test_assert $CRM_EX_OK desc="Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d is-managed" test_assert $CRM_EX_OK desc="Create another resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Stopped --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Show why a resource is not running" cmd="crm_resource -Y -r dummy --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Remove another resource meta attribute" cmd="crm_resource -r dummy --meta -d target-role --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get a non-existent attribute from a resource element with output-as=xml" cmd="crm_resource -r dummy --get-parameter nonexistent --element --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get a non-existent attribute from a resource element without output-as=xml" cmd="crm_resource -r dummy --get-parameter nonexistent --element" test_assert $CRM_EX_OK desc="Get an existent attribute from a resource element with output-as=xml" cmd="crm_resource -r dummy --get-parameter class --element --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get an existent attribute from a resource element without output-as=xml" cmd="crm_resource -r dummy --get-parameter class --element" test_assert $CRM_EX_OK desc="Set a non-existent attribute for a resource element with output-as=xml" cmd="crm_resource -r dummy --set-parameter=description -v test_description --element --output-as=xml" test_assert_validate $CRM_EX_OK desc="Set an existent attribute for a resource element with output-as=xml" cmd="crm_resource -r dummy --set-parameter=description -v test_description --element --output-as=xml" test_assert_validate $CRM_EX_OK desc="Delete an existent attribute for a resource 
element with output-as=xml" cmd="crm_resource -r dummy -d description --element --output-as=xml" test_assert_validate $CRM_EX_OK desc="Delete a non-existent attribute for a resource element with output-as=xml" cmd="crm_resource -r dummy -d description --element --output-as=xml" test_assert_validate $CRM_EX_OK desc="Set a non-existent attribute for a resource element without output-as=xml" cmd="crm_resource -r dummy --set-parameter=description -v test_description --element" test_assert $CRM_EX_OK desc="Set an existent attribute for a resource element without output-as=xml" cmd="crm_resource -r dummy --set-parameter=description -v test_description --element" test_assert $CRM_EX_OK desc="Delete an existent attribute for a resource element without output-as=xml" cmd="crm_resource -r dummy -d description --element" test_assert $CRM_EX_OK desc="Delete a non-existent attribute for a resource element without output-as=xml" cmd="crm_resource -r dummy -d description --element" test_assert $CRM_EX_OK desc="Create a resource attribute" cmd="crm_resource -r dummy -p delay -v 10s" test_assert $CRM_EX_OK desc="List the configured resources" cmd="crm_resource -L" test_assert $CRM_EX_OK desc="List the configured resources in XML" cmd="crm_resource -L --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Implicitly list the configured resources" cmd="crm_resource" test_assert $CRM_EX_OK 0 desc="List IDs of instantiated resources" cmd="crm_resource -l" test_assert $CRM_EX_OK 0 desc="Show XML configuration of resource" cmd="crm_resource -q -r dummy" test_assert $CRM_EX_OK 0 desc="Show XML configuration of resource, output as XML" cmd="crm_resource -q -r dummy --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Require a destination when migrating a resource that is stopped" cmd="crm_resource -r dummy -M" test_assert $CRM_EX_USAGE desc="Don't support migration to non-existent locations" cmd="crm_resource -r dummy -M -N i.do.not.exist" test_assert $CRM_EX_NOSUCH desc="Create a 
fencing resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_OK desc="Bring resources online" cmd="crm_simulate --live-check --in-place -S" test_assert $CRM_EX_OK desc="Try to move a resource to its existing location" cmd="crm_resource -r dummy --move --node node1" test_assert $CRM_EX_EXISTS desc="Try to move a resource that doesn't exist" cmd="crm_resource -r xyz --move --node node1" test_assert $CRM_EX_NOSUCH 0 desc="Move a resource from its existing location" cmd="crm_resource -r dummy --move" test_assert $CRM_EX_OK desc="Clear out constraints generated by --move" cmd="crm_resource -r dummy --clear" test_assert $CRM_EX_OK desc="Default ticket granted state" cmd="crm_ticket -t ticketA -G granted -d false" test_assert $CRM_EX_OK desc="Set ticket granted state" cmd="crm_ticket -t ticketA -r --force" test_assert $CRM_EX_OK desc="List ticket IDs" cmd="crm_ticket -w" test_assert $CRM_EX_OK 0 desc="List ticket IDs, outputting in XML" cmd="crm_ticket -w --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query ticket state" cmd="crm_ticket -t ticketA -q" test_assert $CRM_EX_OK 0 desc="Query ticket state, outputting as xml" cmd="crm_ticket -t ticketA -q --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query ticket granted state" cmd="crm_ticket -t ticketA -G granted" test_assert $CRM_EX_OK desc="Query ticket granted state, outputting as xml" cmd="crm_ticket -t ticketA -G granted --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Delete ticket granted state" cmd="crm_ticket -t ticketA -D granted --force" test_assert $CRM_EX_OK desc="Make a ticket standby" cmd="crm_ticket -t ticketA -s" test_assert $CRM_EX_OK desc="Query ticket standby state" cmd="crm_ticket -t ticketA -G standby" test_assert $CRM_EX_OK desc="Activate a ticket" cmd="crm_ticket -t ticketA -a" test_assert $CRM_EX_OK desc="List ticket details" cmd="crm_ticket -L -t ticketA" test_assert $CRM_EX_OK 0 desc="List ticket details, outputting as XML" cmd="crm_ticket -L -t 
ticketA --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Add a second ticket" cmd="crm_ticket -t ticketB -G granted -d false" test_assert $CRM_EX_OK desc="Set second ticket granted state" cmd="crm_ticket -t ticketB -r --force" test_assert $CRM_EX_OK desc="List tickets" cmd="crm_ticket -l" test_assert $CRM_EX_OK 0 desc="List tickets, outputting as XML" cmd="crm_ticket -l --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Delete second ticket" cmd="cibadmin --delete --xml-text ''" test_assert $CRM_EX_OK desc="Delete ticket standby state" cmd="crm_ticket -t ticketA -D standby" test_assert $CRM_EX_OK desc="Add a constraint to a ticket" cmd="cibadmin -C -o constraints --xml-text ''" test_assert $CRM_EX_OK desc="Query ticket constraints" cmd="crm_ticket -t ticketA -c" test_assert $CRM_EX_OK 0 desc="Query ticket constraints, outputting as xml" cmd="crm_ticket -t ticketA -c --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Delete ticket constraint" cmd="cibadmin --delete --xml-text ''" test_assert $CRM_EX_OK desc="Ban a resource on unknown node" cmd="crm_resource -r dummy -B -N host1" test_assert $CRM_EX_NOSUCH desc="Create two more nodes and bring them online" cmd="crm_simulate --live-check --in-place --node-up=node2 --node-up=node3" test_assert $CRM_EX_OK desc="Ban dummy from node1" cmd="crm_resource -r dummy -B -N node1" test_assert $CRM_EX_OK desc="Show where a resource is running" cmd="crm_resource -r dummy -W" test_assert $CRM_EX_OK 0 desc="Show constraints on a resource" cmd="crm_resource -a -r dummy" test_assert $CRM_EX_OK 0 desc="Ban dummy from node2" cmd="crm_resource -r dummy -B -N node2 --output-as=xml" test_assert_validate $CRM_EX_OK desc="Relocate resources due to ban" cmd="crm_simulate --live-check --in-place -S" test_assert $CRM_EX_OK desc="Move dummy to node1" cmd="crm_resource -r dummy -M -N node1 --output-as=xml" test_assert_validate $CRM_EX_OK desc="Clear implicit constraints for dummy on node2" cmd="crm_resource -r dummy -U -N 
node2" test_assert $CRM_EX_OK desc="Drop the status section" cmd="cibadmin -R -o status --xml-text ''" test_assert $CRM_EX_OK 0 desc="Create a clone" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_OK 0 desc="Create a resource meta attribute" cmd="crm_resource -r test-primitive --meta -p is-managed -v false" test_assert $CRM_EX_OK desc="Create a resource meta attribute in the primitive" cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force" test_assert $CRM_EX_OK desc="Update resource meta attribute with duplicates" cmd="crm_resource -r test-clone --meta -p is-managed -v true" test_assert $CRM_EX_OK desc="Update resource meta attribute with duplicates (force clone)" cmd="crm_resource -r test-clone --meta -p is-managed -v true --force" test_assert $CRM_EX_OK desc="Update child resource meta attribute with duplicates" cmd="crm_resource -r test-primitive --meta -p is-managed -v false" test_assert $CRM_EX_OK desc="Delete resource meta attribute with duplicates" cmd="crm_resource -r test-clone --meta -d is-managed" test_assert $CRM_EX_OK desc="Delete resource meta attribute in parent" cmd="crm_resource -r test-primitive --meta -d is-managed" test_assert $CRM_EX_OK desc="Create a resource meta attribute in the primitive" cmd="crm_resource -r test-primitive --meta -p is-managed -v false --force" test_assert $CRM_EX_OK desc="Update existing resource meta attribute" cmd="crm_resource -r test-clone --meta -p is-managed -v true" test_assert $CRM_EX_OK desc="Create a resource meta attribute in the parent" cmd="crm_resource -r test-clone --meta -p is-managed -v true --force" test_assert $CRM_EX_OK desc="Copy resources" cmd="cibadmin -Q -o resources > $TMPXML" test_assert $CRM_EX_OK 0 desc="Delete resource parent meta attribute (force)" cmd="crm_resource -r test-clone --meta -d is-managed --force" test_assert $CRM_EX_OK desc="Restore duplicates" cmd="cibadmin -R -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK desc="Delete resource 
child meta attribute" cmd="crm_resource -r test-primitive --meta -d is-managed" test_assert $CRM_EX_OK desc="Create the dummy-group resource group" cmd="cibadmin -C -o resources --xml-text '" cmd="$cmd " cmd="$cmd " cmd="$cmd '" test_assert $CRM_EX_OK desc="Create a resource meta attribute in dummy1" cmd="crm_resource -r dummy1 --meta -p is-managed -v true" test_assert $CRM_EX_OK desc="Create a resource meta attribute in dummy-group" cmd="crm_resource -r dummy-group --meta -p is-managed -v false" test_assert $CRM_EX_OK desc="Delete the dummy-group resource group" cmd="cibadmin -D -o resources --xml-text ''" test_assert $CRM_EX_OK desc="Specify a lifetime when moving a resource" cmd="crm_resource -r dummy --move --node node2 --lifetime=PT1H" test_assert $CRM_EX_OK desc="Try to move a resource previously moved with a lifetime" cmd="crm_resource -r dummy --move --node node1" test_assert $CRM_EX_OK desc="Ban dummy from node1 for a short time" cmd="crm_resource -r dummy -B -N node1 --lifetime=PT1S" test_assert $CRM_EX_OK desc="Remove expired constraints" sleep 2 cmd="crm_resource --clear --expired" test_assert $CRM_EX_OK # Clear has already been tested elsewhere, but we need to get rid of the # constraints so testing delete works. It won't delete if there's still # a reference to the resource somewhere. 
desc="Clear all implicit constraints for dummy" cmd="crm_resource -r dummy -U" test_assert $CRM_EX_OK desc="Set a node health strategy" cmd="crm_attribute -n node-health-strategy -v migrate-on-red" test_assert $CRM_EX_OK desc="Set a node health attribute" cmd="crm_attribute -N node3 -n '#health-cts-cli' -v red" test_assert $CRM_EX_OK desc="Show why a resource is not running on an unhealthy node" cmd="crm_resource -N node3 -Y -r dummy --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Delete a resource" cmd="crm_resource -D -r dummy -t primitive" test_assert $CRM_EX_OK unset CIB_shadow unset CIB_shadow_dir desc="Create an XML patchset" cmd="crm_diff -o $test_home/cli/crm_diff_old.xml -n $test_home/cli/crm_diff_new.xml" test_assert $CRM_EX_ERROR 0 export CIB_file="$test_home/cli/constraints.xml" for rsc in prim1 prim2 prim3 prim4 prim5 prim6 prim7 prim8 prim9 \ prim10 prim11 prim12 prim13 group clone; do desc="Check locations and constraints for $rsc" cmd="crm_resource -a -r $rsc" test_assert $CRM_EX_OK 0 desc="Recursively check locations and constraints for $rsc" cmd="crm_resource -A -r $rsc" test_assert $CRM_EX_OK 0 desc="Check locations and constraints for $rsc in XML" cmd="crm_resource -a -r $rsc --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Recursively check locations and constraints for $rsc in XML" cmd="crm_resource -A -r $rsc --output-as=xml" test_assert_validate $CRM_EX_OK 0 done desc="Check locations and constraints for group member (referring to group)" cmd="crm_resource -a -r gr2" test_assert $CRM_EX_OK 0 desc="Check locations and constraints for group member (without referring to group)" cmd="crm_resource -a -r gr2 --force" test_assert $CRM_EX_OK 0 # Create a shadow CIB based on constraints.xml create_shadow_cib --create unset CIB_file desc="Set a meta-attribute for primitive and resources colocated with it" cmd="crm_resource -r prim5 --meta --set-parameter=target-role -v Stopped --recursive --output-as=xml" test_assert_validate 
$CRM_EX_OK 0 desc="Set a meta-attribute for group and resource colocated with it" cmd="crm_resource -r group --meta --set-parameter=target-role -v Stopped --recursive" test_assert $CRM_EX_OK 0 desc="Set a meta-attribute for clone and resource colocated with it" cmd="crm_resource -r clone --meta --set-parameter=target-role -v Stopped --recursive --output-as=xml" test_assert_validate $CRM_EX_OK 0 unset CIB_shadow unset CIB_shadow_dir export CIB_file="$test_home/cli/crm_resource_digests.xml" desc="Show resource digests" cmd="crm_resource --digests -r rsc1 -N node1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Show resource digests with overrides" cmd="$cmd CRM_meta_interval=10000 CRM_meta_timeout=20000" test_assert $CRM_EX_OK 0 desc="Show resource operations" cmd="crm_resource --list-operations" test_assert $CRM_EX_OK 0 desc="Show resource operations (XML)" cmd="crm_resource --list-operations --output-as=xml" test_assert_validate $CRM_EX_OK 0 unset CIB_file export CIB_file="$test_home/cli/crmadmin-cluster-remote-guest-nodes.xml" desc="List all nodes" cmd="crmadmin -N" test_assert $CRM_EX_OK 0 desc="Minimally list all nodes" cmd="crmadmin -N -q" test_assert $CRM_EX_OK 0 desc="List all nodes as bash exports" cmd="crmadmin -N -B" test_assert $CRM_EX_OK 0 desc="List cluster nodes" cmd="crmadmin -N cluster | wc -l | grep 6" test_assert $CRM_EX_OK 0 desc="List guest nodes" cmd="crmadmin -N guest | wc -l | grep 2" test_assert $CRM_EX_OK 0 desc="List remote nodes" cmd="crmadmin -N remote | wc -l | grep 3" test_assert $CRM_EX_OK 0 desc="List cluster,remote nodes" cmd="crmadmin -N cluster,remote | wc -l | grep 9" test_assert $CRM_EX_OK 0 desc="List guest,remote nodes" cmd="crmadmin -N guest,remote | wc -l | grep 5" test_assert $CRM_EX_OK 0 unset CIB_file export CIB_file="$test_home/cli/crm_mon.xml" export CIB_shadow_dir="${shadow_dir}" desc="Show allocation scores with crm_simulate" cmd="crm_simulate -x $CIB_file --show-scores --output-as=xml" test_assert_validate 
$CRM_EX_OK 0 desc="Show utilization with crm_simulate" cmd="crm_simulate -x $CIB_file --show-utilization" test_assert $CRM_EX_OK 0 desc="Simulate injecting a failure" cmd="crm_simulate -x $CIB_file -S -i ping_monitor_10000@cluster02=1" test_assert $CRM_EX_OK 0 desc="Simulate bringing a node down" cmd="crm_simulate -x $CIB_file -S --node-down=cluster01" test_assert $CRM_EX_OK 0 desc="Simulate a node failing" cmd="crm_simulate -x $CIB_file -S --node-fail=cluster02" test_assert $CRM_EX_OK 0 unset CIB_shadow_dir desc="List a promotable clone resource" cmd="crm_resource --locate -r promotable-clone" test_assert $CRM_EX_OK 0 desc="List the primitive of a promotable clone resource" cmd="crm_resource --locate -r promotable-rsc" test_assert $CRM_EX_OK 0 desc="List a single instance of a promotable clone resource" cmd="crm_resource --locate -r promotable-rsc:0" test_assert $CRM_EX_OK 0 desc="List another instance of a promotable clone resource" cmd="crm_resource --locate -r promotable-rsc:1" test_assert $CRM_EX_OK 0 desc="List a promotable clone resource in XML" cmd="crm_resource --locate -r promotable-clone --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List the primitive of a promotable clone resource in XML" cmd="crm_resource --locate -r promotable-rsc --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List a single instance of a promotable clone resource in XML" cmd="crm_resource --locate -r promotable-rsc:0 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="List another instance of a promotable clone resource in XML" cmd="crm_resource --locate -r promotable-rsc:1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Try to move an instance of a cloned resource" cmd="crm_resource -r promotable-rsc:0 --move --node node1" test_assert $CRM_EX_INVALID_PARAM 0 # Create a sandbox copy of crm_mon.xml cibadmin -Q > "$TMPXML" export CIB_file="$TMPXML" desc="Query a nonexistent promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc 
-G" test_assert $CRM_EX_NOSUCH 0 desc="Query a nonexistent promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -G --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Delete a nonexistent promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -D" test_assert $CRM_EX_OK 0 desc="Delete a nonexistent promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -D --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query after deleting a nonexistent promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -G" test_assert $CRM_EX_NOSUCH 0 desc="Query after deleting a nonexistent promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -G --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Update a nonexistent promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -v 1" test_assert $CRM_EX_OK 0 desc="Update a nonexistent promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -v 1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query after updating a nonexistent promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -G" test_assert $CRM_EX_OK 0 desc="Query after updating a nonexistent promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -G --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Update an existing promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -v 5" test_assert $CRM_EX_OK 0 desc="Update an existing promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -v 5 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query after updating an existing promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -G" test_assert $CRM_EX_OK 0 desc="Query after updating an existing promotable score attribute (XML)" 
cmd="crm_attribute -N cluster01 -p promotable-rsc -G --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Delete an existing promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -D" test_assert $CRM_EX_OK 0 desc="Delete an existing promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -D --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query after deleting an existing promotable score attribute" cmd="crm_attribute -N cluster01 -p promotable-rsc -G" test_assert $CRM_EX_NOSUCH 0 desc="Query after deleting an existing promotable score attribute (XML)" cmd="crm_attribute -N cluster01 -p promotable-rsc -G --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 # Test for an issue with legacy command line parsing when the resource is # specified in the environment (CLBZ#5509) export OCF_RESOURCE_INSTANCE=promotable-rsc desc="Update a promotable score attribute to -INFINITY" cmd="crm_attribute -N cluster01 -p -v -INFINITY" test_assert $CRM_EX_OK 0 desc="Update a promotable score attribute to -INFINITY (XML)" cmd="crm_attribute -N cluster01 -p -v -INFINITY --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Query after updating a promotable score attribute to -INFINITY" cmd="crm_attribute -N cluster01 -p -G" test_assert $CRM_EX_OK 0 desc="Query after updating a promotable score attribute to -INFINITY (XML)" cmd="crm_attribute -N cluster01 -p -G --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Try OCF_RESOURCE_INSTANCE if -p is specified with an empty string" cmd="crm_attribute -N cluster01 -p '' -G" test_assert $CRM_EX_OK 0 export OCF_RESOURCE_INSTANCE="" desc="Return usage error if both -p and OCF_RESOURCE_INSTANCE are empty strings" cmd="crm_attribute -N cluster01 -p '' -G" test_assert $CRM_EX_USAGE 0 unset CIB_file unset OCF_RESOURCE_INSTANCE export CIB_file="-" desc="Check that CIB_file=\"-\" works - crm_mon" cmd="cat $test_home/cli/crm_mon.xml | crm_mon -1" test_assert $CRM_EX_OK 0 
desc="Check that CIB_file=\"-\" works - crm_resource" cmd="cat $test_home/cli/crm_resource_digests.xml | crm_resource --digests -r rsc1 -N node1 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Check that CIB_file=\"-\" works - crmadmin" cmd="cat $test_home/cli/crmadmin-cluster-remote-guest-nodes.xml | crmadmin -N | wc -l | grep 11" test_assert $CRM_EX_OK 0 unset CIB_file rm -f "$TMPXML" "$TMPORIG" # crm_shadow tests unset CIB_shadow unset CIB_shadow_dir # Query with no active shadow instance desc="Get active shadow instance (no active instance)" cmd="crm_shadow --which" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance (no active instance) (XML)" cmd="crm_shadow --which --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Get active shadow instance's file name (no active instance)" cmd="crm_shadow --file" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance's file name (no active instance) (XML)" cmd="crm_shadow --file --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Get active shadow instance's contents (no active instance)" cmd="crm_shadow --display" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance's contents (no active instance) (XML)" cmd="crm_shadow --display --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (no active instance)" cmd="crm_shadow --diff" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (no active instance) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 # Create new shadow instance based on active CIB # Don't use create_shadow_cib() here; test explicitly export CIB_file="$test_home/cli/crm_mon.xml" export CIB_shadow="$shadow" export CIB_shadow_dir="$shadow_dir" # Delete the shadow file if it already exists crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create copied shadow instance" cmd="crm_shadow --create $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete 
"$shadow" --force >/dev/null 2>&1 desc="Create copied shadow instance (XML)" cmd="crm_shadow --create $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Query shadow instance based on active CIB desc="Get active shadow instance (copied)" cmd="crm_shadow --which" test_assert $CRM_EX_OK 0 desc="Get active shadow instance (copied) (XML)" cmd="crm_shadow --which --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's file name (copied)" cmd="crm_shadow --file" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's file name (copied) (XML)" cmd="crm_shadow --file --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's contents (copied)" cmd="crm_shadow --display" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's contents (copied) (XML)" cmd="crm_shadow --display --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's diff (copied)" cmd="crm_shadow --diff" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's diff (copied) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Make some changes to the shadow file export CIB_file="$(crm_shadow --file)" cibadmin --modify --xml-text '' cibadmin --delete --xml-text '' cibadmin --create -o resources --xml-text \ '' state="" cibadmin --create -o status --xml-text "$state" unset state export CIB_file="$test_home/cli/crm_mon.xml" desc="Get active shadow instance's diff (after changes)" cmd="crm_shadow --diff" test_assert $CRM_EX_ERROR 0 desc="Get active shadow instance's diff (after changes) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_ERROR 0 # Commit the modified shadow CIB to a temp active CIB file cp "$test_home/cli/crm_mon.xml" "$TMPXML" export CIB_file="$TMPXML" desc="Commit shadow instance" cmd="crm_shadow --commit $shadow" test_assert $CRM_EX_USAGE 0 desc="Commit shadow instance (force)" cmd="crm_shadow --commit $shadow --force" 
test_assert $CRM_EX_OK 0 desc="Get active shadow instance's diff (after commit)" cmd="crm_shadow --diff" test_assert $CRM_EX_ERROR 0 desc="Commit shadow instance (force) (all)" cmd="crm_shadow --commit $shadow --force --all" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's diff (after commit all)" cmd="crm_shadow --diff" test_assert $CRM_EX_ERROR 0 # Repeat sequence with XML output cp "$test_home/cli/crm_mon.xml" "$TMPXML" export CIB_file="$TMPXML" desc="Commit shadow instance (XML)" cmd="crm_shadow --commit $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Commit shadow instance (force) (XML)" cmd="crm_shadow --commit $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's diff (after commit) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_ERROR 0 desc="Commit shadow instance (force) (all) (XML)" cmd="crm_shadow --commit $shadow --force --all --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's diff (after commit all) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_ERROR 0 # Commit an inactive shadow instance with no active instance unset CIB_shadow desc="Commit shadow instance (no active instance)" cmd="crm_shadow --commit $shadow" test_assert $CRM_EX_USAGE 0 desc="Commit shadow instance (no active instance) (force)" cmd="crm_shadow --commit $shadow --force" test_assert $CRM_EX_OK 0 desc="Commit shadow instance (no active instance) (XML)" cmd="crm_shadow --commit $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Commit shadow instance (no active instance) (force) (XML)" cmd="crm_shadow --commit $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Commit an inactive shadow instance with an active instance export CIB_shadow="nonexistent_shadow" desc="Commit shadow instance (mismatch)" cmd="crm_shadow --commit $shadow" test_assert $CRM_EX_USAGE 0 desc="Commit shadow 
instance (mismatch) (force)" cmd="crm_shadow --commit $shadow --force" test_assert $CRM_EX_OK 0 desc="Commit shadow instance (mismatch) (XML)" cmd="crm_shadow --commit $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Commit shadow instance (mismatch) (force) (XML)" cmd="crm_shadow --commit $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Commit an active shadow instance whose shadow file is missing desc="Commit shadow instance (nonexistent shadow file)" cmd="crm_shadow --commit $CIB_shadow" test_assert $CRM_EX_USAGE 0 desc="Commit shadow instance (nonexistent shadow file) (force)" cmd="crm_shadow --commit $CIB_shadow --force" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (nonexistent shadow file)" cmd="crm_shadow --diff" test_assert $CRM_EX_NOSUCH 0 desc="Commit shadow instance (nonexistent shadow file) (XML)" cmd="crm_shadow --commit $CIB_shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Commit shadow instance (nonexistent shadow file) (force) (XML)" cmd="crm_shadow --commit $CIB_shadow --force --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (nonexistent shadow file) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 # Commit an active shadow instance when the CIB file is missing export CIB_file="$test_home/cli/nonexistent_cib.xml" export CIB_shadow="$shadow" desc="Commit shadow instance (nonexistent CIB file)" cmd="crm_shadow --commit $shadow" test_assert $CRM_EX_USAGE 0 desc="Commit shadow instance (nonexistent CIB file) (force)" cmd="crm_shadow --commit $shadow --force" test_assert $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (nonexistent CIB file)" cmd="crm_shadow --diff" test_assert $CRM_EX_NOSUCH 0 desc="Commit shadow instance (nonexistent CIB file) (XML)" cmd="crm_shadow --commit $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Commit shadow instance (nonexistent CIB 
file) (force) (XML)" cmd="crm_shadow --commit $shadow --force --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Get active shadow instance's diff (nonexistent CIB file) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 rm -f "$TMPXML" # Delete an active shadow instance export CIB_file="$test_home/cli/crm_mon.xml" export CIB_shadow="$shadow" desc="Delete shadow instance" cmd="crm_shadow --delete $shadow" test_assert $CRM_EX_USAGE 0 desc="Delete shadow instance (force)" cmd="crm_shadow --delete $shadow --force" test_assert $CRM_EX_OK 0 create_shadow_cib --create desc="Delete shadow instance (XML)" cmd="crm_shadow --delete $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Delete shadow instance (force) (XML)" cmd="crm_shadow --delete $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Delete an inactive shadow instance with no active instance create_shadow_cib --create unset CIB_shadow desc="Delete shadow instance (no active instance)" cmd="crm_shadow --delete $shadow" test_assert $CRM_EX_USAGE 0 desc="Delete shadow instance (no active instance) (force)" cmd="crm_shadow --delete $shadow --force" test_assert $CRM_EX_OK 0 create_shadow_cib --create unset CIB_shadow desc="Delete shadow instance (no active instance) (XML)" cmd="crm_shadow --delete $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Delete shadow instance (no active instance) (force) (XML)" cmd="crm_shadow --delete $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Delete an inactive shadow instance with an active instance create_shadow_cib --create export CIB_shadow="nonexistent_shadow" desc="Delete shadow instance (mismatch)" cmd="crm_shadow --delete $shadow" test_assert $CRM_EX_USAGE 0 desc="Delete shadow instance (mismatch) (force)" cmd="crm_shadow --delete $shadow --force" test_assert $CRM_EX_OK 0 create_shadow_cib --create export CIB_shadow="nonexistent_shadow" desc="Delete shadow 
instance (mismatch) (XML)" cmd="crm_shadow --delete $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Delete shadow instance (mismatch) (force) (XML)" cmd="crm_shadow --delete $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Delete an active shadow instance whose shadow file is missing desc="Delete shadow instance (nonexistent shadow file)" cmd="crm_shadow --delete $CIB_shadow" test_assert $CRM_EX_USAGE 0 desc="Delete shadow instance (nonexistent shadow file) (force)" cmd="crm_shadow --delete $CIB_shadow --force" test_assert $CRM_EX_OK 0 desc="Delete shadow instance (nonexistent shadow file) (XML)" cmd="crm_shadow --delete $CIB_shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Delete shadow instance (nonexistent shadow file) (force) (XML)" cmd="crm_shadow --delete $CIB_shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Delete an active shadow instance when the CIB file is missing export CIB_file="$test_home/cli/crm_mon.xml" create_shadow_cib --create export CIB_file="$test_home/cli/nonexistent_cib.xml" desc="Delete shadow instance (nonexistent CIB file)" cmd="crm_shadow --delete $shadow" test_assert $CRM_EX_USAGE 0 desc="Delete shadow instance (nonexistent CIB file) (force)" cmd="crm_shadow --delete $shadow --force" test_assert $CRM_EX_OK 0 export CIB_file="$test_home/cli/crm_mon.xml" create_shadow_cib --create export CIB_file="$test_home/cli/nonexistent_cib.xml" desc="Delete shadow instance (nonexistent CIB file) (XML)" cmd="crm_shadow --delete $shadow --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Delete shadow instance (nonexistent CIB file) (force) (XML)" cmd="crm_shadow --delete $shadow --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create new shadow instance based on active CIB with no instance active export CIB_file="$test_home/cli/crm_mon.xml" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 unset CIB_shadow desc="Create copied shadow instance (no active 
instance)" cmd="crm_shadow --create $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 unset CIB_shadow desc="Create copied shadow instance (no active instance) (XML)" cmd="crm_shadow --create $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create new shadow instance based on active CIB with other instance active export CIB_file="$test_home/cli/crm_mon.xml" export CIB_shadow="nonexistent_shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create copied shadow instance (mismatch)" cmd="crm_shadow --create $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create copied shadow instance (mismatch) (XML)" cmd="crm_shadow --create $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create new shadow instance based on CIB (shadow file already exists) export CIB_file="$test_home/cli/crm_mon.xml" desc="Create copied shadow instance (file already exists)" cmd="crm_shadow --create $shadow --batch" test_assert $CRM_EX_CANTCREAT 0 desc="Create copied shadow instance (file already exists) (force)" cmd="crm_shadow --create $shadow --batch --force" test_assert $CRM_EX_OK 0 desc="Create copied shadow instance (file already exists) (XML)" cmd="crm_shadow --create $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_CANTCREAT 0 desc="Create copied shadow instance (file already exists) (force) (XML)" cmd="crm_shadow --create $shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create new shadow instance based on active CIB when the CIB file is missing export CIB_file="$test_home/cli/nonexistent_cib.xml" export CIB_shadow="$shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create copied shadow instance (nonexistent CIB file) (force)" cmd="crm_shadow --create $shadow --batch --force" test_assert $CRM_EX_NOSUCH 0 desc="Create copied shadow instance (nonexistent CIB file) (force) (XML)" 
cmd="crm_shadow --create $shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 # Create new empty shadow instance export CIB_shadow="$shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance" cmd="crm_shadow --create-empty $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (XML)" cmd="crm_shadow --create-empty $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create empty shadow instance with no active instance unset CIB_shadow crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (no active instance)" cmd="crm_shadow --create-empty $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (no active instance) (XML)" cmd="crm_shadow --create-empty $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create empty shadow instance with other instance active export CIB_shadow="nonexistent_shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (mismatch)" cmd="crm_shadow --create-empty $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (mismatch) (XML)" cmd="crm_shadow --create-empty $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Create empty shadow instance when the CIB file is missing export CIB_file="$test_home/cli/nonexistent_cib.xml" export CIB_shadow="$shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (nonexistent CIB file)" cmd="crm_shadow --create-empty $shadow --batch" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Create empty shadow instance (nonexistent CIB file) (XML)" cmd="crm_shadow --create-empty $shadow --batch --force --output-as=xml" 
test_assert_validate $CRM_EX_OK 0 # Create empty shadow instance (shadow file already exists) export CIB_file="$test_home/cli/crm_mon.xml" desc="Create empty shadow instance (file already exists)" cmd="crm_shadow --create-empty $shadow --batch" test_assert $CRM_EX_CANTCREAT 0 desc="Create empty shadow instance (file already exists) (force)" cmd="crm_shadow --create-empty $shadow --batch --force" test_assert $CRM_EX_OK 0 desc="Create empty shadow instance (file already exists) (XML)" cmd="crm_shadow --create-empty $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_CANTCREAT 0 desc="Create empty shadow instance (file already exists) (force) (XML)" cmd="crm_shadow --create-empty $shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Query shadow instance with an empty CIB. # --which and --file queries were done earlier. delete_shadow_resource_defaults desc="Get active shadow instance's contents (empty CIB)" cmd="crm_shadow --display" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's contents (empty CIB) (XML)" cmd="crm_shadow --display --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's diff (empty CIB)" cmd="crm_shadow --diff" test_assert $CRM_EX_ERROR 0 desc="Get active shadow instance's diff (empty CIB) (XML)" cmd="crm_shadow --diff --output-as=xml" test_assert_validate $CRM_EX_ERROR 0 # Reset shadow instance (overwrite existing shadow file based on active CIB) export CIB_file="$test_home/cli/crm_mon.xml" export CIB_shadow="$shadow" desc="Reset shadow instance" cmd="crm_shadow --reset $shadow --batch" test_assert $CRM_EX_OK 0 desc="Get active shadow instance's diff (after reset)" cmd="crm_shadow --diff" test_assert $CRM_EX_OK 0 create_shadow_cib --create-empty desc="Reset shadow instance (XML)" cmd="crm_shadow --reset $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Get active shadow instance's diff (after reset) (XML)" cmd="crm_shadow --diff --output-as=xml" 
test_assert_validate $CRM_EX_OK 0 # Reset an inactive shadow instance with no active instance unset CIB_shadow desc="Reset shadow instance (no active instance)" cmd="crm_shadow --reset $shadow --batch" test_assert $CRM_EX_OK 0 create_shadow_cib --create-empty unset CIB_shadow desc="Reset shadow instance (no active instance) (XML)" cmd="crm_shadow --reset $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Reset an inactive shadow instance with an active instance export CIB_shadow="nonexistent_shadow" desc="Reset shadow instance (mismatch)" cmd="crm_shadow --reset $shadow --batch" test_assert $CRM_EX_USAGE 0 desc="Reset shadow instance (mismatch) (force)" cmd="crm_shadow --reset $shadow --batch --force" test_assert $CRM_EX_OK 0 create_shadow_cib --create-empty export CIB_shadow="nonexistent_shadow" desc="Reset shadow instance (mismatch) (XML)" cmd="crm_shadow --reset $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="Reset shadow instance (mismatch) (force) (XML)" cmd="crm_shadow --reset $shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Reset an active shadow instance when the CIB file is missing create_shadow_cib --create-empty export CIB_file="$test_home/cli/nonexistent_cib.xml" desc="Reset shadow instance (nonexistent CIB file)" cmd="crm_shadow --reset $CIB_shadow --batch" test_assert $CRM_EX_NOSUCH 0 desc="Reset shadow instance (nonexistent CIB file) (XML)" cmd="crm_shadow --reset $CIB_shadow --batch --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Reset shadow instance (nonexistent CIB file) (force)" cmd="crm_shadow --reset $CIB_shadow --batch --force" test_assert $CRM_EX_NOSUCH 0 desc="Reset shadow instance (nonexistent CIB file) (force) (XML)" cmd="crm_shadow --reset $CIB_shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 # Reset an active shadow instance whose shadow file is missing export CIB_file="$test_home/cli/crm_mon.xml" export 
CIB_shadow="$shadow" crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Reset shadow instance (nonexistent shadow file)" cmd="crm_shadow --reset $CIB_shadow --batch" test_assert $CRM_EX_NOSUCH 0 desc="Reset shadow instance (nonexistent shadow file) (force)" cmd="crm_shadow --reset $CIB_shadow --batch --force" test_assert $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Reset shadow instance (nonexistent shadow file) (XML)" cmd="crm_shadow --reset $CIB_shadow --batch --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Reset shadow instance (nonexistent shadow file) (force) (XML)" cmd="crm_shadow --reset $CIB_shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Switch shadow instances # In batch mode, this only displays a message create_shadow_cib --create-empty # Makes no difference now, just future-proofing CIB_shadow="nonexistent_shadow" desc="Switch to new shadow instance" cmd="crm_shadow --switch $shadow --batch" test_assert $CRM_EX_OK 0 CIB_shadow="nonexistent_shadow" desc="Switch to new shadow instance (XML)" cmd="crm_shadow --switch $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_OK 0 crm_shadow --delete "$shadow" --force >/dev/null 2>&1 desc="Switch to nonexistent shadow instance" cmd="crm_shadow --switch $shadow --batch" test_assert $CRM_EX_NOSUCH 0 desc="Switch to nonexistent shadow instance (force)" cmd="crm_shadow --switch $shadow --batch --force" test_assert $CRM_EX_NOSUCH 0 desc="Switch to nonexistent shadow instance (XML)" cmd="crm_shadow --switch $shadow --batch --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Switch to nonexistent shadow instance (force) (XML)" cmd="crm_shadow --switch $shadow --batch --force --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 CIB_file_invalid_1="$test_home/cli/crm_verify_invalid_bz.xml" CIB_file_invalid_2="$test_home/cli/crm_verify_invalid_no_stonith.xml" - + 
CIB_file_invalid_3="$test_home/cli/crm_verify_invalid_fencing_topology.xml" + + desc="Verbosely verify a file-specified configuration with an unallowed fencing level ID" + cmd="crm_verify --xml-file '$CIB_file_invalid_3' --verbose" + test_assert $CRM_EX_CONFIG 0 + desc="Verify a file-specified invalid configuration (text output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1'" test_assert $CRM_EX_CONFIG 0 desc="Verify a file-specified invalid configuration (verbose text output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1' --verbose" test_assert $CRM_EX_CONFIG 0 desc="Verify a file-specified invalid configuration (quiet text output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1' --quiet" test_assert $CRM_EX_CONFIG 0 desc="Verify a file-specified invalid configuration (XML output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1' --output-as=xml" test_assert_validate $CRM_EX_CONFIG 0 desc="Verify a file-specified invalid configuration (verbose XML output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1' --output-as=xml --verbose" test_assert_validate $CRM_EX_CONFIG 0 desc="Verify a file-specified invalid configuration (quiet XML output)" cmd="crm_verify --xml-file '$CIB_file_invalid_1' --output-as=xml --quiet" test_assert_validate $CRM_EX_CONFIG 0 desc="Verify another file-specified invalid configuration (XML output)" cmd="crm_verify --xml-file '$CIB_file_invalid_2' --output-as=xml" test_assert_validate $CRM_EX_CONFIG 0 export CIB_file="$test_home/cli/crm_mon.xml" desc="Verify a file-specified valid configuration, outputting as xml" cmd="crm_verify --xml-file '$CIB_file' --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Verify a piped-in valid configuration, outputting as xml" cmd="cat '$CIB_file' | crm_verify -p --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Verbosely verify a file-specified valid configuration, outputting as xml" cmd="crm_verify --xml-file '$CIB_file' --output-as=xml --verbose" test_assert_validate $CRM_EX_OK 0 
desc="Verbosely verify a piped-in valid configuration, outputting as xml" cmd="cat '$CIB_file' | crm_verify -p --output-as=xml --verbose" test_assert_validate $CRM_EX_OK 0 CIB_file_contents=$(cat "$CIB_file") desc="Verify a string-supplied valid configuration, outputting as xml" cmd="crm_verify -X '$CIB_file_contents' --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Verbosely verify a string-supplied valid configuration, outputting as xml" cmd="crm_verify -X '$CIB_file_contents' --output-as=xml --verbose" test_assert_validate $CRM_EX_OK 0 unset CIB_file unset CIB_shadow unset CIB_shadow_dir } INVALID_PERIODS=( "2019-01-01 00:00:00Z" # Start with no end "2019-01-01 00:00:00Z/" # Start with only a trailing slash "PT2S/P1M" # Two durations "2019-13-01 00:00:00Z/P1M" # Out-of-range month "20191077T15/P1M" # Out-of-range day "2019-10-01T25:00:00Z/P1M" # Out-of-range hour "2019-10-01T24:00:01Z/P1M" # Hour 24 with anything but :00:00 "PT5H/20191001T007000Z" # Out-of-range minute "2019-10-01 00:00:80Z/P1M" # Out-of-range second "2019-10-01 00:00:10 +25:00/P1M" # Out-of-range offset hour "20191001T000010 -00:61/P1M" # Out-of-range offset minute "P1Y/2019-02-29 00:00:00Z" # Feb. 
29 in non-leap-year "2019-01-01 00:00:00Z/P" # Duration with no values "P1Z/2019-02-20 00:00:00Z" # Invalid duration unit "P1YM/2019-02-20 00:00:00Z" # No number for duration unit ) function test_dates() { # Ensure invalid period specifications are rejected for spec in '' "${INVALID_PERIODS[@]}"; do desc="Invalid period - [$spec]" cmd="iso8601 -p \"$spec\"" test_assert $CRM_EX_INVALID_PARAM 0 done desc="'2005-040/2005-043' period" cmd="iso8601 -p '2005-040/2005-043'" test_assert $CRM_EX_OK 0 desc="'2005-040/2005-043' period (XML)" cmd="iso8601 -p '2005-040/2005-043' --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="2014-01-01 00:30:00 - 1 Hour" cmd="iso8601 -d '2014-01-01 00:30:00Z' -D P-1H -E '2013-12-31 23:30:00Z'" test_assert $CRM_EX_OK 0 desc="Valid date - Feb 29 in leap year" cmd="iso8601 -d '2020-02-29 00:00:00Z' -E '2020-02-29 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="Valid date - using 'T' and offset" cmd="iso8601 -d '20191201T131211 -05:00' -E '2019-12-01 18:12:11Z'" test_assert $CRM_EX_OK 0 desc="24:00:00 equivalent to 00:00:00 of next day" cmd="iso8601 -d '2019-12-31 24:00:00Z' -E '2020-01-01 00:00:00Z'" test_assert $CRM_EX_OK 0 for y in 06 07 08 09 10 11 12 13 14 15 16 17 18 40; do desc="20$y-W01-7" cmd="iso8601 -d '20$y-W01-7 00Z'" test_assert $CRM_EX_OK 0 desc="20$y-W01-7 - round-trip" cmd="iso8601 -d '20$y-W01-7 00Z' -W -E '20$y-W01-7 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="20$y-W01-1" cmd="iso8601 -d '20$y-W01-1 00Z'" test_assert $CRM_EX_OK 0 desc="20$y-W01-1 - round-trip" cmd="iso8601 -d '20$y-W01-1 00Z' -W -E '20$y-W01-1 00:00:00Z'" test_assert $CRM_EX_OK 0 done desc="2009-W53-07" cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2009-W53-07 (XML)" cmd="iso8601 -d '2009-W53-7 00:00:00Z' -W -E '2009-W53-7 00:00:00Z' --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="epoch + 2 Years 5 Months 6 Minutes" cmd="iso8601 -d 'epoch' -D P2Y5MT6M -E '1972-06-01 00:06:00Z'" test_assert 
$CRM_EX_OK 0 desc="2009-01-31 + 1 Month" cmd="iso8601 -d '20090131T000000Z' -D P1M -E '2009-02-28 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2009-01-31 + 2 Months" cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P2M -E '2009-03-31 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2009-01-31 + 3 Months" cmd="iso8601 -d '2009-01-31 00:00:00Z' -D P3M -E '2009-04-30 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2009-03-31 - 1 Month" cmd="iso8601 -d '2009-03-31 01:00:00 +01:00' -D P-1M -E '2009-02-28 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2009-03-31 - 1 Month (XML)" cmd="iso8601 -d '2009-03-31 01:00:00 +01:00' -D P-1M -E '2009-02-28 00:00:00Z' --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="2038-01-01 + 3 Months" cmd="iso8601 -d '2038-01-01 00:00:00Z' -D P3M -E '2038-04-01 00:00:00Z'" test_assert $CRM_EX_OK 0 desc="2038-01-01 + 3 Months (XML)" cmd="iso8601 -d '2038-01-01 00:00:00Z' -D P3M -E '2038-04-01 00:00:00Z' --output-as=xml" test_assert_validate $CRM_EX_OK 0 } function test_acl_loop() { local TMPXML TMPXML="$1" # Make sure we're rejecting things for the right reasons orig_trace_fns="$PCMK_trace_functions" export PCMK_trace_functions=pcmk__check_acl,pcmk__apply_creation_acl CIB_user=root cibadmin --replace --xml-text '' ### no ACL ### export CIB_user=unknownguy desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 ### deny /cib permission ### export CIB_user=l33t-haxor desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert 
$CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 ### observer role ### export CIB_user=niceguy desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert $CRM_EX_OK 0 desc="$CIB_user: Set enable-acl" cmd="crm_attribute -n enable-acl -v false" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v false" test_assert $CRM_EX_OK desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 export CIB_user=root desc="$CIB_user: Query configuration" cmd="cibadmin -Q" test_assert $CRM_EX_OK 0 desc="$CIB_user: Set stonith-enabled" cmd="crm_attribute -n stonith-enabled -v true" test_assert $CRM_EX_OK desc="$CIB_user: Create a resource" cmd="cibadmin -C -o resources --xml-text ''" test_assert $CRM_EX_OK ### deny /cib permission ### export CIB_user=l33t-haxor desc="$CIB_user: Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Stopped" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g target-role" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 desc="$CIB_user: Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d target-role" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 ### observer role ### export CIB_user=niceguy desc="$CIB_user: Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Stopped" test_assert $CRM_EX_OK desc="$CIB_user: Query a resource meta attribute" cmd="crm_resource -r dummy --meta -g target-role" test_assert $CRM_EX_OK desc="$CIB_user: Remove a resource meta attribute" cmd="crm_resource -r dummy --meta -d target-role" test_assert $CRM_EX_OK desc="$CIB_user: 
Create a resource meta attribute" cmd="crm_resource -r dummy --meta -p target-role -v Started" test_assert $CRM_EX_OK ### read //meta_attributes ### export CIB_user=badidea desc="$CIB_user: Query configuration - implied deny" cmd="cibadmin -Q" test_assert $CRM_EX_OK 0 ### deny /cib, read //meta_attributes ### export CIB_user=betteridea desc="$CIB_user: Query configuration - explicit deny" cmd="cibadmin -Q" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --delete --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql ### observer role ### export CIB_user=niceguy desc="$CIB_user: Replace - remove acls" cmd="cibadmin --replace --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -C -o resources --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create resource" cmd="cibadmin --replace --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" crm_attribute -n enable-acl -v false CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (deny)" cmd="cibadmin --replace --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (deny)" cmd="cibadmin --replace --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (deny)" cmd="cibadmin --replace --xml-file $TMPXML" 
test_assert $CRM_EX_INSUFFICIENT_PRIV 0 ### admin role ### CIB_user=bob CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (direct allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (direct allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace -o resources --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (direct allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 ### super_user role ### export CIB_user=joe CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (inherited allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (inherited allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace -o resources --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (inherited allow)" cmd="cibadmin 
--replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 ### rsc_writer role ### export CIB_user=mike CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (allow overrides deny)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (allow overrides deny)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace -o resources --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (allow overrides deny)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK 0 ### rsc_denied role ### export CIB_user=chris CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - create attribute (deny overrides allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 # Set as root since setting as chris failed CIB_user=root cibadmin --modify --xml-text '' CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --modify --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - modify attribute (deny overrides allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 # Set as root since setting as chris failed CIB_user=root cibadmin 
--modify --xml-text '' CIB_user=root cibadmin -Q > "$TMPXML" CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin --replace -o resources --xml-text '' CIB_user=root CIB_file="$TMPXML" CIB_shadow="" cibadmin -Ql desc="$CIB_user: Replace - delete attribute (deny overrides allow)" cmd="cibadmin --replace -o resources --xml-file $TMPXML" test_assert $CRM_EX_INSUFFICIENT_PRIV 0 export PCMK_trace_functions="$orig_trace_fns" } function test_acls() { local SHADOWPATH local TMPXML TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.acls.xml.XXXXXXXXXX) create_shadow_cib --create-empty cat < "$TMPXML" EOF desc="Configure some ACLs" cmd="cibadmin -M -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK desc="Enable ACLs" cmd="crm_attribute -n enable-acl -v true" test_assert $CRM_EX_OK desc="Set cluster option" cmd="crm_attribute -n no-quorum-policy -v ignore" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="New ACL role" cmd="cibadmin --create -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="New ACL target" cmd="cibadmin --create -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="Another ACL role" cmd="cibadmin --create -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="Another ACL target" cmd="cibadmin --create -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="Updated ACL" cmd="cibadmin --replace -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK test_acl_loop "$TMPXML" unset CIB_shadow_dir rm -f "$TMPXML" } function test_validity() { local TMPGOOD local TMPBAD TMPGOOD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.good.xml.XXXXXXXXXX) TMPBAD=$(mktemp ${TMPDIR:-/tmp}/cts-cli.validity.bad.xml.XXXXXXXXXX) create_shadow_cib --create-empty pacemaker-1.2 orig_trace_fns="$PCMK_trace_functions" export PCMK_trace_functions=apply_upgrade,pcmk__update_schema cibadmin -C -o resources --xml-text '' cibadmin -C -o resources --xml-text '' cibadmin -C -o constraints --xml-text '' cibadmin -Q > 
"$TMPGOOD" desc="Try to make resulting CIB invalid (enum violation)" cmd="cibadmin -M -o constraints --xml-text ''" test_assert $CRM_EX_CONFIG sed 's|"start"|"break"|' "$TMPGOOD" > "$TMPBAD" desc="Run crm_simulate with invalid CIB (enum violation)" cmd="crm_simulate -x $TMPBAD -S" test_assert $CRM_EX_CONFIG 0 desc="Try to make resulting CIB invalid (unrecognized validate-with)" cmd="cibadmin -M --xml-text ''" test_assert $CRM_EX_CONFIG sed 's|"pacemaker-1.2"|"pacemaker-9999.0"|' "$TMPGOOD" > "$TMPBAD" desc="Run crm_simulate with invalid CIB (unrecognized validate-with)" cmd="crm_simulate -x $TMPBAD -S" test_assert $CRM_EX_CONFIG 0 desc="Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1)" cmd="cibadmin -C -o configuration --xml-text ''" test_assert $CRM_EX_CONFIG sed 's|||' "$TMPGOOD" > "$TMPBAD" desc="Run crm_simulate with invalid, but possibly recoverable CIB (valid with X.Y+1)" cmd="crm_simulate -x $TMPBAD -S" test_assert $CRM_EX_OK 0 sed 's|[ ][ ]*validate-with="[^"]*"||' "$TMPGOOD" > "$TMPBAD" desc="Make resulting CIB valid, although without validate-with attribute" cmd="cibadmin -R --xml-file $TMPBAD" test_assert $CRM_EX_OK desc="Run crm_simulate with valid CIB, but without validate-with attribute" cmd="crm_simulate -x $TMPBAD -S" test_assert $CRM_EX_OK 0 # this will just disable validation and accept the config, outputting # validation errors sed -e 's|[ ][ ]*validate-with="[^"]*"||' \ -e 's|\([ ][ ]*epoch="[^"]*\)"|\10"|' -e 's|"start"|"break"|' \ "$TMPGOOD" > "$TMPBAD" desc="Make resulting CIB invalid, and without validate-with attribute" cmd="cibadmin -R --xml-file $TMPBAD" test_assert $CRM_EX_OK desc="Run crm_simulate with invalid CIB, also without validate-with attribute" cmd="crm_simulate -x $TMPBAD -S" test_assert $CRM_EX_OK 0 unset CIB_shadow_dir rm -f "$TMPGOOD" "$TMPBAD" export PCMK_trace_functions="$orig_trace_fns" } test_upgrade() { local TMPXML TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) 
create_shadow_cib --create-empty pacemaker-2.10 orig_trace_fns="$PCMK_trace_functions" export PCMK_trace_functions=apply_upgrade,pcmk__update_schema desc="Set stonith-enabled=false" cmd="crm_attribute -n stonith-enabled -v false" test_assert $CRM_EX_OK cat < "$TMPXML" EOF desc="Configure the initial resource" cmd="cibadmin -M -o resources --xml-file $TMPXML" test_assert $CRM_EX_OK desc="Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping)" cmd="cibadmin --upgrade --force -V -V" test_assert $CRM_EX_OK desc="Query a resource instance attribute (shall survive)" cmd="crm_resource -r mySmartFuse -g requires" test_assert $CRM_EX_OK unset CIB_shadow_dir rm -f "$TMPXML" export PCMK_trace_functions="$orig_trace_fns" } test_rules() { local TMPXML create_shadow_cib --create-empty cibadmin -C -o crm_config --xml-text '' cibadmin -C -o resources --xml-text '' TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" if [ "$(uname)" == "FreeBSD" ]; then tomorrow=$(date -v+1d +"%F %T %z") else tomorrow=$(date --date=tomorrow +"%F %T %z") fi TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.tools.xml.XXXXXXXXXX) cat < "$TMPXML" EOF cibadmin -C -o constraints -x "$TMPXML" rm -f "$TMPXML" desc="crm_rule given no arguments" 
cmd="crm_rule" test_assert $CRM_EX_USAGE 0 desc="crm_rule given no arguments (XML)" cmd="crm_rule --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="crm_rule given no rule to check" cmd="crm_rule -c" test_assert $CRM_EX_USAGE 0 desc="crm_rule given no rule to check (XML)" cmd="crm_rule -c --output-as=xml" test_assert_validate $CRM_EX_USAGE 0 desc="crm_rule given invalid input XML" cmd="crm_rule -c -r blahblah -X 'invalidxml'" test_assert $CRM_EX_DATAERR 0 desc="crm_rule given invalid input XML (XML)" cmd="crm_rule -c -r blahblah -X 'invalidxml' --output-as=xml" test_assert_validate $CRM_EX_DATAERR 0 desc="crm_rule given invalid input XML on stdin" cmd="echo 'invalidxml' | crm_rule -c -r blahblah -X -" test_assert $CRM_EX_DATAERR 0 desc="crm_rule given invalid input XML on stdin (XML)" cmd="echo 'invalidxml' | crm_rule -c -r blahblah -X - --output-as=xml" test_assert_validate $CRM_EX_DATAERR 0 desc="Try to check a rule that doesn't exist" cmd="crm_rule -c -r blahblah" test_assert $CRM_EX_NOSUCH desc="Try to check a rule that doesn't exist, with XML output" cmd="crm_rule -c -r blahblah --output-as=xml" test_assert_validate $CRM_EX_NOSUCH 0 desc="Try to check a rule that has too many date_expressions" cmd="crm_rule -c -r cli-rule-too-many-date-expressions" test_assert $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try to check a rule that has too many date_expressions (XML)" cmd="crm_rule -c -r cli-rule-too-many-date-expressions --output-as=xml" test_assert_validate $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Verify basic rule is expired" cmd="crm_rule -c -r cli-prefer-rule-dummy-expired" test_assert $CRM_EX_EXPIRED 0 desc="Verify basic rule is expired, with XML output" cmd="crm_rule -c -r cli-prefer-rule-dummy-expired --output-as=xml" test_assert_validate $CRM_EX_EXPIRED 0 desc="Verify basic rule worked in the past" cmd="crm_rule -c -r cli-prefer-rule-dummy-expired -d 20180101" test_assert $CRM_EX_OK 0 desc="Verify basic rule worked in the past (XML)" cmd="crm_rule -c -r 
cli-prefer-rule-dummy-expired -d 20180101 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Verify basic rule is not yet in effect" cmd="crm_rule -c -r cli-prefer-rule-dummy-not-yet" test_assert $CRM_EX_NOT_YET_IN_EFFECT 0 desc="Verify basic rule is not yet in effect (XML)" cmd="crm_rule -c -r cli-prefer-rule-dummy-not-yet --output-as=xml" test_assert_validate $CRM_EX_NOT_YET_IN_EFFECT 0 desc="Verify date_spec rule with years has expired" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-only-years" test_assert $CRM_EX_EXPIRED 0 desc="Verify date_spec rule with years has expired (XML)" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-only-years --output-as=xml" test_assert_validate $CRM_EX_EXPIRED 0 desc="Verify multiple rules at once" cmd="crm_rule -c -r cli-prefer-rule-dummy-not-yet -r cli-prefer-rule-dummy-date_spec-only-years" test_assert $CRM_EX_EXPIRED 0 desc="Verify multiple rules at once, with XML output" cmd="crm_rule -c -r cli-prefer-rule-dummy-not-yet -r cli-prefer-rule-dummy-date_spec-only-years --output-as=xml" test_assert_validate $CRM_EX_EXPIRED 0 desc="Verify date_spec rule with years is in effect" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-only-years -d 20190201" test_assert $CRM_EX_OK 0 desc="Verify date_spec rule with years is in effect (XML)" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-only-years -d 20190201 --output-as=xml" test_assert_validate $CRM_EX_OK 0 desc="Try to check a rule whose date_spec does not contain years=" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-without-years" test_assert $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try to check a rule whose date_spec does not contain years= (XML)" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-without-years --output-as=xml" test_assert_validate $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try to check a rule whose date_spec contains years= and moon=" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-years-moon" test_assert $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try 
to check a rule whose date_spec contains years= and moon= (XML)" cmd="crm_rule -c -r cli-prefer-rule-dummy-date_spec-years-moon --output-as=xml" test_assert_validate $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try to check a rule with no date_expression" cmd="crm_rule -c -r cli-no-date_expression-rule" test_assert $CRM_EX_UNIMPLEMENT_FEATURE 0 desc="Try to check a rule with no date_expression (XML)" cmd="crm_rule -c -r cli-no-date_expression-rule --output-as=xml" test_assert_validate $CRM_EX_UNIMPLEMENT_FEATURE 0 unset CIB_shadow_dir } # Ensure all command output is in portable locale for comparison export LC_ALL="C" test_access_render() { local TMPXML=$(mktemp ${TMPDIR:-/tmp}/cts-cli.access_render.xml.XXXXXXXXXX) create_shadow_cib --create-empty # Create a test CIB that has ACL roles cat < "$TMPXML" EOF desc="Configure some ACLs" cmd="cibadmin -M -o acls --xml-file $TMPXML" test_assert $CRM_EX_OK desc="Enable ACLs" cmd="crm_attribute -n enable-acl -v true" test_assert $CRM_EX_OK unset CIB_user # Run cibadmin --show-access on the test CIB as an ACL-restricted user desc="An instance of ACLs render (into color)" cmd="cibadmin --force --show-access=color -Q --user tony" test_assert $CRM_EX_OK 0 desc="An instance of ACLs render (into namespacing)" cmd="cibadmin --force --show-access=namespace -Q --user tony" test_assert $CRM_EX_OK 0 desc="An instance of ACLs render (into text)" cmd="cibadmin --force --show-access=text -Q --user tony" test_assert $CRM_EX_OK 0 unset CIB_shadow_dir rm -f "$TMPXML" } function test_feature_set() { create_shadow_cib --create-empty # Import the initial test CIB with non-mixed versions desc="Import the test CIB" cmd="cibadmin --replace --xml-file $test_home/cli/crm_mon-feature_set.xml" test_assert $CRM_EX_OK desc="Complete text output, no mixed status" cmd="crm_mon -1 --show-detail" test_assert $CRM_EX_OK 0 desc="XML output, no mixed status" cmd="crm_mon --output-as=xml" test_assert_validate $CRM_EX_OK 0 # Modify the CIB to fake that the cluster has 
mixed versions desc="Fake inconsistent feature set" cmd="crm_attribute --node=cluster02 --name=#feature-set --update=3.15.0 --lifetime=reboot" test_assert $CRM_EX_OK desc="Complete text output, mixed status" cmd="crm_mon -1 --show-detail" test_assert $CRM_EX_OK 0 desc="XML output, mixed status" cmd="crm_mon --output-as=xml" test_assert_validate $CRM_EX_OK 0 unset CIB_shadow_dir } # Process command-line arguments while [ $# -gt 0 ]; do case "$1" in -t) tests="$2" shift 2 ;; -V|--verbose) verbose=1 shift ;; -v|--valgrind) export G_SLICE=always-malloc VALGRIND_CMD="valgrind $VALGRIND_OPTS" shift ;; -s) do_save=1 shift ;; -p) export PATH="$2:$PATH" shift ;; --help) echo "$USAGE_TEXT" exit $CRM_EX_OK ;; *) echo "error: unknown option $1" echo echo "$USAGE_TEXT" exit $CRM_EX_USAGE ;; esac done for t in $tests; do case "$t" in access_render) ;; agents) ;; daemons) ;; dates) ;; error_codes) ;; tools) ;; acls) ;; validity) ;; upgrade) ;; rules) ;; crm_mon) ;; feature_set) ;; *) echo "error: unknown test $t" echo echo "$USAGE_TEXT" exit $CRM_EX_USAGE ;; esac done XMLLINT_CMD=$(which xmllint 2>/dev/null) if [ $? 
-ne 0 ]; then XMLLINT_CMD="" echo "xmllint is missing - install it to validate command output" fi # Check whether we're running from source directory SRCDIR=$(dirname $test_home) if [ -x "$SRCDIR/tools/crm_simulate" ]; then path_dirs="$SRCDIR/tools" for daemon in based controld fenced schedulerd; do if [ -x "$SRCDIR/daemons/$daemon/pacemaker-${daemon}" ]; then path_dirs="$path_dirs:$SRCDIR/daemons/$daemon" fi done export PATH="$path_dirs:$PATH" echo "Using local binaries from: ${path_dirs//:/ }" if [ -x "$SRCDIR/xml" ]; then export PCMK_schema_directory="$SRCDIR/xml" echo "Using local schemas from: $PCMK_schema_directory" fi else export PATH="@CRM_DAEMON_DIR@:$PATH" export PCMK_schema_directory=@CRM_SCHEMA_DIRECTORY@ fi for t in $tests; do echo "Testing $t" TMPFILE=$(mktemp ${TMPDIR:-/tmp}/cts-cli.$t.XXXXXXXXXX) eval TMPFILE_$t="$TMPFILE" test_$t > "$TMPFILE" # @TODO Add a way to suppress this message within cibadmin, and then drop # the handling here. suppress="The supplied command can provide skewed result since it is run" suppress="$suppress under user that also gets guarded per ACLs on their" suppress="$suppress own right. Continuing since --force flag was provided." # This giant sed replaces content expected to change for each run # (timestamps, source file line numbers, etc.), build (configure options, # version numbers, etc.), or platform (system messages, etc.). # # last-rc-change= is always numeric in the CIB. However, for the crm_mon # test we also need to compare against the XML output of the crm_mon # program. There, these are shown as human readable strings (like the # output of the `date` command). 
sed -e 's|\(|\1/>|' \ -e 's|\(|\1>|' \ -e 's/Last updated: .*/Last updated:/' \ -e 's/Last change: .*/Last change:/' \ -e 's/(version .*)/(version)/' \ -e 's/last_update time=\".*\"/last_update time=\"\"/' \ -e 's/last_change time=\".*\"/last_change time=\"\"/' \ -e 's/ api-version="[^"]*"/ api-version="X"/' \ -e 's/ default="[^"]*"/ default=""/' \ -e 's/\(\* Possible values.*: .*\)(default: [^)]*)/\1(default: )/g' \ -e 's/ version="[^"]*"/ version=""/' \ -e 's/request=\".*\(crm_[a-zA-Z0-9]*\)/request=\"\1/' \ -e 's/request=\".*iso8601/request=\"iso8601/' \ -e 's/crm_feature_set="[^"]*" //'\ -e 's/@crm_feature_set=[0-9.]*, //'\ -e 's/\( "${TMPFILE}.$$" mv -- "${TMPFILE}.$$" "$TMPFILE" if [ $do_save -eq 1 ]; then cp "$TMPFILE" $test_home/cli/regression.$t.exp fi done rm -rf "${shadow_dir}" rm -f "${test_assert_outfile}" rm -f "${test_assert_errfile}" rm -f "${xmllint_outfile}" failed=0 if [ $verbose -eq 1 ]; then echo -e "\n\nResults" fi for t in $tests; do eval TMPFILE="\$TMPFILE_$t" if [ $verbose -eq 1 ]; then diff -wu $test_home/cli/regression.$t.exp "$TMPFILE" else diff -w $test_home/cli/regression.$t.exp "$TMPFILE" >/dev/null 2>&1 fi if [ $? -ne 0 ]; then failed=1 fi done echo -e "\n\nSummary" for t in $tests; do eval TMPFILE="\$TMPFILE_$t" grep -e '^\* \(Passed\|Failed\)' "$TMPFILE" done function print_or_remove_file() { eval TMPFILE="\$TMPFILE_$1" if [[ ! 
$(diff -wq $test_home/cli/regression.$1.exp "$TMPFILE") ]]; then rm -f "$TMPFILE" else echo " $TMPFILE" if [ $verbose -ne 0 ]; then echo "======================================================" cat "$TMPFILE" echo "======================================================" fi fi } if [ $num_errors -ne 0 ] && [ $failed -ne 0 ]; then echo "$num_errors tests failed; see output in:" for t in $tests; do print_or_remove_file "$t" done exit $CRM_EX_ERROR elif [ $num_errors -ne 0 ]; then echo "$num_errors tests failed" for t in $tests; do print_or_remove_file "$t" done exit $CRM_EX_ERROR elif [ $failed -eq 1 ]; then echo "$num_passed tests passed but output was unexpected; see output in:" for t in $tests; do print_or_remove_file "$t" done exit $CRM_EX_DIGEST else echo $num_passed tests passed for t in $tests; do eval TMPFILE="\$TMPFILE_$t" rm -f "$TMPFILE" done crm_shadow --force --delete $shadow >/dev/null 2>&1 exit $CRM_EX_OK fi diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 2bcdf46c42..8b0245c2e7 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3639 +1,3639 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList 
*device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; 
} free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(timeout_ms / 1000, INT_MAX); } } return default_timeout; } /*! 
* \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! 
* \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { 
action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! 
* \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? 
"" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? 
"" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { pcmk__node_status_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster_member); cmd->target_nodeid = node->cluster_layer_id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if 
(cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? 
"" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case 
';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return 
FALSE; } xpath = xpath_search(xml, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. 
*/ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, 
free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? 
timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? 
"" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? 
"" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! 
* \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ 
g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, stonith_our_uname); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. 
* Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. 
*/ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; - for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { + for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; - for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { + for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! 
* \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. 
If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. 
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range - if ((id <= 0) || (id >= ST_LEVEL_MAX)) { + if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); 
tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. 
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range - if ((id < 0) || (id >= ST_LEVEL_MAX)) { + if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean 
terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? 
action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if 
(search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! 
* \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? 
NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this 
search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? 
NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! 
* \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! 
* \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! 
* \internal
 * \brief Send a reply to a CPG peer or IPC client
 *
 * \param[in]     reply         XML reply to send
 * \param[in]     call_options  Send synchronously if st_opt_sync_call is set
 * \param[in]     remote_peer   If not NULL, name of peer node to send CPG reply
 * \param[in,out] client        If not NULL, client to send IPC reply
 */
static void
stonith_send_reply(const xmlNode *reply, int call_options,
                   const char *remote_peer, pcmk__client_t *client)
{
    CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)),
              return);

    if (remote_peer == NULL) {
        // No peer given, so reply to the local client
        do_local_reply(reply, client, call_options);
    } else {
        // A peer name takes precedence over the client when both are given
        const pcmk__node_status_t *node =
            pcmk__get_node(0, remote_peer, NULL,
                           pcmk__node_search_cluster_member);

        pcmk__cluster_send_message(node, pcmk__cluster_msg_fenced, reply);
    }
}

/*!
 * \internal
 * \brief Device search callback that builds and sends a query reply
 *
 * Packs one element per still-registered capable device into the query's
 * reply XML, sends the reply, then frees the query and the device list.
 *
 * \param[in,out] devices    List of capable device IDs (freed here)
 * \param[in,out] user_data  Query data (struct st_query_data *, freed here)
 */
static void
stonith_query_capable_device_cb(GList * devices, void *user_data)
{
    struct st_query_data *query = user_data;
    int available_devices = 0;
    xmlNode *wrapper = NULL;
    xmlNode *list = NULL;
    GList *lpc = NULL;
    pcmk__client_t *client = NULL;

    if (query->client_id != NULL) {
        client = pcmk__find_client_by_id(query->client_id);
        if ((client == NULL) && (query->remote_peer == NULL)) {
            // Neither a local client nor a peer is left to receive the reply
            crm_trace("Skipping reply to %s: no longer a client",
                      query->client_id);
            goto done;
        }
    }

    // Pack the results into XML
    wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA);
    list = pcmk__xe_create(wrapper, __func__);
    crm_xml_add(list, PCMK__XA_ST_TARGET, query->target);

    for (lpc = devices; lpc != NULL; lpc = lpc->next) {
        // List entries are device IDs, used as keys into the device table
        stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data);
        const char *action = query->action;
        xmlNode *dev = NULL;

        if (!device) {
            /* It is possible the device got unregistered while
             * determining who can fence the target */
            continue;
        }

        available_devices++;

        dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID);
        crm_xml_add(dev, PCMK_XA_ID, device->id);
        crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace);
        crm_xml_add(dev, PCMK_XA_AGENT, device->agent);

        // Has had successful monitor, list, or status on this node
        crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified);

        crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags);

        /* If the originating fencer wants to reboot the node, and we have a
         * capable device that doesn't support "reboot", remap to "off" instead.
         */
        if (!pcmk_is_set(device->flags, st_device_supports_reboot)
            && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
            crm_trace("%s doesn't support reboot, using values for off instead",
                      device->id);
            action = PCMK_ACTION_OFF;
        }

        /* Add action-specific values if available */
        add_action_specific_attributes(dev, action, device, query->target);
        if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) {
            /* A "reboot" *might* get remapped to "off" then "on", so after
             * sending the "reboot"-specific values in the main element, we add
             * sub-elements for "off" and "on" values.
             *
             * We short-circuited earlier if "reboot", "off" and "on" are all
             * disallowed for the local host. However if only one or two are
             * disallowed, we send back the results and mark which ones are
             * disallowed. If "reboot" is disallowed, this might cause problems
             * with older fencer versions, which won't check for it. Older
             * versions will ignore "off" and "on", so they are not a problem.
             */
            add_disallowed(dev, action, device, query->target,
                           pcmk_is_set(query->call_options,
                                       st_opt_allow_suicide));
            add_action_reply(dev, PCMK_ACTION_OFF, device, query->target,
                             pcmk_is_set(query->call_options,
                                         st_opt_allow_suicide));
            // Self-fencing is never allowed for the remapped "on"
            add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE);
        }

        /* A query without a target wants device parameters */
        if (query->target == NULL) {
            xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES);

            g_hash_table_foreach(device->params, hash2field, attrs);
        }
    }

    crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices);
    if (query->target) {
        crm_debug("Found %d matching device%s for target '%s'",
                  available_devices, pcmk__plural_s(available_devices),
                  query->target);
    } else {
        crm_debug("%d device%s installed",
                  available_devices, pcmk__plural_s(available_devices));
    }

    crm_log_xml_trace(list, "query-result");

    stonith_send_reply(query->reply, query->call_options, query->remote_peer,
                       client);

done:
    // The query and the device list are released on every path
    pcmk__xml_free(query->reply);
    free(query->remote_peer);
    free(query->client_id);
    free(query->target);
    free(query->action);
    free(query);
    g_list_free_full(devices, free);
}

/*!
 * \internal
 * \brief Log the result of an asynchronous command
 *
 * \param[in] cmd        Command the result is for
 * \param[in] result     Result of command
 * \param[in] pid        Process ID of command, if available
 * \param[in] next       Alternate device that will be tried if command failed
 * \param[in] op_merged  Whether this command was merged with an earlier one
 */
static void
log_async_result(const async_command_t *cmd,
                 const pcmk__action_result_t *result,
                 int pid, const char *next, bool op_merged)
{
    int log_level = LOG_ERR;
    int output_log_level = LOG_NEVER;
    guint devices_remaining = g_list_length(cmd->next_device_iter);

    GString *msg = g_string_sized_new(80); // Reasonable starting size

    // Choose log levels appropriately if we have a result
    if (pcmk__result_ok(result)) {
        log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE;
        if ((result->action_stdout != NULL)
            && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
                             pcmk__str_none)) {
            output_log_level = LOG_DEBUG;
        }
        // A successful command is not retried, so never mention a next device
        next = NULL;
    } else {
        log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR;
        if ((result->action_stdout != NULL)
            && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA,
                             pcmk__str_none)) {
            output_log_level = LOG_WARNING;
        }
    }

    // Build the log message piece by piece
    pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL);
    if (pid != 0) {
        g_string_append_printf(msg, "[%d] ", pid);
    }
    if (cmd->target != NULL) {
        pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL);
    }
    if (cmd->device != NULL) {
        pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL);
    }

    // Add exit status or execution status as appropriate
    if (result->execution_status == PCMK_EXEC_DONE) {
        g_string_append_printf(msg, "returned %d", result->exit_status);
    } else {
        pcmk__g_strcat(msg, "could not be executed: ",
                       pcmk_exec_status_str(result->execution_status), NULL);
    }

    // Add exit reason and next device if appropriate
    if (result->exit_reason != NULL) {
        pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL);
    }
    if (next != NULL) {
        pcmk__g_strcat(msg, ", retrying with ", next, NULL);
    }
    if (devices_remaining > 0) {
        g_string_append_printf(msg, " (%u device%s remaining)",
                               (unsigned int) devices_remaining,
                               pcmk__plural_s(devices_remaining));
    }
    g_string_append_printf(msg, " " QB_XS " %scall %d from %s",
                           (op_merged? "merged " : ""), cmd->id,
                           cmd->client_name);

    // Log the result
    do_crm_log(log_level, "%s", msg->str);
    g_string_free(msg, TRUE);

    // Log the output (which may have multiple lines), if appropriate
    if (output_log_level != LOG_NEVER) {
        char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);

        crm_log_output(output_log_level, prefix, result->action_stdout);
        free(prefix);
    }
}

/*!
* \internal
 * \brief Reply to requester after asynchronous command completion
 *
 * \param[in] cmd     Command that completed
 * \param[in] result  Result of command
 * \param[in] pid     Process ID of command, if available
 * \param[in] merged  If true, command was merged with another, not executed
 */
static void
send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result,
                 int pid, bool merged)
{
    xmlNode *reply = NULL;
    pcmk__client_t *client = NULL;

    CRM_CHECK((cmd != NULL) && (result != NULL), return);

    log_async_result(cmd, result, pid, NULL, merged);

    if (cmd->client != NULL) {
        client = pcmk__find_client_by_id(cmd->client);
        if ((client == NULL) && (cmd->origin == NULL)) {
            // Neither a local client nor an originating peer is left to reply to
            crm_trace("Skipping reply to %s: no longer a client", cmd->client);
            return;
        }
    }

    reply = construct_async_reply(cmd, result);
    if (merged) {
        pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true);
    }

    if (!stand_alone && pcmk__is_fencing_action(cmd->action)
        && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) {
        /* The target was also the originator, so broadcast the result on its
         * behalf (since it will be unable to).
         */
        crm_trace("Broadcast '%s' result for %s (target was also originator)",
                  cmd->action, cmd->target);
        crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST);
        crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY);
        pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, reply);
    } else {
        // Reply only to the originator
        stonith_send_reply(reply, cmd->options, cmd->origin, client);
    }

    crm_log_xml_trace(reply, "Reply");
    pcmk__xml_free(reply);

    if (stand_alone) {
        /* Do notification with a clean data object */
        xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE);

        stonith__xe_set_result(notify_data, result);
        crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target);
        crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op);
        crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost");
        crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device);
        crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id);
        crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client);

        /* NOTE(review): notify_data is not freed in this function -- confirm
         * whether fenced_send_notification() takes ownership of it.
         */
        fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result,
                                 notify_data);
        fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL);
    }
}

/*!
 * \internal
 * \brief Remove a command from its device's list of pending operations
 *
 * Does nothing if the command's device cannot be found.
 *
 * \param[in,out] cmd  Command to cancel
 */
static void
cancel_stonith_command(async_command_t * cmd)
{
    stonith_device_t *device = cmd_device(cmd);

    if (device) {
        crm_trace("Cancel scheduled '%s' action using %s",
                  cmd->action, device->id);
        device->pending_ops = g_list_remove(device->pending_ops, cmd);
    }
}

/*!
 * \internal
 * \brief Cancel and reply to any duplicates of a just-completed operation
 *
 * Check whether any fencing operations are scheduled to do the same thing as
 * one that just succeeded. If so, rather than performing the same operation
 * twice, return the result of this operation for all matching pending commands.
 *
 * \param[in,out] cmd     Fencing operation that just succeeded
 * \param[in]     result  Result of \p cmd
 * \param[in]     pid     If nonzero, process ID of agent invocation (for logs)
 *
 * \note Duplicate merging will do the right thing for either type of remapped
 *       reboot. If the executing fencer remapped an unsupported reboot to off,
 *       then cmd->action will be "reboot" and will be merged with any other
 *       reboot requests. If the originating fencer remapped a topology reboot
 *       to off then on, we will get here once with cmd->action "off" and once
 *       with "on", and they will be merged separately with similar requests.
 */
static void
reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result,
                    int pid)
{
    GList *next = NULL;

    for (GList *iter = cmd_list; iter != NULL; iter = next) {
        async_command_t *cmd_other = iter->data;

        next = iter->next; // We might delete this entry, so grab next now

        if (cmd == cmd_other) {
            continue;
        }

        /* A pending operation matches if:
         * 1. The client connections are different.
         * 2. The target is the same.
         * 3. The fencing action is the same.
         * 4. The device scheduled to execute the action is the same.
         */
        if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) ||
            !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) ||
            !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) ||
            !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) {

            continue;
        }

        crm_notice("Merging fencing action '%s'%s%s originating from "
                   "client %s with identical fencing request from client %s",
                   cmd_other->action,
                   (cmd_other->target == NULL)? "" : " targeting ",
                   pcmk__s(cmd_other->target, ""), cmd_other->client_name,
                   cmd->client_name);

        // Stop tracking the duplicate, send its result, and cancel it
        cmd_list = g_list_remove_link(cmd_list, iter);
        send_async_reply(cmd_other, result, pid, true);
        cancel_stonith_command(cmd_other);

        free_async_command(cmd_other);
        g_list_free_1(iter);
    }
}

/*!
* \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. 
*/ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! 
* \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. 
* \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! 
* \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! 
* \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. 
*/ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. 
*
 * \param[in] c   IPC client that sent request (or NULL if sent by CPG peer)
 * \param[in] op  Requested API operation (for logging only)
 *
 * \return true if sender is peer or privileged client, otherwise false
 */
static inline bool
is_privileged(const pcmk__client_t *c, const char *op)
{
    if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) {
        return true;
    } else {
        crm_warn("Rejecting IPC request '%s' from unprivileged client %s",
                 pcmk__s(op, ""), pcmk__client_name(c));
        return false;
    }
}

// CRM_OP_REGISTER
/* Build a reply that tells a newly registered IPC client its client ID.
 * Asserts that the request came from an IPC client.
 */
static xmlNode *
handle_register_request(pcmk__request_t *request)
{
    xmlNode *reply = pcmk__xe_create(NULL, "reply");

    CRM_ASSERT(request->ipc_client != NULL);
    crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER);
    crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    pcmk__set_request_flags(request, pcmk__request_reuse_options);
    return reply;
}

// STONITH_OP_EXEC
/* Execute a fence agent action. Returns NULL while the result is still
 * pending, otherwise a reply built from the request's result.
 */
static xmlNode *
handle_agent_request(pcmk__request_t *request)
{
    execute_agent_action(request->xml, &request->result);
    if (request->result.execution_status == PCMK_EXEC_PENDING) {
        return NULL;
    }
    return fenced_construct_reply(request->xml, NULL, &request->result);
}

// STONITH_OP_TIMEOUT_UPDATE
/* Pass the request's client ID, call ID, and timeout to
 * do_stonith_async_timeout_update(). No reply is returned.
 */
static xmlNode *
handle_update_timeout_request(pcmk__request_t *request)
{
    const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID);
    const char *client_id = crm_element_value(request->xml,
                                              PCMK__XA_ST_CLIENTID);
    int op_timeout = 0;

    crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout);
    do_stonith_async_timeout_update(client_id, call_id, op_timeout);
    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    return NULL;
}

// STONITH_OP_QUERY
/* Start a device search for the queried target and action. Always returns
 * NULL; stonith_query_capable_device_cb is passed to the search as its
 * callback.
 */
static xmlNode *
handle_query_request(pcmk__request_t *request)
{
    int timeout = 0;
    xmlNode *dev = NULL;
    const char *action = NULL;
    const char *target = NULL;
    const char *client_id = crm_element_value(request->xml,
                                              PCMK__XA_ST_CLIENTID);
    struct st_query_data *query = NULL;

    if (request->peer != NULL) {
        // Record it for the future notification
        create_remote_stonith_op(client_id, request->xml, TRUE);
    }

    /* Delete the DC node RELAY operation. */
    remove_relay_op(request->xml);

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml,
                           LOG_NEVER);
    if (dev != NULL) {
        const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

        if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) {
            return NULL; // No query or reply necessary
        }
        target = crm_element_value(dev, PCMK__XA_ST_TARGET);
        action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
    }

    crm_log_xml_trace(request->xml, "Query");

    // The query data is handed to the device search as the callback's user data
    query = pcmk__assert_alloc(1, sizeof(struct st_query_data));

    query->reply = fenced_construct_reply(request->xml, NULL, &request->result);
    query->remote_peer = pcmk__str_copy(request->peer);
    query->client_id = pcmk__str_copy(client_id);
    query->target = pcmk__str_copy(target);
    query->action = pcmk__str_copy(action);
    query->call_options = request->call_options;

    crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout);
    get_capable_devices(target, action, timeout,
                        pcmk_is_set(query->call_options, st_opt_allow_suicide),
                        query, stonith_query_capable_device_cb, st_device_supports_none);
    return NULL;
}

// STONITH_OP_NOTIFY
/* Set or clear the client flag named in the request for a notification type,
 * and return an IPC acknowledgement. Asserts that the request came from an
 * IPC client.
 */
static xmlNode *
handle_notify_request(pcmk__request_t *request)
{
    const char *flag_name = NULL;

    CRM_ASSERT(request->ipc_client != NULL);
    flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE);
    if (flag_name != NULL) {
        crm_debug("Enabling %s callbacks for client %s",
                  flag_name, pcmk__request_origin(request));
        pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name));
    }

    flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE);
    if (flag_name != NULL) {
        crm_debug("Disabling %s callbacks for client %s",
                  flag_name, pcmk__request_origin(request));
        pcmk__clear_client_flags(request->ipc_client,
                                 get_stonith_flag(flag_name));
    }

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    pcmk__set_request_flags(request, pcmk__request_reuse_options);

    return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL,
                                CRM_EX_OK);
}

// STONITH_OP_RELAY
/* Initiate a remote fencing operation for a forwarded request. Returns an
 * error reply if the operation cannot be initiated, otherwise NULL with a
 * pending result.
 */
static xmlNode *
handle_relay_request(pcmk__request_t *request)
{
    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
                                    LOG_TRACE);

    crm_notice("Received forwarded fencing request from "
               "%s %s to fence (%s) peer %s",
               pcmk__request_origin_type(request),
               pcmk__request_origin(request),
               crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION),
               crm_element_value(dev, PCMK__XA_ST_TARGET));

    if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) {
        fenced_set_protocol_error(&request->result);
        return fenced_construct_reply(request->xml, NULL, &request->result);
    }

    pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL);
    return NULL;
}

// STONITH_OP_FENCE
static xmlNode *
handle_fence_request(pcmk__request_t *request)
{
    if ((request->peer != NULL) || stand_alone) {
        fence_locally(request->xml, &request->result);

    } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) {
        switch (fenced_handle_manual_confirmation(request->ipc_client,
                                                  request->xml)) {
            case pcmk_rc_ok:
                pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE,
                                 NULL);
                break;
            case EINPROGRESS:
                pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING,
                                 NULL);
                break;
            default:
                fenced_set_protocol_error(&request->result);
                break;
        }

    } else {
        const char *alternate_host = NULL;
        xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml,
                                        LOG_TRACE);
        const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET);
        const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION);
        const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID);

        if (request->ipc_client != NULL) {
            int tolerance = 0;

            crm_notice("Client %s wants to fence (%s) %s using
%s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. 
*/ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); pcmk__cluster_send_message(node, pcmk__cluster_msg_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? 
NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, 
&request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) 
{ if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? 
remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? 
remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 411b2b3dc9..896a779f5d 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2598 +1,2598 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a peer_device_info_t, with each device's * information kept in a device_properties_t. 
*/
typedef struct device_properties_s {
    /* Whether access to this device has been verified */
    gboolean verified;

    /* The remaining members are indexed by the operation's "phase" */

    /* Whether this device has been executed in each phase */
    gboolean executed[st_phase_max];
    /* Whether this device is disallowed from executing in each phase */
    gboolean disallowed[st_phase_max];
    /* Action-specific timeout for each phase */
    int custom_action_timeout[st_phase_max];
    /* Action-specific maximum random delay for each phase */
    int delay_max[st_phase_max];
    /* Action-specific base delay for each phase */
    int delay_base[st_phase_max];
    /* Group of enum st_device_flags */
    uint32_t device_support_flags;
} device_properties_t;

/* One peer's reply to a device query */
typedef struct {
    /* Name of peer that sent this result */
    char *host;
    /* Only try peers for non-topology based operations once */
    gboolean tried;
    /* Number of entries in the devices table */
    int ndevices;
    /* Devices available to this host that are capable of fencing the target */
    GHashTable *devices;
} peer_device_info_t;

/* Table of remote fencing operations; destroyed by
 * free_stonith_remote_op_list()
 */
GHashTable *stonith_remote_op_list = NULL;

extern xmlNode *stonith_create_op(int call_id, const char *token,
                                  const char *op, xmlNode * data,
                                  int call_options);

static void request_peer_fencing(remote_fencing_op_t *op,
                                 peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
                                const peer_device_info_t *chosen_peer);

/* GLib comparison callback for lists of C strings (wraps strcmp()) */
static gint
sort_strings(gconstpointer a, gconstpointer b)
{
    return strcmp(a, b);
}

/* Free a peer_device_info_t (NULL is ignored): its device table, its host
 * name, and the structure itself
 */
static void
free_remote_query(gpointer data)
{
    if (data != NULL) {
        peer_device_info_t *peer = data;

        g_hash_table_destroy(peer->devices);
        free(peer->host);
        free(peer);
    }
}

/* Destroy the global table of remote operations, if it exists */
void
free_stonith_remote_op_list(void)
{
    if (stonith_remote_op_list != NULL) {
        g_hash_table_destroy(stonith_remote_op_list);
        stonith_remote_op_list = NULL;
    }
}

/* Filter criteria and running total passed to count_peer_device() */
struct peer_count_data {
    const remote_fencing_op_t *op;      // Its phase selects the executed[] slot
    gboolean verified_only;             // Count only verified devices
    uint32_t support_action_only;       // Required flag(s), or supports_none
    int count;                          // Number of devices counted so far
};

/*!
 * \internal
 * \brief Increment a counter if a device has not been executed yet
 *
 * The device is counted only if it has not been executed in the operation's
 * current phase, is verified (when only verified devices are wanted), and has
 * the required support flags (unless st_device_supports_none was given).
 *
 * \param[in]     key        Device ID (ignored)
 * \param[in]     value      Device properties
 * \param[in,out] user_data  Peer count data
 */
static void
count_peer_device(gpointer key, gpointer value, gpointer user_data)
{
    device_properties_t *props = (device_properties_t*)value;
    struct peer_count_data *data = user_data;

    if (!props->executed[data->op->phase]
        && (!data->verified_only || props->verified)
        && ((data->support_action_only == st_device_supports_none)
            || pcmk_is_set(props->device_support_flags,
                           data->support_action_only))) {
        ++(data->count);
    }
}

/*!
 * \internal
 * \brief Check the number of available devices in a peer's query results
 *
 * \param[in] op                      Operation that results are for
 * \param[in] peer                    Peer to count (if NULL, the count is 0)
 * \param[in] verified_only           Whether to count only verified devices
 * \param[in] support_on_action_only  Device support flag(s) a device must have
 *                                    to be counted, or st_device_supports_none
 *                                    to count regardless of flags
 *
 * \return Number of devices available to peer that were not already executed
 */
static int
count_peer_devices(const remote_fencing_op_t *op,
                   const peer_device_info_t *peer, gboolean verified_only,
                   uint32_t support_on_action_only)
{
    struct peer_count_data data;

    data.op = op;
    data.verified_only = verified_only;
    data.support_action_only = support_on_action_only;
    data.count = 0;
    if (peer) {
        g_hash_table_foreach(peer->devices, count_peer_device, &data);
    }
    return data.count;
}

/*!
* \internal
 * \brief Search for a device in a query result
 *
 * \param[in] op                   Operation that result is for
 * \param[in] peer                 Query result for a peer
 * \param[in] device               Device ID to search for
 * \param[in] support_action_only  Device support flag(s) the device must have,
 *                                 or st_device_supports_none to skip the check
 *
 * \return Device properties if found (and neither executed nor disallowed in
 *         the operation's current phase), NULL otherwise
 */
static device_properties_t *
find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer,
                 const char *device, uint32_t support_action_only)
{
    device_properties_t *props = g_hash_table_lookup(peer->devices, device);

    if (props && support_action_only != st_device_supports_none
        && !pcmk_is_set(props->device_support_flags, support_action_only)) {
        return NULL;
    }
    return (props && !props->executed[op->phase]
            && !props->disallowed[op->phase])? props : NULL;
}

/*!
 * \internal
 * \brief Find a device in a peer's device list and mark it as executed
 *
 * \param[in]     op                     Operation that peer result is for
 * \param[in,out] peer                   Peer with results to search
 * \param[in]     device                 ID of device to mark as done
 * \param[in]     verified_devices_only  Only consider verified devices
 *
 * \return TRUE if device was found and marked, FALSE otherwise
 */
static gboolean
grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer,
                 const char *device, gboolean verified_devices_only)
{
    device_properties_t *props = find_peer_device(op, peer, device,
                                                  fenced_support_flag(op->action));

    if ((props == NULL) || (verified_devices_only && !props->verified)) {
        return FALSE;
    }

    // The count is taken before this device is marked executed
    crm_trace("Removing %s from %s (%d remaining)",
              device, peer->host,
              count_peer_devices(op, peer, FALSE, st_device_supports_none));
    props->executed[op->phase] = TRUE;
    return TRUE;
}

/* Remove whichever of an operation's three timers (query, total, per-action)
 * are set, zeroing each ID so that a repeated call is harmless
 */
static void
clear_remote_op_timers(remote_fencing_op_t * op)
{
    if (op->query_timer) {
        g_source_remove(op->query_timer);
        op->query_timer = 0;
    }
    if (op->op_timer_total) {
        g_source_remove(op->op_timer_total);
        op->op_timer_total = 0;
    }
    if (op->op_timer_one) {
        g_source_remove(op->op_timer_one);
        op->op_timer_one = 0;
    }
}

/* Free a remote fencing operation and everything it owns. Registered as the
 * value destructor in init_stonith_remote_op_hash_table().
 */
static void
free_remote_op(gpointer data)
{
    remote_fencing_op_t *op = data;

    crm_log_xml_debug(op->request, "Destroying");

    clear_remote_op_timers(op);

    free(op->id);
    free(op->action);
    free(op->delegate);
    free(op->target);
    free(op->client_id);
    free(op->client_name);
    free(op->originator);

    if (op->query_results) {
        g_list_free_full(op->query_results, free_remote_query);
    }
    if (op->request) {
        pcmk__xml_free(op->request);
        op->request = NULL;
    }
    if (op->devices_list) {
        g_list_free_full(op->devices_list, free);
        op->devices_list = NULL;
    }
    g_list_free_full(op->automatic_list, free);

    // Only the list links are freed here, not the operations they point to
    g_list_free(op->duplicates);

    pcmk__reset_result(&op->result);
    free(op);
}

/* Create the table of remote operations in *table unless it already exists.
 * Values are destroyed with free_remote_op(); no key destructor is set.
 */
void
init_stonith_remote_op_hash_table(GHashTable **table)
{
    if (*table == NULL) {
        *table = pcmk__strkey_table(NULL, free_remote_op);
    }
}

/*!
 * \internal
 * \brief Return an operation's originally requested action (before any remap)
 *
 * \param[in] op  Operation to check
 *
 * \return Operation's original action
 */
static const char *
op_requested_action(const remote_fencing_op_t *op)
{
    return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action);
}

/*!
 * \internal
 * \brief Remap a "reboot" operation to the "off" phase
 *
 * \param[in,out] op  Operation to remap
 */
static void
op_phase_off(remote_fencing_op_t *op)
{
    crm_info("Remapping multiple-device reboot targeting %s to 'off' "
             QB_XS " id=%.8s", op->target, op->id);
    op->phase = st_phase_off;

    /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the
     * memory allocation at each phase.
     */
    strcpy(op->action, PCMK_ACTION_OFF);
}

/*!
 * \internal
 * \brief Advance a remapped reboot operation to the "on" phase
 *
 * \param[in,out] op  Operation to remap
 */
static void
op_phase_on(remote_fencing_op_t *op)
{
    GList *iter = NULL;

    crm_info("Remapped 'off' targeting %s complete, "
             "remapping to 'on' for %s " QB_XS " id=%.8s",
             op->target, op->client_name, op->id);
    op->phase = st_phase_on;

    // Same buffer reuse as op_phase_off(): "on" fits where "reboot" was
    strcpy(op->action, PCMK_ACTION_ON);

    /* Skip devices with automatic unfencing, because the cluster will handle it
     * when the node rejoins.
     */
    for (iter = op->automatic_list; iter != NULL; iter = iter->next) {
        GList *match = g_list_find_custom(op->devices_list, iter->data,
                                            sort_strings);

        if (match) {
            /* NOTE(review): g_list_remove() drops the link but does not free
             * the ID string, while the list is otherwise freed with
             * g_list_free_full(..., free) -- possible leak; confirm no caller
             * still holds the pointer before adding a free().
             */
            op->devices_list = g_list_remove(op->devices_list, match->data);
        }
    }
    g_list_free_full(op->automatic_list, free);
    op->automatic_list = NULL;

    /* Rewind device list pointer */
    op->devices = op->devices_list;
}

/*!
 * \internal
 * \brief Reset a remapped reboot operation
 *
 * \param[in,out] op  Operation to reset
 */
static void
undo_op_remap(remote_fencing_op_t *op)
{
    if (op->phase > 0) {
        crm_info("Undoing remap of reboot targeting %s for %s "
                 QB_XS " id=%.8s",
                 op->target, op->client_name, op->id);
        op->phase = st_phase_requested;
        strcpy(op->action, PCMK_ACTION_REBOOT);
    }
}

/*!
 * \internal
 * \brief Create notification data XML for a fencing operation result
 *
 * \param[in,out] parent  Parent XML element for newly created element
 * \param[in]     op      Fencer operation that completed
 *
 * \return Newly created XML to add as notification data
 * \note The caller is responsible for freeing the result.
 */
static xmlNode *
fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op)
{
    xmlNode *notify_data = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE);

    crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state);
    crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target);
    crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action);
    crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate);
    crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id);
    crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator);
    crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id);
    crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name);

    return notify_data;
}

/*!
* \internal * \brief Broadcast a fence result notification to all CPG peers * * \param[in] op Fencer operation that completed * \param[in] op_merged Whether this operation is a duplicate of another */ void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) { static int count = 0; xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); xmlNode *wrapper = NULL; xmlNode *notify_data = NULL; count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); crm_xml_add_int(bcast, PCMK_XA_COUNT, count); if (op_merged) { pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true); } wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA); notify_data = fencing_result2xml(wrapper, op); stonith__xe_set_result(notify_data, &op->result); pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, bcast); pcmk__xml_free(bcast); return; } /*! 
* \internal * \brief Reply to a local request originator and notify all subscribed clients * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; pcmk__client_t *client = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ crm_xml_add_int(data, PCMK_XA_STATE, op->state); crm_xml_add(data, PCMK__XA_ST_TARGET, op->target); crm_xml_add(data, PCMK__XA_ST_OP, op->action); reply = fenced_construct_reply(op->request, data, &op->result); crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ client = pcmk__find_client_by_id(op->client_id); if (client == NULL) { crm_trace("Skipping reply to %s: no longer a client", op->client_id); } else { do_local_reply(reply, client, op->call_options); } /* bcast to all local clients that the fencing operation happend */ notify_data = fencing_result2xml(NULL, op); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result, notify_data); pcmk__xml_free(notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; pcmk__xml_free(reply); } /*! 
* \internal * \brief Finalize all duplicates of a given fencer operation * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) { for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " QB_XS " id=%.8s", other->client_name, other->originator, pcmk_exec_status_str(op->result.execution_status), other->id); pcmk__copy_result(&op->result, &other->result); finalize_op(other, data, true); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " QB_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml, LOG_NEVER); if (match == NULL) { return crm_element_value_copy(xml, PCMK__XA_SRC); } else { return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE); } } /*! * \internal * \brief Finalize a peer fencing operation * * Clean up after a fencing operation completes. This function has two code * paths: the executioner uses it to broadcast the result to CPG peers, and then * each peer (including the executioner) uses it to process that broadcast and * notify its IPC clients of the result. * * \param[in,out] op Fencer operation that completed * \param[in,out] data If not NULL, XML reply of last delegated operation * \param[in] dup Whether this operation is a duplicate of another * (in which case, do not broadcast the result) * * \note The operation result should be set before calling this function. 
*/
static void
finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
{
    int level = LOG_ERR;
    const char *subt = NULL;
    xmlNode *local_data = NULL;     // Set only if data is allocated here
    gboolean op_merged = FALSE;

    CRM_CHECK((op != NULL), return);

    // This is a no-op if timers have already been cleared
    clear_remote_op_timers(op);

    if (op->notify_sent) {
        // Most likely, this is a timed-out action that eventually completed
        /* NOTE(review): op_merged is still FALSE at this point (it is only
         * computed further down), so " (merged)" can never appear in this
         * message -- confirm whether that is intended.
         */
        crm_notice("Operation '%s'%s%s by %s for %s@%s%s: "
                   "Result arrived too late " QB_XS " id=%.8s",
                   op->action, (op->target? " targeting " : ""),
                   (op->target? op->target : ""),
                   (op->delegate? op->delegate : "unknown node"),
                   op->client_name, op->originator,
                   (op_merged? " (merged)" : ""),
                   op->id);
        return;
    }

    set_fencing_completed(op);
    undo_op_remap(op);

    if (data == NULL) {
        data = pcmk__xe_create(NULL, "remote-op");
        local_data = data;

    } else if (op->delegate == NULL) {
        /* Take the delegate from the reply, except when no fence device was
         * available or the result is invalid with an "expired" exit status
         */
        switch (op->result.execution_status) {
            case PCMK_EXEC_NO_FENCE_DEVICE:
                break;

            case PCMK_EXEC_INVALID:
                if (op->result.exit_status != CRM_EX_EXPIRED) {
                    op->delegate = delegate_from_xml(data);
                }
                break;

            default:
                op->delegate = delegate_from_xml(data);
                break;
        }
    }

    if (dup || (crm_element_value(data, PCMK__XA_ST_OP_MERGED) != NULL)) {
        op_merged = true;
    }

    /* Tell everyone the operation is done, we will continue
     * with doing the local notifications once we receive
     * the broadcast back. */
    subt = crm_element_value(data, PCMK__XA_SUBT);
    if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) {
        /* Defer notification until the bcast message arrives */
        fenced_broadcast_op_result(op, op_merged);
        pcmk__xml_free(local_data);
        return;
    }

    // Log at error level only for a failed, non-duplicate op that we originated
    if (pcmk__result_ok(&op->result) || dup
        || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
        level = LOG_NOTICE;
    }
    do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) "
               QB_XS " id=%.8s",
               op->action, (op->target? " targeting " : ""),
               (op->target? op->target : ""),
               (op->delegate? op->delegate : "unknown node"),
               op->client_name, op->originator,
               (op_merged? " (merged)" : ""),
               crm_exit_str(op->result.exit_status),
               pcmk_exec_status_str(op->result.execution_status),
               ((op->result.exit_reason == NULL)? "" : ": "),
               ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
               op->id);

    handle_local_reply_and_notify(op, data);

    if (!dup) {
        finalize_op_duplicates(op, data);
    }

    /* Free non-essential parts of the record
     * Keep the record around so we can query the history
     */
    if (op->query_results) {
        g_list_free_full(op->query_results, free_remote_query);
        op->query_results = NULL;
    }
    if (op->request) {
        pcmk__xml_free(op->request);
        op->request = NULL;
    }

    pcmk__xml_free(local_data);
}

/*!
 * \internal
 * \brief Finalize a watchdog fencer op after the waiting time expires
 *
 * The operation is marked done with a successful result before being finalized.
 *
 * \param[in,out] userdata  Fencer operation that completed
 *
 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 */
static gboolean
remote_op_watchdog_done(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    // The timer will not run again, so forget its ID
    op->op_timer_one = 0;

    crm_notice("Self-fencing (%s) by %s for %s assumed complete "
               QB_XS " id=%.8s",
               op->action, op->target, op->client_name, op->id);
    op->state = st_done;
    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    finalize_op(op, NULL, false);
    return G_SOURCE_REMOVE;
}

/*!
 * \internal
 * \brief Handle expiration of the timer for a single peer action
 *
 * Records a timeout result, drops any remaining client-requested delay, and
 * asks for fencing to be attempted again with no particular peer chosen.
 *
 * \param[in,out] userdata  Fencer operation whose peer action timed out
 *
 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 */
static gboolean
remote_op_timeout_one(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    op->op_timer_one = 0;

    crm_notice("Peer's '%s' action targeting %s for client %s timed out " QB_XS
               " id=%.8s", op->action, op->target, op->client_name, op->id);
    pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
                     "Peer did not return fence result within timeout");

    // The requested delay has been applied for the first device
    if (op->client_delay > 0) {
        op->client_delay = 0;
        crm_trace("Try another device for '%s' action targeting %s "
                  "for client %s without delay " QB_XS " id=%.8s",
                  op->action, op->target, op->client_name, op->id);
    }

    // Try another device, if appropriate
    request_peer_fencing(op, NULL);
    return G_SOURCE_REMOVE;
}

/*!
* \internal
 * \brief Finalize a remote fencer operation that timed out
 *
 * \param[in,out] op      Fencer operation that timed out
 * \param[in]     reason  Readable description of what step timed out
 */
static void
finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
{
    crm_debug("Action '%s' targeting %s for client %s timed out "
              QB_XS " id=%.8s",
              op->action, op->target, op->client_name, op->id);

    if (op->phase == st_phase_on) {
        /* A remapped reboot operation timed out in the "on" phase, but the
         * "off" phase completed successfully, so quit trying any further
         * devices, and return success.
         */
        op->state = st_done;
        pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
    } else {
        op->state = st_failed;
        pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
    }
    finalize_op(op, NULL, false);
}

/*!
 * \internal
 * \brief Handle expiration of an operation's total timeout timer
 *
 * An operation that is already done is only logged; any other operation is
 * finalized as timed out.
 *
 * \param[in,out] userdata  Fencer operation that timed out
 *
 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 */
static gboolean
remote_op_timeout(gpointer userdata)
{
    remote_fencing_op_t *op = userdata;

    // The timer will not run again, so forget its ID
    op->op_timer_total = 0;

    if (op->state == st_done) {
        crm_debug("Action '%s' targeting %s for client %s already completed "
                  QB_XS " id=%.8s",
                  op->action, op->target, op->client_name, op->id);
    } else {
        finalize_timed_out_op(userdata, "Fencing did not complete within a "
                                        "total timeout based on the "
                                        "configured timeout and retries for "
                                        "any devices attempted");
    }
    return G_SOURCE_REMOVE;
}

/*!
 * \internal
 * \brief Handle expiration of an operation's device query timer
 *
 * If the operation is neither done nor executing, fencing is requested when
 * any query results have arrived; otherwise the operation is finalized as
 * timed out.
 *
 * \param[in,out] data  Fencer operation whose query timer expired
 *
 * \return G_SOURCE_REMOVE (which tells glib not to restart timer)
 */
static gboolean
remote_op_query_timeout(gpointer data)
{
    remote_fencing_op_t *op = data;

    op->query_timer = 0;

    if (op->state == st_done) {
        crm_debug("Operation %.8s targeting %s already completed",
                  op->id, op->target);
    } else if (op->state == st_exec) {
        crm_debug("Operation %.8s targeting %s already in progress",
                  op->id, op->target);
    } else if (op->query_results) {
        // Query succeeded, so attempt the actual fencing
        crm_debug("Query %.8s targeting %s complete (state=%s)",
                  op->id, op->target, stonith_op_state_str(op->state));
        request_peer_fencing(op, NULL);
    } else {
        crm_debug("Query %.8s targeting %s timed out (state=%s)",
                  op->id, op->target, stonith_op_state_str(op->state));
        finalize_timed_out_op(op, "No capable peers replied to device query "
                                  "within timeout");
    }
    return G_SOURCE_REMOVE;
}

/* Return TRUE if the topology entry is NULL or none of its levels is set */
static gboolean
topology_is_empty(stonith_topology_t *tp)
{
    int i;

    if (tp == NULL) {
        return TRUE;
    }

-    for (i = 0; i < ST_LEVEL_MAX; i++) {
+    for (i = 0; i < ST__LEVEL_COUNT; i++) {
        if (tp->levels[i] != NULL) {
            return FALSE;
        }
    }
    return TRUE;
}

/*!
 * \internal
 * \brief Add a device to an operation's automatic unfencing list
 *
 * A copy of the ID is stored; an ID already in the list is not added again.
 *
 * \param[in,out] op      Operation to modify
 * \param[in]     device  Device ID to add
 */
static void
add_required_device(remote_fencing_op_t *op, const char *device)
{
    GList *match = g_list_find_custom(op->automatic_list, device,
                                      sort_strings);

    if (!match) {
        op->automatic_list = g_list_prepend(op->automatic_list,
                                            pcmk__str_copy(device));
    }
}

/*!
 * \internal
 * \brief Remove a device from the automatic unfencing list
 *
 * \param[in,out] op      Operation to modify
 * \param[in]     device  Device ID to remove
 */
static void
remove_required_device(remote_fencing_op_t *op, const char *device)
{
    GList *match = g_list_find_custom(op->automatic_list, device,
                                      sort_strings);

    if (match) {
        /* NOTE(review): entries are copies made by add_required_device(), but
         * g_list_remove() does not free the removed string -- possible leak;
         * confirm that \p device can never alias the entry before freeing it.
         */
        op->automatic_list = g_list_remove(op->automatic_list, match->data);
    }
}

/* deep copy the device list */
/* Any existing list (and its strings) is freed first, and the operation's
 * current-device pointer is reset to the head of the new list
 */
static void
set_op_device_list(remote_fencing_op_t * op, GList *devices)
{
    GList *lpc = NULL;

    if (op->devices_list) {
        g_list_free_full(op->devices_list, free);
        op->devices_list = NULL;
    }
    for (lpc = devices; lpc != NULL; lpc = lpc->next) {
        const char *device = lpc->data;

        op->devices_list = g_list_append(op->devices_list,
                                         pcmk__str_copy(device));
    }
    op->devices = op->devices_list;
}

/*!
* \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch (tp->kind) { case fenced_target_by_attribute: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case fenced_target_by_pattern: /* This level targets node names matching a pattern, so tp->target * (and tp->target_pattern) is a regular expression. 
*/ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case fenced_target_by_name: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); default: break; } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? 
pcmk_rc_ok : ENODEV; } CRM_ASSERT(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; - } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); + } while (op->level < ST__LEVEL_COUNT && tp->levels[op->level] == NULL); - if (op->level < ST_LEVEL_MAX) { + if (op->level < ST__LEVEL_COUNT) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level if ((op->level > 1) && (op->client_delay > 0)) { op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_info("All %sfencing options targeting %s for client %s@%s failed " QB_XS " id=%.8s", (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! 
* \internal * \brief If fencing operation is a duplicate, merge it into the other one * * \param[in,out] op Fencing operation to check */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); pcmk__node_status_t *node = NULL; if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: suicide for %s", op->id, other->id, other->target); continue; } node = pcmk__get_node(0, other->originator, NULL, pcmk__node_search_cluster_member); if (!fencing_peer_active(node)) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " QB_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%lld vs. 
%lld + %ds)", op->id, other->id, (long long)now, (long long)other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %ds", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " QB_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; pcmk__node_status_t *entry = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } /*! 
* \internal * \brief Process a manual confirmation of a pending fence action * * \param[in] client IPC client that sent confirmation * \param[in,out] msg Request XML with manual confirmation * * \return Standard Pacemaker return code */ int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); CRM_CHECK(dev != NULL, return EPROTO); crm_notice("Received manual confirmation that %s has been fenced", pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET), "unknown target")); op = initiate_remote_stonith_op(client, msg, TRUE); if (op == NULL) { return EPROTO; } op->state = st_done; set_fencing_completed(op); op->delegate = pcmk__str_copy("a human"); // For the fencer's purposes, the fencing operation is done pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, msg, false); /* For the requester's purposes, the operation is still pending. The * actual result will be sent asynchronously via the operation's done_cb(). */ return EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request, LOG_NEVER); int call_options = 0; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. 
*/ if (peer && dev) { const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION); /* The node initiating the stonith operation. If an operation is relayed, * this is the last node the operation lands on. When in standalone mode, * origin is the ID of the client that originated the operation. * * Or may be the name of the function that created the operation. */ op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN); if (op->originator == NULL) { /* Local or relayed request */ op->originator = pcmk__str_copy(stonith_our_uname); } // Delegate may not be set op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE); op->created = time(NULL); CRM_LOG_ASSERT(client != NULL); op->client_id = pcmk__str_copy(client); /* For a RELAY operation, set fenced on the client. 
*/ operation = crm_element_value(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = crm_element_value_copy(request, PCMK__XA_ST_CLIENTNAME); } op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET); // @TODO Figure out how to avoid copying XML here op->request = pcmk__xml_copy(NULL, request); crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); op->call_options = call_options; crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %ds, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if ((node != NULL) && (node->name != NULL)) { pcmk__str_update(&(op->target), node->name); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } /*! 
* \internal * \brief Create a peer fencing operation from a request, and initiate it * * \param[in] client IPC client that made request (NULL to get from request) * \param[in] request Request XML * \param[in] manual_ack Whether this is a manual action confirmation * * \return Newly created operation on success, otherwise NULL */ remote_fencing_op_t * initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: // advance_topology_level() exhausted levels pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "All topology levels failed"); crm_warn("Could not request peer fencing (%s) targeting %s " QB_XS " id=%.8s", op->action, op->target, op->id); finalize_op(op, NULL, false); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " QB_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " QB_XS " id=%.8s state=%s base_timeout=%ds", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(query, PCMK__XA_ST_TARGET, op->target); crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op)); crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator); 
crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name); crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = crm_element_value(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); if (relay_op_id) { crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id); } } pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, query); pcmk__xml_free(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static bool is_watchdog_fencing(const remote_fencing_op_t *op, const char *device) { return (stonith_watchdog_timeout_ms > 0 // Only an explicit mismatch is considered not a watchdog fencing. && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches) && pcmk__is_fencing_action(op->action) && node_does_watchdog_fencing(op->target)); } static peer_device_info_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? 
TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { peer_device_info_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if (!peer->tried && count_peer_devices(op, peer, verified_devices_only, fenced_support_flag(op->action))) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static peer_device_info_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; peer_device_info_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return 
peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); /* With a simple watchdog fencing configuration without a topology, * "device" is NULL here. Consider it should be done with watchdog fencing. */ if (is_watchdog_fencing(op, device)) { crm_info("Couldn't contact watchdog-fencing target-node (%s)", op->target); /* check_watchdog_fencing_and_wait will log additional info */ } else { crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); } return NULL; } static int valid_fencing_timeout(int specified_timeout, bool action_specific, const remote_fencing_op_t *op, const char *device) { int timeout = specified_timeout; if (!is_watchdog_fencing(op, device)) { return timeout; } timeout = (int) QB_MIN(QB_MAX(specified_timeout, stonith_watchdog_timeout_ms / 1000), INT_MAX); if (timeout > specified_timeout) { if (action_specific) { crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", op->action, specified_timeout, device? 
device : "watchdog", timeout, timeout); } else { crm_warn("Fencing timeout %ds is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", specified_timeout, timeout, timeout); } } return timeout; } static int get_device_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, bool with_delay) { int timeout = op->base_timeout; device_properties_t *props; timeout = valid_fencing_timeout(op->base_timeout, false, op, device); if (!peer || !device) { return timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return timeout; } if (props->custom_action_timeout[op->phase]) { timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase], true, op, device); } // op->client_delay < 0 means disable any static/random fencing delays if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max timeout += (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); } return timeout; } struct timeout_data { const remote_fencing_op_t *op; const peer_device_info_t *peer; int total_timeout; }; /*! 
* \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in,out] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id, true); } } static int get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer) { long long total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; GList *auto_list = NULL; if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. 
*/ - for (i = 0; i < ST_LEVEL_MAX; i++) { + for (i = 0; i < ST__LEVEL_COUNT; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { bool found = false; for (iter = op->query_results; iter != NULL; iter = iter->next) { const peer_device_info_t *peer = iter->data; if (auto_list) { GList *match = g_list_find_custom(auto_list, device_list->data, sort_strings); if (match) { auto_list = g_list_remove(auto_list, match->data); } } if (find_peer_device(op, peer, device_list->data, fenced_support_flag(op->action))) { total_timeout += get_device_timeout(op, peer, device_list->data, true); found = true; break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ /* in case of watchdog-device we add the timeout to the budget if didn't get a reply */ if (!found && is_watchdog_fencing(op, device_list->data)) { total_timeout += stonith_watchdog_timeout_ms / 1000; } } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ //Add only exists automatic_list device timeout if (auto_list) { for (iter = auto_list; iter != NULL; iter = iter->next) { GList *iter2 = NULL; for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) { peer_device_info_t *peer = iter2->data; if (find_peer_device(op, peer, iter->data, st_device_supports_on)) { total_timeout += get_device_timeout(op, peer, iter->data, true); break; } } } } g_list_free(auto_list); } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = valid_fencing_timeout(op->base_timeout, false, op, NULL); } if (total_timeout <= 0) { total_timeout = op->base_timeout; } /* Take any requested fencing delay into account to prevent it from eating * up the total timeout. 
*/ if (op->client_delay > 0) { total_timeout += op->client_delay; } return (int) QB_MIN(total_timeout, INT_MAX); } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE); call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID); client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { // Client is connected to this node, so send update directly to them do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id); crm_xml_add(update, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout); pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL, pcmk__node_search_cluster_member), pcmk__cluster_msg_fenced, update); pcmk__xml_free(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, 
op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Fencer operation to advance * \param[in] device ID of device that just completed * \param[in,out] msg If not NULL, XML reply of last delegated operation */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. 
*/ op_phase_on(op); } // This function is only called if the previous device succeeded pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s", op->target, op->client_name, op->originator); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; } request_peer_fencing(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; finalize_op(op, msg, false); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX); crm_notice("Waiting %s for %s to self-fence (%s) for " "client %s " QB_XS " id=%.8s", pcmk__readable_interval(timeout_ms), op->target, op->action, op->client_name, op->id); if (op->op_timer_one) { g_source_remove(op->op_timer_one); } op->op_timer_one = g_timeout_add(timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } /*! * \internal * \brief Ask a peer to execute a fencing operation * * \param[in,out] op Fencing operation to be executed * \param[in,out] peer If NULL or topology is in use, choose best peer to * execute the fencing, otherwise use this peer */ static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) { const char *device = NULL; int timeout; CRM_CHECK(op != NULL, return); crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((op->phase == st_phase_on) && (op->devices != NULL)) { /* We are in the "on" phase of a remapped topology reboot. 
If this * device has pcmk_reboot_action="off", or doesn't support the "on" * action, skip it. * * We can't check device properties at this point because we haven't * chosen a peer for this stage yet. Instead, we check the local node's * knowledge about the device. If different versions of the fence agent * are installed on different nodes, there's a chance this could be * mistaken, but the worst that could happen is we don't try turning the * node back on when we should. */ device = op->devices->data; if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } if (!fenced_device_supports_on(device)) { crm_info("Not turning %s back on using %s because the agent " "doesn't support 'on'", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } } timeout = op->base_timeout; if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s" QB_XS "id=%.8s", op->total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore the caller's peer preference if topology is in use, because * that peer might not have access to the required device. With * topology, stonith_choose_peer() removes the device from further * consideration, so the timeout must be calculated beforehand. * * @TODO Basing the total timeout on the caller's preferred peer (above) * is less than ideal. 
*/ peer = stonith_choose_peer(op); device = op->devices->data; /* Fencing timeout sent to peer takes no delay into account. * The peer will add a dedicated timer for any delay upon * schedule_stonith_command(). */ timeout = get_device_timeout(op, peer, device, false); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); const pcmk__node_status_t *peer_node = pcmk__get_node(0, peer->host, NULL, pcmk__node_search_cluster_member); if (op->client_delay > 0) { /* Take requested fencing delay into account to prevent it from * eating up the timeout. */ timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; } crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout); crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options); crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device, true); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " QB_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device); } else { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " QB_XS " for client %s (%ds, %s)", peer->host, op->action, op->target, op->client_name, timeout_one, pcmk__readable_interval(stonith_watchdog_timeout_ms)); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } if 
(!is_watchdog_fencing(op, device) || !check_watchdog_fencing_and_wait(op)) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for suicide. Local pe-execution in sbd may detect the node as unclean and lead to timely suicide. Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is questionable. */ /* coming here we're not waiting for watchdog timeout - thus engage timer with timout evaluated before */ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } pcmk__cluster_send_message(peer_node, pcmk__cluster_msg_fenced, remote_op); peer->tried = TRUE; pcmk__xml_free(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. 
*/ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " QB_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_CHECK(op->state < st_done, return); finalize_timed_out_op(op, "All nodes failed, or are unable, to " "fence target"); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if (is_watchdog_fencing(op, device) && check_watchdog_fencing_and_wait(op)) { /* Consider a watchdog fencing targeting an offline node executing * once it starts waiting for the target to self-fence. So that when * the query timer pops, remote_op_query_timeout() considers the * fencing already in progress. */ op->state = st_exec; return; } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } /* ... else use existing result from previous failed attempt * (topology is not in use, and no devices remain to be attempted). 
* Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would * prevent finalize_op() from setting the correct delegate if * needed. */ crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; finalize_op(op, NULL, false); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " QB_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const peer_device_info_t *peer_a = a; const peer_device_info_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not * * \param[in] op Fencing operation with topology to check */ static gboolean all_topology_devices_found(const remote_fencing_op_t *op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__is_fencing_action(op->action)) { /* Don't count the devices on the target node if we are killing * the target node. 
*/
        skip_target = TRUE;
    }

    // Every device at every level must be matched by some queried peer
-    for (i = 0; i < ST_LEVEL_MAX; i++) {
+    for (i = 0; i < ST__LEVEL_COUNT; i++) {
        for (device = tp->levels[i]; device; device = device->next) {
            match = NULL;
            // Stop scanning peers as soon as one of them offers this device
            for (iter = op->query_results; iter && !match; iter = iter->next) {
                peer_device_info_t *peer = iter->data;

                if (skip_target && pcmk__str_eq(peer->host, op->target,
                                                pcmk__str_casei)) {
                    continue;
                }
                match = find_peer_device(op, peer, device->data,
                                         st_device_supports_none);
            }
            if (!match) {
                return FALSE;
            }
        }
    }

    return TRUE;
}

/*!
 * \internal
 * \brief Parse action-specific device properties from XML
 *
 * Reads the action timeout, maximum and base delay, and the "disallowed"
 * marker for the given phase. For the "on" action, a device flagged as
 * required is also added to the operation via add_required_device().
 *
 * \param[in]     xml     XML element containing the properties
 * \param[in]     peer    Name of peer that sent XML (for logs)
 * \param[in]     device  Device ID (for logs)
 * \param[in]     action  Action the properties relate to (for logs)
 * \param[in,out] op      Fencing operation that properties are being parsed for
 * \param[in]     phase   Phase the properties relate to
 * \param[in,out] props   Device properties to update
 */
static void
parse_action_specific(const xmlNode *xml, const char *peer, const char *device,
                      const char *action, remote_fencing_op_t *op,
                      enum st_remap_phase phase, device_properties_t *props)
{
    // Each value is reset to 0 first, so a missing attribute reads as "unset"
    props->custom_action_timeout[phase] = 0;
    crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT,
                          &props->custom_action_timeout[phase]);
    if (props->custom_action_timeout[phase]) {
        crm_trace("Peer %s with device %s returned %s action timeout %ds",
                  peer, device, action, props->custom_action_timeout[phase]);
    }

    props->delay_max[phase] = 0;
    crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX,
                          &props->delay_max[phase]);
    if (props->delay_max[phase]) {
        crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s",
                  peer, device, props->delay_max[phase], action);
    }

    props->delay_base[phase] = 0;
    crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE,
                          &props->delay_base[phase]);
    if (props->delay_base[phase]) {
        crm_trace("Peer %s with device %s returned base delay %ds for %s",
                  peer, device, props->delay_base[phase], action);
    }

    /* Handle devices with automatic unfencing */
    if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
        int required = 0;

        crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required);
        if (required) {
            crm_trace("Peer %s requires device %s to execute for action %s",
                      peer, device, action);
            add_required_device(op, device);
        }
    }

    /* If a reboot is remapped to off+on, it's possible that a node is allowed
     * to perform one action but not another.
     */
    if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) {
        props->disallowed[phase] = TRUE;
        crm_trace("Peer %s is disallowed from executing %s for device %s",
                  peer, action, device);
    }
}

/*!
 * \internal
 * \brief Parse one device's properties from peer's XML query reply
 *
 * A newly allocated property set is inserted into \p peer's device table
 * under a copy of \p device before it is filled in.
 *
 * \param[in]     xml       XML node containing device properties
 * \param[in,out] op        Operation that query and reply relate to
 * \param[in,out] peer      Peer's device information
 * \param[in]     device    ID of device being parsed
 */
static void
add_device_properties(const xmlNode *xml, remote_fencing_op_t *op,
                      peer_device_info_t *peer, const char *device)
{
    xmlNode *child;
    int verified = 0;
    device_properties_t *props =
        pcmk__assert_alloc(1, sizeof(device_properties_t));
    int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */

    /* Add a new entry to this peer's devices list */
    g_hash_table_insert(peer->devices, pcmk__str_copy(device), props);

    /* Peers with verified (monitored) access will be preferred */
    crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified);
    if (verified) {
        crm_trace("Peer %s has confirmed a verified device %s",
                  peer->host, device);
        props->verified = TRUE;
    }

    // If the attribute is absent, the default assigned above is kept
    crm_element_value_int(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &flags);
    props->device_support_flags = flags;

    /* Parse action-specific device properties */
    parse_action_specific(xml, peer->host, device, op_requested_action(op),
                          op, st_phase_requested, props);
    for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
         child = pcmk__xe_next(child)) {
        /* Replies for "reboot" operations will include the action-specific
         * values for "off" and "on" in child elements, just in case the reboot
         * winds up getting remapped.
         */
        if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) {
            parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF,
                                  op, st_phase_off, props);

        } else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON,
                                pcmk__str_none)) {
            parse_action_specific(child, peer->host, device, PCMK_ACTION_ON,
                                  op, st_phase_on, props);
        }
    }
}

/*!
 * \internal
 * \brief Parse a peer's XML query reply and add it to operation's results
 *
 * The new entry is inserted into the operation's query results in
 * sort_peers() order.
 *
 * \param[in,out] op        Operation that query and reply relate to
 * \param[in]     host      Name of peer that sent this reply
 * \param[in]     ndevices  Number of devices expected in reply
 * \param[in]     xml       XML node containing device list
 *
 * \return Newly allocated result structure with parsed reply
 */
static peer_device_info_t *
add_result(remote_fencing_op_t *op, const char *host, int ndevices,
           const xmlNode *xml)
{
    peer_device_info_t *peer = pcmk__assert_alloc(1,
                                                  sizeof(peer_device_info_t));
    xmlNode *child;

    peer->host = pcmk__str_copy(host);
    peer->devices = pcmk__strkey_table(free, free);

    /* Each child element describes one capable device available to the peer */
    for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL;
         child = pcmk__xe_next(child)) {
        const char *device = pcmk__xe_id(child);

        // Children without an ID are skipped
        if (device) {
            add_device_properties(child, op, peer, device);
        }
    }

    // A mismatch with the advertised count is logged but the result is kept
    peer->ndevices = g_hash_table_size(peer->devices);
    CRM_CHECK(ndevices == peer->ndevices,
              crm_err("Query claimed to have %d device%s but %d found",
                      ndevices, pcmk__plural_s(ndevices), peer->ndevices));

    op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers);
    return peer;
}

/*!
* \internal
 * \brief Handle a peer's reply to our fencing query
 *
 * Parse a query result from XML and store it in the remote operation
 * table, and when enough replies have been received, issue a fencing request.
 *
 * \param[in] msg  XML reply received
 *
 * \return pcmk_ok on success, -errno on error (-EPROTO if the reply lacks the
 *         operation ID or device count, -EOPNOTSUPP if the operation is
 *         unknown or expired)
 *
 * \note See initiate_remote_stonith_op() for how the XML query was initially
 *       formed, and stonith_query() for how the peer formed its XML reply.
 */
int
process_remote_stonith_query(xmlNode *msg)
{
    int ndevices = 0;
    gboolean host_is_target = FALSE;
    gboolean have_all_replies = FALSE;
    const char *id = NULL;
    const char *host = NULL;
    remote_fencing_op_t *op = NULL;
    peer_device_info_t *peer = NULL;
    uint32_t replies_expected;
    xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR);

    CRM_CHECK(dev != NULL, return -EPROTO);

    id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP);
    CRM_CHECK(id != NULL, return -EPROTO);

    // From here on, dev is the element carrying the available-device count
    dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR);
    CRM_CHECK(dev != NULL, return -EPROTO);
    crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices);

    op = g_hash_table_lookup(stonith_remote_op_list, id);
    if (op == NULL) {
        crm_debug("Received query reply for unknown or expired operation %s",
                  id);
        return -EOPNOTSUPP;
    }

    // Expect the smaller of the current active-peer count and the op's own count
    replies_expected = fencing_active_peers();
    if (op->replies_expected < replies_expected) {
        replies_expected = op->replies_expected;
    }
    // The reply counter is incremented here, regardless of the op's state
    if ((++op->replies >= replies_expected) && (op->state == st_query)) {
        have_all_replies = TRUE;
    }
    host = crm_element_value(msg, PCMK__XA_SRC);
    host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei);

    crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s",
             op->replies, replies_expected, host,
             op->target, op->action, ndevices, pcmk__plural_s(ndevices), id);
    // Replies reporting no devices are counted but not stored (peer stays NULL)
    if (ndevices > 0) {
        peer = add_result(op, host, ndevices, dev);
    }

    pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);

    if (pcmk_is_set(op->call_options, st_opt_topology)) {
        /* If we start the fencing before all the topology results are in,
         * it is possible fencing levels will be skipped because of the missing
         * query results. */
        if (op->state == st_query && all_topology_devices_found(op)) {
            /* All the query results are in for the topology, start the fencing ops. */
            crm_trace("All topology devices found");
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);
        }

    } else if (op->state == st_query) {
        int nverified = count_peer_devices(op, peer, TRUE,
                                           fenced_support_flag(op->action));

        /* We have a result for a non-topology fencing op that looks promising,
         * go ahead and start fencing before query timeout */
        if ((peer != NULL) && !host_is_target && nverified) {
            /* we have a verified device living on a peer that is not the target */
            crm_trace("Found %d verified device%s",
                      nverified, pcmk__plural_s(nverified));
            request_peer_fencing(op, peer);

        } else if (have_all_replies) {
            crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
                     replies_expected, op->replies);
            request_peer_fencing(op, NULL);

        } else {
            crm_trace("Waiting for more peer results before launching fencing operation");
        }

    } else if ((peer != NULL) && (op->state == st_done)) {
        crm_info("Discarding query result from %s (%d device%s): "
                 "Operation is %s", peer->host,
                 peer->ndevices, pcmk__plural_s(peer->ndevices),
                 stonith_op_state_str(op->state));
    }

    return pcmk_ok;
}

/*!
 * \internal
 * \brief Handle a peer's reply to a fencing request
 *
 * Parse a fencing reply from XML, and either finalize the operation
 * or attempt another device as appropriate.
* * \param[in] msg XML reply received */ void fenced_process_fencing_reply(xmlNode *msg) { const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR); pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(dev != NULL, return); id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(id != NULL, return); dev = stonith__find_xe_with_result(msg); CRM_CHECK(dev != NULL, return); stonith__xe_get_result(dev, &result); device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if ((op == NULL) && pcmk__result_ok(&result)) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); pcmk__reset_result(&result); return; } pcmk__reset_result(&op->result); op->result = result; // The operation takes ownership of the result if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return; } if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT), PCMK__VALUE_BROADCAST, pcmk__str_none)) { if (pcmk__result_ok(&op->result)) { op->state = st_done; } else { op->state = st_failed; } finalize_op(op, msg, false); return; } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. 
*/ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = NULL; const char *reason = op->result.exit_reason; /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { finalize_op(op, msg, false); return; } device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID); if ((op->phase == 2) && !pcmk__result_ok(&op->result)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s " "after successful 'off'", device, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : ": ", (reason == NULL)? "" : reason, op->target); pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: " "%s%s%s%s", op->action, op->target, ((device == NULL)? "" : " using "), ((device == NULL)? "" : device), op->client_name, op->originator, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } if (pcmk__result_ok(&op->result)) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg); return; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. 
*/ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; finalize_op(op, msg, false); return; } } } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) { op->state = st_done; finalize_op(op, msg, false); return; } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT) && (op->devices == NULL)) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; finalize_op(op, msg, false); return; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (result was: %s)", op->target, op->originator, op->client_name, pcmk_exec_status_str(op->result.execution_status)); request_peer_fencing(op, NULL); } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index ff75616f49..75ae55def2 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,330 +1,330 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * This source code is licensed under the GNU General Public 
License version 2
 * or later (GPLv2+) WITHOUT ANY WARRANTY.
 */

#include                    // uint32_t, uint64_t
#include                    // xmlNode

#include
#include
#include
#include

/*!
 * \internal
 * \brief Check whether target has already been fenced recently
 *
 * \param[in] tolerance  Number of seconds to look back in time
 * \param[in] target     Name of node to search for
 * \param[in] action     Action we want to match
 *
 * \return TRUE if an equivalent fencing operation took place in the last
 *         \p tolerance seconds, FALSE otherwise
 */
gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action);

/* Fencer's record of one fence device */
typedef struct stonith_device_s {
    char *id;
    char *agent;
    char *namespace;

    /*! list of actions that must execute on the target node. Used for unfencing */
    GString *on_target_actions;
    GList *targets;
    time_t targets_age;
    gboolean has_attr_map;

    // Whether target's nodeid should be passed as a parameter to the agent
    gboolean include_nodeid;

    /* whether the cluster should automatically unfence nodes with the device */
    gboolean automatic_unfencing;
    guint priority;

    uint32_t flags; // Group of enum st_device_flags

    GHashTable *params;
    GHashTable *aliases;
    GList *pending_ops;
    mainloop_timer_t *timer;
    crm_trigger_t *work;
    xmlNode *agent_metadata;

    /*! A verified device is one that has contacted the
     * agent successfully to perform a monitor operation */
    gboolean verified;

    gboolean cib_registered;
    gboolean api_registered;
    gboolean dirty;
} stonith_device_t;

/* These values are used to index certain arrays by "phase". Usually an
 * operation has only one "phase", so phase is always zero. However, some
 * reboots are remapped to "off" then "on", in which case "reboot" will be
 * phase 0, "off" will be phase 1 and "on" will be phase 2.
 */
enum st_remap_phase {
    st_phase_requested = 0,
    st_phase_off = 1,
    st_phase_on = 2,
    st_phase_max = 3    // Number of phases (array size), not a phase itself
};

typedef struct remote_fencing_op_s {
    /* The unique id associated with this operation */
    char *id;
    /*! The node this operation will fence */
    char *target;
    /*! The fencing action to perform on the target. (reboot, on, off) */
    char *action;

    /*! When was the fencing action recorded (seconds since epoch) */
    time_t created;

    /*! Marks if the final notifications have been sent to local stonith clients. */
    gboolean notify_sent;
    /*! The number of query replies received */
    guint replies;
    /*! The number of query replies expected */
    guint replies_expected;
    /*! Does this node own control of this operation */
    gboolean owner;
    /*! After the query is complete, this is the high-level timer that expires
     * the entire operation */
    guint op_timer_total;
    /*! This timer expires the current fencing request. Many fencing
     * requests may exist in a single operation */
    guint op_timer_one;
    /*! This timer expires the query request sent out to determine
     * which nodes have which devices, and whom those devices can fence */
    guint query_timer;
    /*! This is the default timeout to use for each fencing device if no
     * custom timeout is received in the query. */
    gint base_timeout;
    /*! This is the calculated total timeout an operation can take before
     * expiring. This is calculated by adding together all the timeout
     * values associated with the devices this fencing operation may call */
    gint total_timeout;

    /*!
     * Fencing delay (in seconds) requested by API client (used by controller to
     * implement \c PCMK_OPT_PRIORITY_FENCING_DELAY). A value of -1 means
     * disable all configured delays.
     */
    int client_delay;

    /*! Delegate is the node being asked to perform a fencing action
     * on behalf of the node that owns the remote operation. Some operations
     * will involve multiple delegates. This value represents the final delegate
     * that is used. */
    char *delegate;
    /*! The point at which the remote operation completed */
    time_t completed;
    //! Group of enum stonith_call_options associated with this operation
    uint32_t call_options;

    /*! The current state of the remote operation. This indicates
     * what stage the op is in, query, exec, done, duplicate, failed. */
    enum op_state state;
    /*! The node that owns the remote operation */
    char *originator;
    /*! The local client id that initiated the fencing request */
    char *client_id;
    /*! The client's call_id that initiated the fencing request */
    int client_callid;
    /*! The name of client that initiated the fencing request */
    char *client_name;
    /*! List of the received query results for all the nodes in the cpg group */
    GList *query_results;
    /*! The original request that initiated the remote stonith operation */
    xmlNode *request;

    /*! The current topology level being executed */
    guint level;
    /*! The current operation phase being executed */
    enum st_remap_phase phase;

    /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */
    GList *automatic_list;
    /*! List of all devices at the currently executing topology level */
    GList *devices_list;
    /*! Current entry in the topology device list */
    GList *devices;

    /*! List of duplicate operations attached to this operation. Once this operation
     * completes, the duplicate operations will be closed out as well. */
    GList *duplicates;

    /*! The point at which the remote operation completed(nsec) */
    long long completed_nsec;

    /*! The (potentially intermediate) result of the operation */
    pcmk__action_result_t result;
} remote_fencing_op_t;

void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged);

// Fencer-specific client flags
enum st_client_flags {
    st_callback_unknown               =  UINT64_C(0),
    st_callback_notify_fence          = (UINT64_C(1) << 0),
    st_callback_device_add            = (UINT64_C(1) << 2),
    st_callback_device_del            = (UINT64_C(1) << 4),
    st_callback_notify_history        = (UINT64_C(1) << 5),
    st_callback_notify_history_synced = (UINT64_C(1) << 6)
};

// How the user specified the target of a topology level
enum fenced_target_by {
    fenced_target_by_unknown = -1,  // Invalid or not yet parsed
    fenced_target_by_name,          // By target name
    fenced_target_by_pattern,       // By a pattern matching target names
    fenced_target_by_attribute,     // By a node attribute/value on target
};

/*
 * Complex fencing requirements are specified via fencing topologies.
 * A topology consists of levels; each level is a list of fencing devices.
 * Topologies are stored in a hash table by node name. When a node needs to be
 * fenced, if it has an entry in the topology table, the levels are tried
 * sequentially, and the devices in each level are tried sequentially.
 * Fencing is considered successful as soon as any level succeeds;
 * a level is considered successful if all its devices succeed.
 * Essentially, all devices at a given level are "and-ed" and the
 * levels are "or-ed".
 *
 * This structure is used for the topology table entries.
 * Topology levels start from 1, so levels[0] is unused and always NULL.
 */
typedef struct stonith_topology_s {
    enum fenced_target_by kind; // How target was specified

    /*! Node name regex or attribute name=value for which topology applies */
    char *target;
    char *target_value;
    char *target_pattern;
    char *target_attribute;

    /*!
Names of fencing devices at each topology level */
-    GList *levels[ST_LEVEL_MAX];
+    GList *levels[ST__LEVEL_COUNT];

} stonith_topology_t;

/* Fencer setup and teardown */
void stonith_shutdown(int nsig);

void init_device_list(void);
void free_device_list(void);
void init_topology_list(void);
void free_topology_list(void);
void free_stonith_remote_op_list(void);
void init_stonith_remote_op_hash_table(GHashTable **table);
void free_metadata_cache(void);
void fenced_unregister_handlers(void);

uint64_t get_stonith_flag(const char *name);

void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags,
                     xmlNode *op_request, const char *remote_peer);

/* Device and topology level registration */
int stonith_device_register(xmlNode *msg, gboolean from_cib);

void stonith_device_remove(const char *id, bool from_cib);

char *stonith_level_key(const xmlNode *msg, enum fenced_target_by);
void fenced_register_level(xmlNode *msg, char **desc,
                           pcmk__action_result_t *result);
void fenced_unregister_level(xmlNode *msg, char **desc,
                             pcmk__action_result_t *result);

stonith_topology_t *find_topology_for_host(const char *host);

/* Replies and notifications */
void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client,
                    int call_options);

xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data,
                                const pcmk__action_result_t *result);

void
 do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout);

void fenced_send_notification(const char *type,
                              const pcmk__action_result_t *result,
                              xmlNode *data);
void fenced_send_config_notification(const char *op,
                                     const pcmk__action_result_t *result,
                                     const char *desc);

/* Remote (peer-delegated) fencing operations */
remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client,
                                                xmlNode *request,
                                                gboolean manual_ack);

void fenced_process_fencing_reply(xmlNode *msg);

int process_remote_stonith_query(xmlNode * msg);

void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer);

void stonith_fence_history(xmlNode *msg, xmlNode **output,
                           const char *remote_peer, int options);

void stonith_fence_history_trim(void);

bool fencing_peer_active(pcmk__node_status_t *peer);

void set_fencing_completed(remote_fencing_op_t * op);

int fenced_handle_manual_confirmation(const pcmk__client_t *client,
                                      xmlNode *msg);

const char *fenced_device_reboot_action(const char *device_id);
bool fenced_device_supports_on(const char *device_id);

gboolean node_has_attr(const char *node, const char *name, const char *value);

gboolean node_does_watchdog_fencing(const char *node);

void fencing_topology_init(void);
void setup_cib(void);
void fenced_cib_cleanup(void);

int fenced_scheduler_init(void);
void fenced_scheduler_cleanup(void);
void fenced_scheduler_run(xmlNode *cib);

/*!
 * \internal
 * \brief Set a result to indicate a malformed fencer API request
 *
 * Sets exit status CRM_EX_PROTOCOL and execution status PCMK_EXEC_INVALID,
 * with a fixed exit reason.
 *
 * \param[in,out] result  Result to set
 */
static inline void
fenced_set_protocol_error(pcmk__action_result_t *result)
{
    pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID,
                     "Fencer API request missing required information (bug?)");
}

/*!
 * \internal
 * \brief Get the device flag to use with a given action when searching devices
 *
 * \param[in] action  Action to check
 *
 * \return st_device_supports_on if \p action is "on", otherwise
 *         st_device_supports_none
 */
static inline uint32_t
fenced_support_flag(const char *action)
{
    if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) {
        return st_device_supports_on;
    }
    return st_device_supports_none;
}

/* Globals shared across the fencer's source files */
extern char *stonith_our_uname;
extern gboolean stand_alone;
extern GHashTable *device_list;
extern GHashTable *topology;
extern long long stonith_watchdog_timeout_ms;
extern GList *stonith_watchdog_targets;
extern GHashTable *stonith_remote_op_list;
extern crm_exit_t exit_code;
extern gboolean stonith_shutdown_flag;
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index 04ea92a1dc..5a5c013cb2 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -1,174 +1,178 @@
/*
 * Copyright 2011-2024 the Pacemaker project contributors
 *
 * The version control history for this file may have further details.
* * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_FENCING_INTERNAL__H #define PCMK__CRM_FENCING_INTERNAL__H #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif enum st_device_flags { st_device_supports_none = (0 << 0), st_device_supports_list = (1 << 0), st_device_supports_status = (1 << 1), st_device_supports_reboot = (1 << 2), st_device_supports_parameter_plug = (1 << 3), st_device_supports_parameter_port = (1 << 4), st_device_supports_on = (1 << 5), }; #define stonith__set_device_flags(device_flags, device_id, flags_to_set) do { \ device_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fence device", device_id, \ (device_flags), (flags_to_set), \ #flags_to_set); \ } while (0) #define stonith__set_call_options(st_call_opts, call_for, flags_to_set) do { \ st_call_opts = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_set), \ #flags_to_set); \ } while (0) #define stonith__clear_call_options(st_call_opts, call_for, flags_to_clear) do { \ st_call_opts = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_clear), \ #flags_to_clear); \ } while (0) struct stonith_action_s; typedef struct stonith_action_s stonith_action_t; stonith_action_t *stonith__action_create(const char *agent, const char *action_name, const char *target, uint32_t target_nodeid, int timeout_sec, GHashTable *device_args, GHashTable *port_map, const char *host_arg); void stonith__destroy_action(stonith_action_t *action); pcmk__action_result_t *stonith__action_result(stonith_action_t *action); int stonith__result2rc(const pcmk__action_result_t *result); void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); void stonith__xe_get_result(const xmlNode *xml, pcmk__action_result_t *result); xmlNode 
*stonith__find_xe_with_result(xmlNode *xml); int stonith__execute_async(stonith_action_t *action, void *userdata, void (*done) (int pid, const pcmk__action_result_t *result, void *user_data), void (*fork_cb) (int pid, void *user_data)); int stonith__metadata_async(const char *agent, int timeout_sec, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data); xmlNode *create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list); xmlNode *create_device_registration_xml(const char *id, enum stonith_namespace standard, const char *agent, const stonith_key_value_t *params, const char *rsc_provides); void stonith__register_messages(pcmk__output_t *out); GList *stonith__parse_targets(const char *hosts); const char *stonith__later_succeeded(const stonith_history_t *event, const stonith_history_t *top_history); stonith_history_t *stonith__sort_history(stonith_history_t *history); void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata); -# define ST_LEVEL_MAX 10 +/* Only 1-9 is allowed for fencing topology levels, + * however, 0 is used to unregister all levels in + * unregister requests. 
+ */ +# define ST__LEVEL_COUNT 10 # define STONITH_ATTR_ACTION_OP "action" # define STONITH_OP_EXEC "st_execute" # define STONITH_OP_TIMEOUT_UPDATE "st_timeout_update" # define STONITH_OP_QUERY "st_query" # define STONITH_OP_FENCE "st_fence" # define STONITH_OP_RELAY "st_relay" # define STONITH_OP_DEVICE_ADD "st_device_register" # define STONITH_OP_DEVICE_DEL "st_device_remove" # define STONITH_OP_FENCE_HISTORY "st_fence_history" # define STONITH_OP_LEVEL_ADD "st_level_add" # define STONITH_OP_LEVEL_DEL "st_level_remove" # define STONITH_OP_NOTIFY "st_notify" # define STONITH_OP_POKE "poke" # define STONITH_WATCHDOG_AGENT "fence_watchdog" /* Don't change 2 below as it would break rolling upgrade */ # define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog" # define STONITH_WATCHDOG_ID "watchdog" stonith_history_t *stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data); bool stonith__event_state_pending(stonith_history_t *history, void *user_data); bool stonith__event_state_eq(stonith_history_t *history, void *user_data); bool stonith__event_state_neq(stonith_history_t *history, void *user_data); int stonith__legacy2status(int rc); int stonith__exit_status(const stonith_callback_data_t *data); int stonith__execution_status(const stonith_callback_data_t *data); const char *stonith__exit_reason(const stonith_callback_data_t *data); int stonith__event_exit_status(const stonith_event_t *event); int stonith__event_execution_status(const stonith_event_t *event); const char *stonith__event_exit_reason(const stonith_event_t *event); char *stonith__event_description(const stonith_event_t *event); gchar *stonith__history_description(const stonith_history_t *event, bool full_history, const char *later_succeeded, uint32_t show_opts); /*! * \internal * \brief Is a fencing operation in pending state? 
* * \param[in] state State as enum op_state value * * \return A boolean */ static inline bool stonith__op_state_pending(enum op_state state) { return state != st_failed && state != st_done; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node); gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node); #ifdef __cplusplus } #endif #endif // PCMK__CRM_FENCING_INTERNAL__H diff --git a/include/crm_internal.h b/include/crm_internal.h index b3e6287272..0142a7272f 100644 --- a/include/crm_internal.h +++ b/include/crm_internal.h @@ -1,95 +1,98 @@ /* * Copyright 2006-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_INTERNAL__H #define PCMK__CRM_INTERNAL__H #ifndef PCMK__CONFIG_H #define PCMK__CONFIG_H #include #endif #include /* Our minimum glib dependency is 2.42. Define that as both the minimum and * maximum glib APIs that are allowed (i.e. APIs that were already deprecated * in 2.42, and APIs introduced after 2.42, cannot be used by Pacemaker code). */ #define GLIB_VERSION_MIN_REQUIRED GLIB_VERSION_2_42 #define GLIB_VERSION_MAX_ALLOWED GLIB_VERSION_2_42 #include #include #include /* Public API headers can guard including deprecated API headers with this * symbol, thus preventing internal code (which includes this header) from using * deprecated APIs, while still allowing external code to use them by default. 
*/ #define PCMK_ALLOW_DEPRECATED 0 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define N_(String) (String) #ifdef ENABLE_NLS #define _(String) gettext(String) #else #define _(String) (String) #endif /* * IPC service names that are only used internally */ #define PCMK__SERVER_BASED_RO "cib_ro" #define PCMK__SERVER_BASED_RW "cib_rw" #define PCMK__SERVER_BASED_SHM "cib_shm" /* * IPC commands that can be sent to Pacemaker daemons */ #define PCMK__ATTRD_CMD_PEER_REMOVE "peer-remove" #define PCMK__ATTRD_CMD_UPDATE "update" #define PCMK__ATTRD_CMD_UPDATE_BOTH "update-both" #define PCMK__ATTRD_CMD_UPDATE_DELAY "update-delay" #define PCMK__ATTRD_CMD_QUERY "query" #define PCMK__ATTRD_CMD_REFRESH "refresh" #define PCMK__ATTRD_CMD_SYNC_RESPONSE "sync-response" #define PCMK__ATTRD_CMD_CLEAR_FAILURE "clear-failure" #define PCMK__ATTRD_CMD_CONFIRM "confirm" #define PCMK__CONTROLD_CMD_NODES "list-nodes" +#define ST__LEVEL_MIN 1 +#define ST__LEVEL_MAX 9 + #ifdef __cplusplus } #endif #endif // CRM_INTERNAL__H diff --git a/lib/pengine/pe_status_private.h b/lib/pengine/pe_status_private.h index 431f450ad4..866be7dec9 100644 --- a/lib/pengine/pe_status_private.h +++ b/lib/pengine/pe_status_private.h @@ -1,140 +1,144 @@ /* * Copyright 2018-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PENGINE_PE_STATUS_PRIVATE__H #define PCMK__PENGINE_PE_STATUS_PRIVATE__H /* This header is for the sole use of libpe_status, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // G_GNUC_INTERNAL, GSList, GList, etc. #include // xmlNode #include // pcmk__op_digest_t #include // pcmk_action_t, etc. 
#ifdef __cplusplus extern "C" { #endif #if defined(PCMK__UNIT_TESTING) #undef G_GNUC_INTERNAL #define G_GNUC_INTERNAL #endif typedef struct notify_data_s { GSList *keys; // Environment variable name/value pairs const char *action; pcmk_action_t *pre; pcmk_action_t *post; pcmk_action_t *pre_done; pcmk_action_t *post_done; GList *active; /* notify_entry_t* */ GList *inactive; /* notify_entry_t* */ GList *start; /* notify_entry_t* */ GList *stop; /* notify_entry_t* */ GList *demote; /* notify_entry_t* */ GList *promote; /* notify_entry_t* */ GList *promoted; /* notify_entry_t* */ GList *unpromoted; /* notify_entry_t* */ GHashTable *allowed_nodes; } notify_data_t; G_GNUC_INTERNAL pcmk_resource_t *pe__create_clone_child(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL void pe__create_action_notifications(pcmk_resource_t *rsc, notify_data_t *n_data); G_GNUC_INTERNAL void pe__free_action_notification_data(notify_data_t *n_data); G_GNUC_INTERNAL notify_data_t *pe__action_notif_pseudo_ops(pcmk_resource_t *rsc, const char *task, pcmk_action_t *action, pcmk_action_t *complete); G_GNUC_INTERNAL void pe__force_anon(const char *standard, pcmk_resource_t *rsc, const char *rid, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gint pe__cmp_rsc_priority(gconstpointer a, gconstpointer b); G_GNUC_INTERNAL gboolean pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler); +G_GNUC_INTERNAL +void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, + pcmk_scheduler_t *scheduler); + G_GNUC_INTERNAL gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gboolean unpack_tags(xmlNode 
*xml_tags, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL pcmk__op_digest_t *pe__compare_fencing_digest(pcmk_resource_t *rsc, const char *agent, pcmk_node_t *node, pcmk_scheduler_t *scheduler); G_GNUC_INTERNAL void pe__unpack_node_health_scores(pcmk_scheduler_t *scheduler); // Primitive resource methods G_GNUC_INTERNAL unsigned int pe__primitive_max_per_node(const pcmk_resource_t *rsc); // Group resource methods G_GNUC_INTERNAL unsigned int pe__group_max_per_node(const pcmk_resource_t *rsc); // Clone resource methods G_GNUC_INTERNAL unsigned int pe__clone_max_per_node(const pcmk_resource_t *rsc); // Bundle resource methods G_GNUC_INTERNAL pcmk_node_t *pe__bundle_active_node(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); G_GNUC_INTERNAL unsigned int pe__bundle_max_per_node(const pcmk_resource_t *rsc); #ifdef __cplusplus } #endif #endif // PCMK__PENGINE_PE_STATUS_PRIVATE__H diff --git a/lib/pengine/status.c b/lib/pengine/status.c index 11dd34c7f7..2be388b15c 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,524 +1,528 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /*! * \brief Create a new object to hold scheduler data * * \return New, initialized scheduler data on success, else NULL (and set errno) * \note Only pcmk_scheduler_t objects created with this function (as opposed * to statically declared or directly allocated) should be used with the * functions in this library, to allow for future extensions to the * data type. The caller is responsible for freeing the memory with * pe_free_working_set() when the instance is no longer needed. 
*/ pcmk_scheduler_t * pe_new_working_set(void) { pcmk_scheduler_t *scheduler = calloc(1, sizeof(pcmk_scheduler_t)); if (scheduler == NULL) { return NULL; } scheduler->priv = calloc(1, sizeof(pcmk__scheduler_private_t)); if (scheduler->priv == NULL) { free(scheduler); return NULL; } set_working_set_defaults(scheduler); return scheduler; } /*! * \brief Free scheduler data * * \param[in,out] scheduler Scheduler data to free */ void pe_free_working_set(pcmk_scheduler_t *scheduler) { if (scheduler != NULL) { pe_reset_working_set(scheduler); free(scheduler->priv); free(scheduler); } } #define XPATH_DEPRECATED_RULES \ "//" PCMK_XE_OP_DEFAULTS "//" PCMK_XE_EXPRESSION \ "|//" PCMK_XE_OP "//" PCMK_XE_EXPRESSION /*! * \internal * \brief Log a warning for deprecated rule syntax in operations * * \param[in] scheduler Scheduler data */ static void check_for_deprecated_rules(pcmk_scheduler_t *scheduler) { // @COMPAT Drop this function when support for the syntax is dropped xmlNode *deprecated = get_xpath_object(XPATH_DEPRECATED_RULES, scheduler->input, LOG_NEVER); if (deprecated != NULL) { pcmk__warn_once(pcmk__wo_op_attr_expr, "Support for rules with node attribute expressions in " PCMK_XE_OP " or " PCMK_XE_OP_DEFAULTS " is deprecated " "and will be dropped in a future release"); } } /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pcmk_scheduler_t * scheduler) { const char *new_version = NULL; xmlNode *section = NULL; if ((scheduler == NULL) || (scheduler->input == NULL)) { return FALSE; } new_version = crm_element_value(scheduler->input, PCMK_XA_CRM_FEATURE_SET); if 
(pcmk__check_feature_set(new_version) != pcmk_rc_ok) { pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'", new_version, CRM_FEATURE_SET); return FALSE; } crm_trace("Beginning unpack"); if (scheduler->priv->failed != NULL) { pcmk__xml_free(scheduler->priv->failed); } scheduler->priv->failed = pcmk__xe_create(NULL, "failed-ops"); if (scheduler->priv->now == NULL) { scheduler->priv->now = crm_time_new(NULL); } if (pcmk__xe_attr_is_true(scheduler->input, PCMK_XA_HAVE_QUORUM)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_quorate); } else { pcmk__clear_scheduler_flags(scheduler, pcmk__sched_quorate); } scheduler->priv->op_defaults = get_xpath_object("//" PCMK_XE_OP_DEFAULTS, scheduler->input, LOG_NEVER); check_for_deprecated_rules(scheduler); scheduler->priv->rsc_defaults = get_xpath_object("//" PCMK_XE_RSC_DEFAULTS, scheduler->input, LOG_NEVER); section = get_xpath_object("//" PCMK_XE_CRM_CONFIG, scheduler->input, LOG_TRACE); unpack_config(section, scheduler); if (!pcmk_any_flags_set(scheduler->flags, pcmk__sched_location_only|pcmk__sched_quorate) && (scheduler->no_quorum_policy != pcmk_no_quorum_ignore)) { pcmk__sched_warn(scheduler, "Fencing and resource management disabled " "due to lack of quorum"); } section = get_xpath_object("//" PCMK_XE_NODES, scheduler->input, LOG_TRACE); unpack_nodes(section, scheduler); section = get_xpath_object("//" PCMK_XE_RESOURCES, scheduler->input, LOG_TRACE); if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { unpack_remote_nodes(section, scheduler); } unpack_resources(section, scheduler); + section = get_xpath_object("//" PCMK_XE_FENCING_TOPOLOGY, scheduler->input, + LOG_TRACE); + pcmk__unpack_fencing_topology(section, scheduler); + section = get_xpath_object("//" PCMK_XE_TAGS, scheduler->input, LOG_NEVER); unpack_tags(section, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { section = get_xpath_object("//" PCMK_XE_STATUS, scheduler->input, LOG_TRACE); 
unpack_status(section, scheduler); } if (!pcmk_is_set(scheduler->flags, pcmk__sched_no_counts)) { for (GList *item = scheduler->priv->resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = item->data; rsc->priv->fns->count(item->data); } crm_trace("Cluster resource count: %d (%d disabled, %d blocked)", scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_status); return TRUE; } /*! * \internal * \brief Free a list of pcmk_resource_t * * \param[in,out] resources List to free * * \note When the scheduler's resource list is freed, that includes the original * storage for the uname and id of any Pacemaker Remote nodes in the * scheduler's node list, so take care not to use those afterward. * \todo Refactor pcmk_node_t to strdup() the node name. */ static void pe_free_resources(GList *resources) { pcmk_resource_t *rsc = NULL; GList *iterator = resources; while (iterator != NULL) { rsc = (pcmk_resource_t *) iterator->data; iterator = iterator->next; rsc->priv->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GList *actions) { GList *iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GList *nodes) { for (GList *iterator = nodes; iterator != NULL; iterator = iterator->next) { pcmk_node_t *node = (pcmk_node_t *) iterator->data; // Shouldn't be possible, but to be safe ... if (node == NULL) { continue; } if (node->details == NULL) { free(node); continue; } /* This is called after pe_free_resources(), which means that we can't * use node->private->name for Pacemaker Remote nodes. */ crm_trace("Freeing node %s", (pcmk__is_pacemaker_remote_node(node)? 
"(guest or remote)" : pcmk__node_name(node))); if (node->priv->attrs != NULL) { g_hash_table_destroy(node->priv->attrs); } if (node->priv->utilization != NULL) { g_hash_table_destroy(node->priv->utilization); } if (node->priv->digest_cache != NULL) { g_hash_table_destroy(node->priv->digest_cache); } g_list_free(node->details->running_rsc); g_list_free(node->priv->assigned_resources); free(node->priv); free(node->details); free(node->assign); free(node); } if (nodes != NULL) { g_list_free(nodes); } } static void pe__free_ordering(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pcmk__action_relation_t *order = iterator->data; iterator = iterator->next; free(order->task1); free(order->task2); free(order); } if (constraints != NULL) { g_list_free(constraints); } } static void pe__free_location(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pcmk__location_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->nodes, free); free(cons->id); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } /*! * \brief Reset scheduler data to defaults without freeing it or constraints * * \param[in,out] scheduler Scheduler data to reset * * \deprecated This function is deprecated as part of the API; * pe_reset_working_set() should be used instead. 
*/ void cleanup_calculations(pcmk_scheduler_t *scheduler) { if (scheduler == NULL) { return; } pcmk__clear_scheduler_flags(scheduler, pcmk__sched_have_status); if (scheduler->priv->options != NULL) { g_hash_table_destroy(scheduler->priv->options); } if (scheduler->priv->singletons != NULL) { g_hash_table_destroy(scheduler->priv->singletons); } if (scheduler->priv->ticket_constraints != NULL) { g_hash_table_destroy(scheduler->priv->ticket_constraints); } if (scheduler->template_rsc_sets) { g_hash_table_destroy(scheduler->template_rsc_sets); } if (scheduler->tags) { g_hash_table_destroy(scheduler->tags); } crm_trace("deleting resources"); pe_free_resources(scheduler->priv->resources); crm_trace("deleting actions"); pe_free_actions(scheduler->priv->actions); crm_trace("deleting nodes"); pe_free_nodes(scheduler->nodes); pe__free_param_checks(scheduler); g_list_free(scheduler->stop_needed); pcmk__xml_free(scheduler->graph); crm_time_free(scheduler->priv->now); pcmk__xml_free(scheduler->input); pcmk__xml_free(scheduler->priv->failed); set_working_set_defaults(scheduler); CRM_LOG_ASSERT((scheduler->priv->location_constraints == NULL) && (scheduler->priv->ordering_constraints == NULL)); } /*! 
* \brief Reset scheduler data to default state without freeing it * * \param[in,out] scheduler Scheduler data to reset */ void pe_reset_working_set(pcmk_scheduler_t *scheduler) { if (scheduler == NULL) { return; } crm_trace("Deleting %d ordering constraints", g_list_length(scheduler->priv->ordering_constraints)); pe__free_ordering(scheduler->priv->ordering_constraints); scheduler->priv->ordering_constraints = NULL; crm_trace("Deleting %d location constraints", g_list_length(scheduler->priv->location_constraints)); pe__free_location(scheduler->priv->location_constraints); scheduler->priv->location_constraints = NULL; crm_trace("Deleting %d colocation constraints", g_list_length(scheduler->priv->colocation_constraints)); g_list_free_full(scheduler->priv->colocation_constraints, free); scheduler->priv->colocation_constraints = NULL; cleanup_calculations(scheduler); } void set_working_set_defaults(pcmk_scheduler_t *scheduler) { // These members must be preserved pcmk__scheduler_private_t *priv = scheduler->priv; pcmk__output_t *out = priv->out; // Wipe the main structs (any other members must have previously been freed) memset(scheduler, 0, sizeof(pcmk_scheduler_t)); memset(priv, 0, sizeof(pcmk__scheduler_private_t)); // Restore the members to preserve scheduler->priv = priv; scheduler->priv->out = out; // Set defaults for everything else scheduler->priv->next_ordering_id = 1; scheduler->action_id = 1; scheduler->no_quorum_policy = pcmk_no_quorum_stop; pcmk__set_scheduler_flags(scheduler, pcmk__sched_symmetric_cluster |pcmk__sched_stop_removed_resources |pcmk__sched_cancel_removed_actions); if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, PCMK_VALUE_TRUE)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_concurrent_fencing); } } pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id) { return pe_find_resource_with_flags(rsc_list, id, pcmk_rsc_match_history); } pcmk_resource_t * pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags) 
{ GList *rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { pcmk_resource_t *parent = rIter->data; pcmk_resource_t *match = parent->priv->fns->find_rsc(parent, id, NULL, flags); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } /*! * \brief Find a node by name or ID in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] id If not NULL, ID of node to find * \param[in] node_name If not NULL, name of node to find * * \return Node from \p nodes that matches \p id if any, * otherwise node from \p nodes that matches \p uname if any, * otherwise NULL */ pcmk_node_t * pe_find_node_any(const GList *nodes, const char *id, const char *uname) { pcmk_node_t *match = NULL; if (id != NULL) { match = pe_find_node_id(nodes, id); } if ((match == NULL) && (uname != NULL)) { match = pcmk__find_node_in_list(nodes, uname); } return match; } /*! * \brief Find a node by ID in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] id ID of node to find * * \return Node from \p nodes that matches \p id if any, otherwise NULL */ pcmk_node_t * pe_find_node_id(const GList *nodes, const char *id) { for (const GList *iter = nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* @TODO Whether node IDs should be considered case-sensitive should * probably depend on the node type, so functionizing the comparison * would be worthwhile */ if (pcmk__str_eq(node->priv->id, id, pcmk__str_casei)) { return node; } } return NULL; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include /*! 
* \brief Find a node by name in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] node_name Name of node to find * * \return Node from \p nodes that matches \p node_name if any, otherwise NULL */ pcmk_node_t * pe_find_node(const GList *nodes, const char *node_name) { return pcmk__find_node_in_list(nodes, node_name); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 6c3a71ce96..c9d7fffa96 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,5097 +1,5133 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { pcmk_resource_t *rsc; // Resource that history is for pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML const char *id; // XML ID of history entry const char *key; // Operation key of action const char *task; // Action name const char *exit_reason; // Exit reason given for result guint interval_ms; // Action interval int call_id; // Call ID of action int expected_exit_status; // Expected exit status of action int exit_status; // Actual exit status of action int execution_status; // Execution status of action }; /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the * flag is stringified more readably in log messages. 
*/ #define set_config_flag(scheduler, option, flag) do { \ GHashTable *config_hash = (scheduler)->priv->options; \ const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } else { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *failed); static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler); static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler); /*! * \internal * \brief Check whether a node is a dangling guest node * * \param[in] node Node to check * * \return true if \p node had a Pacemaker Remote connection resource with a * launcher that was removed from the CIB, otherwise false. */ static bool is_dangling_guest_node(pcmk_node_t *node) { return pcmk__is_pacemaker_remote_node(node) && (node->priv->remote != NULL) && (node->priv->remote->priv->launcher == NULL) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_removed_launched); } /*! 
* \brief Schedule a fence action for a node * * \param[in,out] scheduler Scheduler data * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider * \c PCMK_OPT_PRIORITY_FENCING_DELAY */ void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); if (pcmk__is_guest_or_bundle_node(node)) { // Fence a guest or bundle node by marking its launcher as failed pcmk_resource_t *rsc = node->priv->remote->priv->launcher; if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pcmk__node_name(node), reason, rsc->id); } else { pcmk__sched_warn(scheduler, "Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pcmk__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. 
*/ pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pcmk__node_name(node), reason); pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } else if (pcmk__is_remote_node(node)) { pcmk_resource_t *rsc = node->priv->remote; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pcmk__node_name(node), reason); } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) { pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__sched_warn(scheduler, "Remote node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { pcmk__sched_warn(scheduler, "Cluster node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? 
"will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(scheduler->flags, flag)) { result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { pcmk__set_scheduler_flags(scheduler, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; guint interval_ms = 0U; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; scheduler->priv->options = config_hash; pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data, config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, scheduler); pcmk__validate_cluster_options(config_hash); set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES, pcmk__sched_probe_resources); if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG); if (value && crm_is_true(value)) { 
crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is nonzero"); pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING, scheduler); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); pcmk_parse_interval_spec(value, &interval_ms); if (interval_ms >= INT_MAX) { scheduler->priv->fence_timeout_ms = INT_MAX; } else { scheduler->priv->fence_timeout_ms = (int) interval_ms; } crm_debug("STONITH timeout: %d", scheduler->priv->fence_timeout_ms); set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED, pcmk__sched_fencing_enabled); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { crm_debug("STONITH of failed nodes is enabled"); } else { crm_debug("STONITH of failed nodes is disabled"); } scheduler->priv->fence_action = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION); if (!strcmp(scheduler->priv->fence_action, PCMK__ACTION_POWEROFF)) { pcmk__warn_once(pcmk__wo_poweroff, "Support for " PCMK_OPT_STONITH_ACTION " of " "'" PCMK__ACTION_POWEROFF "' is deprecated and will be " "removed in a future release " "(use '" PCMK_ACTION_OFF "' instead)"); scheduler->priv->fence_action = PCMK_ACTION_OFF; } crm_trace("STONITH will %s nodes", scheduler->priv->fence_action); set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING, pcmk__sched_concurrent_fencing); if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) { crm_debug("Concurrent fencing is enabled"); } else { crm_debug("Concurrent fencing is disabled"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY); if (value) { pcmk_parse_interval_spec(value, &interval_ms); scheduler->priority_fencing_delay = (int) (interval_ms / 1000); crm_trace("Priority fencing delay is %ds", 
scheduler->priority_fencing_delay); } set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES, pcmk__sched_stop_all); crm_debug("Stop all active resources: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all)); set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER, pcmk__sched_symmetric_cluster); if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { int do_panic = 0; crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop': cluster has never had quorum"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop' because fencing is disabled"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { scheduler->no_quorum_policy = pcmk_no_quorum_stop; } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case pcmk_no_quorum_fence: crm_notice("On 
loss of quorum: Fence all remaining nodes"); break; case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES, pcmk__sched_stop_removed_resources); if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { crm_trace("Orphan resources are stopped"); } else { crm_trace("Orphan resources are ignored"); } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS, pcmk__sched_cancel_removed_actions); if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) { crm_trace("Orphan resource actions are stopped"); } else { crm_trace("Orphan resource actions are ignored"); } value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP); if (value != NULL) { if (crm_is_true(value)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); pcmk__warn_once(pcmk__wo_remove_after, "Support for the " PCMK__OPT_REMOVE_AFTER_STOP " cluster property is deprecated and will be " "removed in a future release"); } else { pcmk__clear_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); } } set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE, pcmk__sched_in_maintenance); crm_trace("Maintenance mode: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance)); set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL, pcmk__sched_start_failure_fatal); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { crm_trace("Start failures are always fatal"); } else { crm_trace("Start failures are handled by failcount"); } if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING, pcmk__sched_startup_fencing); } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pcmk__warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(scheduler); scheduler->priv->placement_strategy = 
pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY); crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy); set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK, pcmk__sched_shutdown_lock); if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock)); scheduler->shutdown_lock /= 1000; crm_trace("Resources will be locked to nodes that were cleanly " "shut down (locks expire after %s)", pcmk__readable_interval(scheduler->shutdown_lock)); } else { crm_trace("Resources will not be locked to nodes that were cleanly " "shut down"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout)); scheduler->node_pending_timeout /= 1000; if (scheduler->node_pending_timeout == 0) { crm_trace("Do not fence pending nodes"); } else { crm_trace("Fence pending nodes after %s", pcmk__readable_interval(scheduler->node_pending_timeout * 1000)); } return TRUE; } pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler) { pcmk_node_t *new_node = NULL; if (pcmk_find_node(scheduler, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; } new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment)); new_node->details = calloc(1, sizeof(struct pcmk__node_details)); new_node->priv = calloc(1, sizeof(pcmk__node_private_t)); if ((new_node->assign == NULL) || (new_node->details == NULL) || (new_node->priv == NULL)) { free(new_node->assign); free(new_node->details); free(new_node->priv); free(new_node); pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; 
} crm_trace("Creating node for entry %s/%s", uname, id); new_node->assign->score = char2score(score); new_node->priv->id = id; new_node->priv->name = uname; new_node->priv->flags = pcmk__node_probes_allowed; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->priv->scheduler = scheduler; if (pcmk__str_eq(type, PCMK_VALUE_MEMBER, pcmk__str_null_matches|pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_cluster; } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_remote; pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming '" PCMK__VALUE_PING "'", pcmk__s(uname, "without name"), type); } pcmk__warn_once(pcmk__wo_ping_node, "Support for nodes of type '" PCMK__VALUE_PING "' " "(such as %s) is deprecated and will be removed in a " "future release", pcmk__s(uname, "unnamed node")); new_node->priv->variant = pcmk__node_variant_ping; } new_node->priv->attrs = pcmk__strkey_table(free, free); if (pcmk__is_pacemaker_remote_node(new_node)) { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote"); } else { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster"); } new_node->priv->utilization = pcmk__strkey_table(free, free); new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests); scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = pcmk__xe_id(xml_obj); const char *remote_name = NULL; const 
char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) { continue; } for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, PCMK_XA_VALUE); const char *name = crm_element_value(attr, PCMK_XA_NAME); if (name == NULL) { // Sanity continue; } if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) { remote_name = value; } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) { remote_server = value; } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) { remote_port = value; } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) { connect_timeout = value; } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) { remote_allow_migrate = value; } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->priv->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { if ((new_node->priv->variant == pcmk__node_variant_remote) && (new_node->priv->remote == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... 
new_node->details->unclean = FALSE; } } gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) { new_node = NULL; id = crm_element_value(xml_obj, PCMK_XA_ID); uname = crm_element_value(xml_obj, PCMK_XA_UNAME); type = crm_element_value(xml_obj, PCMK_XA_TYPE); score = crm_element_value(xml_obj, PCMK_XA_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" PCMK_XE_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } handle_startup_fencing(scheduler, new_node); add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, PCMK_XA_UNAME)); } } if (scheduler->localhost && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) { crm_info("Creating a fake local node"); pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0, scheduler); } return TRUE; } static void unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *launcher_id = NULL; if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher, scheduler); return; } launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER); if ((launcher_id != NULL) && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) { pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher != NULL) { rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); pcmk__rsc_trace(rsc, "Resource %s's launcher is %s", rsc->id, launcher_id); } else { 
pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s", rsc->id, launcher_id); } } } gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = pcmk__xe_id(xml_obj); /* The pcmk_find_node() check ensures we don't iterate over an * expanded node that has already been added to the node list */ if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. 
*/ if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, pcmk__xe_id(xml_obj2), pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { pcmk_node_t *remote_node = NULL; if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) { return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pcmk_find_node(scheduler, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pcmk__node_name(remote_node)); remote_node->priv->remote = new_rsc; if (new_rsc->priv->launcher == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ pcmk__insert_dup(remote_node->priv->attrs, CRM_ATTR_KIND, "container"); } } /*! 
* \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] scheduler Scheduler data * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; scheduler->template_rsc_sets = pcmk__strkey_table(free, pcmk__free_idref); for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pcmk_resource_t *new_rsc = NULL; const char *id = pcmk__xe_id(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", xml_obj->name); continue; } if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) { if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. 
*/ pcmk__insert_dup(scheduler->template_rsc_sets, id, NULL); } continue; } crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, scheduler) == pcmk_rc_ok) { scheduler->priv->resources = g_list_append(scheduler->priv->resources, new_rsc); pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", xml_obj->name, id); } } for (gIter = scheduler->priv->resources; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; unpack_launcher(rsc, scheduler); link_rsc2remotenode(scheduler, rsc); } scheduler->priv->resources = g_list_sort(scheduler->priv->resources, pe__cmp_rsc_priority); if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* Ignore */ } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the " PCMK_OPT_STONITH_ENABLED " option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } +/*! 
+ * \internal + * \brief Parse configuration XML for fencing topology information + * + * \param[in] xml_fencing_topology Top of fencing topology configuration XML + * \param[in,out] scheduler Scheduler data + * + * \return void + */ +void +pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler) +{ + xmlNode *xml_obj = NULL; + int id = 0; + + for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL); + xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) { + + crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id); + + // Ensure an ID was given + if (pcmk__str_empty(pcmk__xe_id(xml_obj))) { + pcmk__config_warn("Ignoring registration for topology level without ID"); + continue; + } + + // Ensure level ID is in allowed range + if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { + pcmk__config_warn("Ignoring topology registration with invalid level %d", + id); + continue; + } + + } +} + gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; scheduler->tags = pcmk__strkey_table(free, pcmk__free_idref); for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = pcmk__xe_id(xml_tag); if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID, (const char *) xml_tag->name); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = pcmk__xe_id(xml_obj_ref); if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID, xml_obj_ref->name, tag_id); continue; } pcmk__add_idref(scheduler->tags, tag_id, obj_ref); } } return TRUE; } /* The ticket state section: * 
"/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pcmk__ticket_t *ticket = NULL; ticket_id = pcmk__xe_id(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) { continue; } pcmk__insert_dup(ticket->state, prop_name, prop_value); } granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED); if (granted && crm_is_true(granted)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We have ticket '%s'", ticket->id); } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY); if (standby && crm_is_true(standby)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_standby); if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby); } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = 
NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) { continue; } unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = NULL; pcmk_resource_t *rsc = NULL; int maint = 0; if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { return; } if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pcmk__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE), &maint, 0); if (maint) { pcmk__set_node_flags(this_node, pcmk__node_remote_maint); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_maint); } rsc = this_node->priv->remote; if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) { this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); } attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pcmk__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current)) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) { crm_info("%s is in maintenance mode", pcmk__node_name(this_node)); this_node->details->maintenance = TRUE; } discovery = pcmk__node_attr(this_node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if 
((discovery != NULL) && !crm_is_true(discovery)) { pcmk__warn_once(pcmk__wo_rdisc_enabled, "Support for the " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " node attribute is deprecated and will be removed" " (and behave as 'true') in a future release."); if (pcmk__is_remote_node(this_node) && !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute on Pacemaker Remote node %s" " because fencing is disabled", pcmk__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pcmk__node_name(this_node)); pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed); } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] scheduler Scheduler data */ static void unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(node)); pcmk__set_node_flags(node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current))) { crm_info("%s is in maintenance mode", pcmk__node_name(node)); node->details->maintenance = TRUE; } discovery = pcmk__node_attr(node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if ((discovery != NULL) && !crm_is_true(discovery)) { 
pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute for %s because disabling resource" " discovery is not allowed for cluster nodes", pcmk__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * \C PCMK__XE_NODE_STATE element itself and node attributes inside it, but not * the resource history inside it. Multiple passes through the status are needed * to fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; pcmk_node_t *this_node = NULL; id = crm_element_value(state, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without " PCMK_XA_ID); crm_log_xml_info(state, "missing-id"); return; } uname = crm_element_value(state, PCMK_XA_UNAME); if (uname == NULL) { /* If a joining peer makes the cluster acquire the quorum from corosync * meanwhile it has not joined CPG membership of pacemaker-controld yet, * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and * wait for it to join CPG. */ crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" " "without " PCMK_XA_UNAME, id); } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { crm_notice("Ignoring recorded state for removed node with name %s and " PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id); return; } if (pcmk__is_pacemaker_remote_node(this_node)) { int remote_fenced = 0; /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. 
In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED), &remote_fenced, 0); if (remote_fenced) { pcmk__set_node_flags(this_node, pcmk__node_remote_fenced); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced); } return; } unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. */ this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); crm_trace("Determining online status of cluster node %s (id %s)", pcmk__node_name(this_node), id); determine_online_status(state, this_node, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate) && this_node->details->online && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. 
*
 * Each call makes a single pass over the \c PCMK__XE_NODE_STATE entries. A
 * node is skipped if its entry is malformed, it is no longer configured, it
 * was already unpacked, or (when \p fence is false) its prerequisites are not
 * yet known to be satisfied. The result becomes EAGAIN as soon as at least one
 * node is unpacked in this pass, which tells the caller to run another pass.
 *
 * \param[in]     status     CIB XML status section
 * \param[in]     fence      If true, treat any not-yet-unpacked nodes as unseen
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
 *         or EAGAIN if more unpacking remains to be done)
 */
static int
unpack_node_history(const xmlNode *status, bool fence,
                    pcmk_scheduler_t *scheduler)
{
    int rc = pcmk_rc_ok;

    // Loop through all PCMK__XE_NODE_STATE entries in CIB status
    for (const xmlNode *state = pcmk__xe_first_child(status,
                                                     PCMK__XE_NODE_STATE, NULL,
                                                     NULL);
         state != NULL; state = pcmk__xe_next_same(state)) {

        const char *id = pcmk__xe_id(state);
        const char *uname = crm_element_value(state, PCMK_XA_UNAME);
        pcmk_node_t *this_node = NULL;

        if ((id == NULL) || (uname == NULL)) {
            // Warning already logged in first pass through status section
            crm_trace("Not unpacking resource history from malformed "
                      PCMK__XE_NODE_STATE " without id and/or uname");
            continue;
        }

        this_node = pe_find_node_any(scheduler->nodes, id, uname);
        if (this_node == NULL) {
            // Warning already logged in first pass through status section
            crm_trace("Not unpacking resource history for node %s because "
                      "no longer in configuration", id);
            continue;
        }

        // Each node's history is unpacked at most once across all passes
        if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) {
            crm_trace("Not unpacking resource history for node %s because "
                      "already unpacked", id);
            continue;
        }

        if (fence) {
            // We're processing all remaining nodes

        } else if (pcmk__is_guest_or_bundle_node(this_node)) {
            /* We can unpack a guest node's history only after we've unpacked
             * other resource history to the point that we know that the node's
             * connection and containing resource are both up.
             */
            const pcmk_resource_t *remote = this_node->priv->remote;
            const pcmk_resource_t *launcher = remote->priv->launcher;

            if ((remote->priv->orig_role != pcmk_role_started)
                || (launcher->priv->orig_role != pcmk_role_started)) {
                crm_trace("Not unpacking resource history for guest node %s "
                          "because launcher and connection are not known to "
                          "be up", id);
                continue;
            }

        } else if (pcmk__is_remote_node(this_node)) {
            /* We can unpack a remote node's history only after we've unpacked
             * other resource history to the point that we know that the node's
             * connection is up, with the exception of when shutdown locks are
             * in use.
             */
            pcmk_resource_t *rsc = this_node->priv->remote;

            if ((rsc == NULL)
                || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)
                    && (rsc->priv->orig_role != pcmk_role_started))) {
                crm_trace("Not unpacking resource history for remote node %s "
                          "because connection is not known to be up", id);
                continue;
            }

        /* If fencing and shutdown locks are disabled and we're not processing
         * unseen nodes, then we don't want to unpack offline nodes until online
         * nodes have been unpacked. This allows us to number active clone
         * instances first.
         */
        } else if (!pcmk_any_flags_set(scheduler->flags,
                                       pcmk__sched_fencing_enabled
                                       |pcmk__sched_shutdown_lock)
                   && !this_node->details->online) {
            crm_trace("Not unpacking resource history for offline "
                      "cluster node %s", id);
            continue;
        }

        // Remote and guest nodes get their online status and attributes first
        if (pcmk__is_pacemaker_remote_node(this_node)) {
            determine_remote_online_status(scheduler, this_node);
            unpack_handle_remote_attrs(this_node, state, scheduler);
        }

        crm_trace("Unpacking resource history for %snode %s",
                  (fence? "unseen " : ""), id);

        // Mark before unpacking so later passes skip this node
        pcmk__set_node_flags(this_node, pcmk__node_unpacked);
        unpack_node_lrm(this_node, state, scheduler);

        rc = EAGAIN; // Other node histories might depend on this one
    }
    return rc;
}

/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else?
*/ gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (scheduler->priv->ticket_constraints == NULL) { scheduler->priv->ticket_constraints = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__xe_is(state, PCMK_XE_TICKETS)) { unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { unpack_node_state(state, scheduler); } } while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled), scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (scheduler->stop_needed != NULL) { for (GList *item = scheduler->stop_needed; item; item = item->next) { pcmk_resource_t *container = item->data; pcmk_node_t *node = pcmk__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(scheduler->stop_needed); scheduler->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *this_node = gIter->data; if (!pcmk__is_pacemaker_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->priv->remote != NULL)) { pe__set_next_role(this_node->priv->remote, pcmk_role_stopped, "remote shutdown"); } if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) { determine_remote_online_status(scheduler, this_node); } } return TRUE; } /*! 
* \internal * \brief Unpack node's time when it became a member at the cluster layer * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * \param[in,out] scheduler Scheduler data * * \return Epoch time when node became a cluster member * (or scheduler effective time for legacy entries) if a member, * 0 if not a member, or -1 if no valid information available */ static long long unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) { const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); int member = 0; if (member_time == NULL) { return -1LL; } else if (crm_str_to_boolean(member_time, &member) == 1) { /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was * recorded as a boolean for a DC < 2.1.7, or the node is pending * shutdown and has left the CPG, in which case it was set to 1 to avoid * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT. * * We return the effective time for in_ccm=1 because what's important to * avoid fencing is that effective time minus this value is less than * the pending node timeout. */ return member? (long long) get_effective_time(scheduler) : 0LL; } else { long long when_member = 0LL; if ((pcmk__scan_ll(member_time, &when_member, 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in " PCMK__XE_NODE_STATE " entry", member_time); return -1LL; } return when_member; } } /*! 
* \internal * \brief Unpack node's time when it became online in process group * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * * \return Epoch time when node became online in process group (or 0 if not * online, or 1 for legacy online entries) */ static long long unpack_node_online(const xmlNode *node_state) { const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD); // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE, pcmk__str_casei|pcmk__str_null_matches)) { return 0LL; } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) { return 1LL; } else { long long when_online = 0LL; if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) || (when_online < 0)) { crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in " PCMK__XE_NODE_STATE " entry, assuming offline", peer_time); return 0LL; } return when_online; } } /*! * \internal * \brief Unpack node attribute for user-requested fencing * * \param[in] node Node to check * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status * * \return \c true if fencing has been requested for \p node, otherwise \c false */ static bool unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) { long long value = 0LL; int value_i = 0; const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE, NULL, pcmk__rsc_node_current); // Value may be boolean or an epoch time if (crm_str_to_boolean(value_s, &value_i) == 1) { return (value_i != 0); } if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { return (value > 0); } crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE "node attribute for %s", value_s, pcmk__node_name(node)); return false; } static gboolean determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, 
PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); if (when_member <= 0) { crm_trace("Node %s is %sdown", pcmk__node_name(this_node), ((when_member < 0)? "presumed " : "")); } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node %s is not ready to run resources: %s", pcmk__node_name(this_node), join); } } else if (!pcmk_is_set(this_node->priv->flags, pcmk__node_expected_up)) { crm_trace("Node %s controller is down: " "member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); crm_info("Node %s member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } /*! * \internal * \brief Check whether a node has taken too long to join controller group * * \param[in,out] scheduler Scheduler data * \param[in] node Node to check * \param[in] when_member Epoch time when node became a cluster member * \param[in] when_online Epoch time when node joined controller group * * \return true if node has been pending (on the way up) longer than * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false * \note This will also update the cluster's recheck time if appropriate. 
*/
static inline bool
pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                 long long when_member, long long when_online)
{
    if ((scheduler->node_pending_timeout > 0)
        && (when_member > 0) && (when_online <= 0)) {
        // There is a timeout on pending nodes, and node is pending

        // Deadline is measured from when the node became a cluster member
        time_t timeout = when_member + scheduler->node_pending_timeout;

        if (get_effective_time(node->priv->scheduler) >= timeout) {
            return true; // Node has timed out
        }

        // Node is pending, but still has time
        pe__update_recheck_time(timeout, scheduler, "pending node timeout");
    }
    return false;
}

/*
 * Determine a cluster node's online status when fencing is enabled.
 *
 * Reads the join state, expected state, membership time, process-group online
 * time and any termination request for this_node, then either returns early
 * (shutdown in progress, never-seen peer, already-fenced-as-requested peer) or
 * walks an ordered chain of conditions that may call pe_fence_node() and/or
 * mark the node as standby and pending.
 *
 * Returns true if the node should be considered online. In the shutdown case
 * that means the peer is online in the process group; otherwise it means the
 * node is a cluster member.
 */
static bool
determine_online_status_fencing(pcmk_scheduler_t *scheduler,
                                const xmlNode *node_state,
                                pcmk_node_t *this_node)
{
    bool termination_requested = unpack_node_terminate(this_node, node_state);
    const char *join = crm_element_value(node_state, PCMK__XA_JOIN);
    const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);
    long long when_member = unpack_node_member(node_state, scheduler);
    long long when_online = unpack_node_online(node_state);

/*
  - PCMK__XA_JOIN          ::= member|down|pending|banned
  - PCMK_XA_EXPECTED       ::= member|down

  @COMPAT with entries recorded for DCs < 2.1.7
  - PCMK__XA_IN_CCM        ::= true|false
  - PCMK_XA_CRMD           ::= online|offline

  Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
  - PCMK__XA_IN_CCM        ::= epoch-time|0
  Since when node has been a cluster member. A value of 0 means the node is not
  a cluster member.

  - PCMK_XA_CRMD           ::= epoch-time|0
  Since when peer has been online in CPG. A value of 0 means the peer is
  offline in CPG.
*/

    crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s",
              pcmk__node_name(this_node), when_member, when_online,
              pcmk__s(join, ""), pcmk__s(exp_state, ""),
              (termination_requested? " (termination requested)" : ""));

    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", pcmk__node_name(this_node));

        /* Slightly different criteria since we can't shut down a dead peer */
        return (when_online > 0);
    }

    // Negative membership time means no valid membership information
    if (when_member < 0) {
        pe_fence_node(scheduler, this_node,
                      "peer has not been seen by the cluster", FALSE);
        return false;
    }

    if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) {
        pe_fence_node(scheduler, this_node,
                      "peer failed Pacemaker membership criteria", FALSE);

    } else if (termination_requested) {
        // A fully-down peer with a pending request has already been fenced
        if ((when_member <= 0) && (when_online <= 0)
            && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) {
            crm_info("%s was fenced as requested",
                     pcmk__node_name(this_node));
            return false;
        }
        pe_fence_node(scheduler, this_node, "fencing was requested", false);

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN,
                            pcmk__str_null_matches)) {

        if (pending_too_long(scheduler, this_node, when_member, when_online)) {
            pe_fence_node(scheduler, this_node,
                          "peer pending timed out on joining the process group",
                          FALSE);

        } else if ((when_member > 0) || (when_online > 0)) {
            crm_info("- %s is not ready to run resources",
                     pcmk__node_name(this_node));
            pcmk__set_node_flags(this_node, pcmk__node_standby);
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up",
                      pcmk__node_name(this_node));
        }

    } else if (when_member <= 0) {
        // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes
        pe_fence_node(scheduler, this_node,
                      "peer is no longer part of the cluster", TRUE);

    } else if (when_online <= 0) {
        pe_fence_node(scheduler, this_node,
                      "peer process is no longer available", FALSE);

        /* Everything is running at this point, now check join state */

    } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) {
        crm_info("%s is active", pcmk__node_name(this_node));

    } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING,
                                CRMD_JOINSTATE_DOWN, NULL)) {
        crm_info("%s is not ready to run resources",
                 pcmk__node_name(this_node));
        pcmk__set_node_flags(this_node, pcmk__node_standby);
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(scheduler, this_node, "peer was in an unknown state",
                      FALSE);
    }

    return (when_member > 0);
}

/*
 * Determine the online status of a Pacemaker Remote (remote or guest) node
 * from the state of its connection resource and, for guest nodes, of the
 * resource that launches it.
 *
 * May set this_node's online and shutdown details and set or clear its
 * pcmk__node_remote_reset flag. A node whose connection resource no longer
 * exists is simply marked offline.
 */
static void
determine_remote_online_status(pcmk_scheduler_t *scheduler,
                               pcmk_node_t *this_node)
{
    pcmk_resource_t *rsc = this_node->priv->remote;
    pcmk_resource_t *launcher = NULL;
    pcmk_node_t *host = NULL;
    const char *node_type = "Remote";

    if (rsc == NULL) {
        /* This is a leftover node state entry for a former Pacemaker Remote
         * node whose connection resource was removed. Consider it offline.
         */
        crm_trace("Pacemaker Remote node %s is considered OFFLINE because "
                  "its connection resource has been removed from the CIB",
                  this_node->priv->id);
        this_node->details->online = FALSE;
        return;
    }

    // A connection with a launcher belongs to a guest node
    launcher = rsc->priv->launcher;
    if (launcher != NULL) {
        node_type = "Guest";
        // The host is only known when the connection is active on one node
        if (pcmk__list_of_1(rsc->priv->active_nodes)) {
            host = rsc->priv->active_nodes->data;
        }
    }

    /* If the resource is currently started, mark it online. */
    if (rsc->priv->orig_role == pcmk_role_started) {
        this_node->details->online = TRUE;
    }

    /* consider this node shutting down if transitioning start->stop */
    if ((rsc->priv->orig_role == pcmk_role_started)
        && (rsc->priv->next_role == pcmk_role_stopped)) {

        crm_trace("%s node %s shutting down because connection resource is stopping",
                  node_type, this_node->priv->id);
        this_node->details->shutdown = TRUE;
    }

    /* Now check all the failure conditions. */
    if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) {
        crm_trace("Guest node %s UNCLEAN because guest resource failed",
                  this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);

    } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) {
        crm_trace("%s node %s OFFLINE because connection resource failed",
                  node_type, this_node->priv->id);
        this_node->details->online = FALSE;

    } else if ((rsc->priv->orig_role == pcmk_role_stopped)
               || ((launcher != NULL)
                   && (launcher->priv->orig_role == pcmk_role_stopped))) {

        crm_trace("%s node %s OFFLINE because its resource is stopped",
                  node_type, this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__clear_node_flags(this_node, pcmk__node_remote_reset);

    } else if (host && (host->details->online == FALSE)
               && host->details->unclean) {
        crm_trace("Guest node %s UNCLEAN because host is unclean",
                  this_node->priv->id);
        this_node->details->online = FALSE;
        pcmk__set_node_flags(this_node, pcmk__node_remote_reset);

    } else {
        crm_trace("%s node %s is %s", node_type, this_node->priv->id,
                  this_node->details->online? "ONLINE" : "OFFLINE");
    }
}

/*
 * Determine and record a cluster node's online status, then log a one-line
 * summary of its state.
 *
 * Resets the node's shutdown detail, sets it again if a shutdown has been
 * requested (otherwise notes whether the node is expected to be up), and
 * dispatches to the fencing or no-fencing variant. A node that is not online,
 * or that is online but shutting down, gets an assignment score of
 * -PCMK_SCORE_INFINITY.
 */
static void
determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node,
                        pcmk_scheduler_t *scheduler)
{
    gboolean online = FALSE;
    const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED);

    CRM_CHECK(this_node != NULL, return);

    this_node->details->shutdown = FALSE;

    if (pe__shutdown_requested(this_node)) {
        this_node->details->shutdown = TRUE;

    } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER,
                            pcmk__str_casei)) {
        pcmk__set_node_flags(this_node, pcmk__node_expected_up);
    }

    if (this_node->priv->variant == pcmk__node_variant_ping) {
        this_node->details->unclean = FALSE;
        online = FALSE;         /* As far as resource management is concerned,
                                 * the node is safely offline.
                                 * This special case must not be relied on for
                                 * anything else.
                                 */

    } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) {
        online = determine_online_status_no_fencing(scheduler, node_state,
                                                    this_node);

    } else {
        online = determine_online_status_fencing(scheduler, node_state,
                                                 this_node);
    }

    if (online) {
        this_node->details->online = TRUE;

    } else {
        /* remove node from contention */
        this_node->assign->score = -PCMK_SCORE_INFINITY;
    }

    if (online && this_node->details->shutdown) {
        /* don't run resources here */
        this_node->assign->score = -PCMK_SCORE_INFINITY;
    }

    // Log exactly one summary line, in order of precedence
    if (this_node->priv->variant == pcmk__node_variant_ping) {
        crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node));

    } else if (this_node->details->unclean) {
        pcmk__sched_warn(scheduler, "%s is unclean",
                         pcmk__node_name(this_node));

    } else if (!this_node->details->online) {
        crm_trace("%s is offline", pcmk__node_name(this_node));

    } else if (this_node->details->shutdown) {
        crm_info("%s is shutting down", pcmk__node_name(this_node));

    } else if (this_node->details->pending) {
        crm_info("%s is pending", pcmk__node_name(this_node));

    } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) {
        crm_info("%s is in standby", pcmk__node_name(this_node));

    } else if (this_node->details->maintenance) {
        crm_info("%s is in maintenance", pcmk__node_name(this_node));

    } else {
        crm_info("%s is online", pcmk__node_name(this_node));
    }
}

/*!
 * \internal
 * \brief Find the end of a resource's name, excluding any clone suffix
 *
 * Scans backward from the last character over trailing decimal digits. If the
 * digits are preceded by a colon (a ":N" suffix), the result points at the
 * character before that colon. A colon that is itself the last character is
 * returned as-is. Anything else means there is no suffix, and the result
 * points at the last character of \p id.
 *
 * \param[in] id  Resource ID to check
 *
 * \return Pointer to last character of resource's base name, or NULL if
 *         pcmk__str_empty() reports \p id as empty
 */
const char *
pe_base_name_end(const char *id)
{
    if (!pcmk__str_empty(id)) {
        const char *end = id + strlen(id) - 1;

        // The first character is never examined (loop stops at s == id)
        for (const char *s = end; s > id; --s) {
            switch (*s) {
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    break;
                case ':':
                    return (s == end)? s : (s - 1);
                default:
                    return end;
            }
        }
        return end;
    }
    return NULL;
}

/*!
* \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char)); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE); pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none); crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pcmk_find_node(scheduler, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL, scheduler); } link_rsc2remotenode(scheduler, rsc); if (node) { 
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) { // This removed resource needs to be mapped to a launcher crm_trace("Launched resource %s was removed from the configuration", rsc_id); pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched); } pcmk__set_rsc_flags(rsc, pcmk__rsc_removed); scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ static pcmk_resource_t * create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); pcmk_resource_t *orphan = NULL; // find_rsc() because we might be a cloned group orphan = top->priv->fns->find_rsc(top, rsc_id, NULL, pcmk_rsc_match_clone_only); pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pcmk__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX * instances are already active). 
*
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     node       Node on which to check for instance
 * \param[in,out] parent     Clone to check
 * \param[in]     rsc_id     Name of cloned resource in history (no instance)
 *
 * \return Instance of \p parent to use for \p rsc_id on \p node (an existing
 *         child or member, or the result of create_anonymous_orphan())
 */
static pcmk_resource_t *
find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                     pcmk_resource_t *parent, const char *rsc_id)
{
    GList *rIter = NULL;
    pcmk_resource_t *rsc = NULL;
    pcmk_resource_t *inactive_instance = NULL;
    gboolean skip_inactive = FALSE;

    CRM_ASSERT(pcmk__is_anonymous_clone(parent));

    // Check for active (or partially active, for cloned groups) instance
    pcmk__rsc_trace(parent, "Looking for %s on %s in %s",
                    rsc_id, pcmk__node_name(node), parent->id);

    // Stop scanning children as soon as an active match is found
    for (rIter = parent->priv->children;
         (rIter != NULL) && (rsc == NULL); rIter = rIter->next) {

        GList *locations = NULL;
        pcmk_resource_t *child = rIter->data;

        /* Check whether this instance is already known to be active or pending
         * anywhere, at this stage of unpacking. Because this function is called
         * for a resource before the resource's individual operation history
         * entries are unpacked, locations will generally not contain the
         * desired node.
         *
         * However, there are three exceptions:
         * (1) when child is a cloned group and we have already unpacked the
         *     history of another member of the group on the same node;
         * (2) when we've already unpacked the history of another numbered
         *     instance on the same node (which can happen if
         *     PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and
         * (3) when we re-run calculations on the same scheduler data as part of
         *     a simulation.
         */
        child->priv->fns->location(child, &locations, 2);
        if (locations) {
            /* We should never associate the same numbered anonymous clone
             * instance with multiple nodes, and clone instances can't migrate,
             * so there must be only one location, regardless of history.
             */
            CRM_LOG_ASSERT(locations->next == NULL);

            if (pcmk__same_node((pcmk_node_t *) locations->data, node)) {
                /* This child instance is active on the requested node, so check
                 * for a corresponding configured resource. We use find_rsc()
                 * instead of child because child may be a cloned group, and we
                 * need the particular member corresponding to rsc_id.
                 *
                 * If the history entry is orphaned, rsc will be NULL.
                 */
                rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL,
                                                  pcmk_rsc_match_clone_only);
                if (rsc) {
                    /* If there are multiple instance history entries for an
                     * anonymous clone in a single node's history (which can
                     * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true
                     * to false), we want to consider the instances beyond the
                     * first as orphans, even if there are inactive instance
                     * numbers available.
                     */
                    if (rsc->priv->active_nodes != NULL) {
                        crm_notice("Active (now-)anonymous clone %s has "
                                   "multiple (orphan) instance histories on %s",
                                   parent->id, pcmk__node_name(node));
                        skip_inactive = TRUE;
                        rsc = NULL;
                    } else {
                        pcmk__rsc_trace(parent, "Resource %s, active", rsc->id);
                    }
                }
            }
            g_list_free(locations);

        } else {
            pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id);
            if (!skip_inactive && !inactive_instance
                && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) {
                // Remember one inactive instance in case we don't find active
                inactive_instance =
                    parent->priv->fns->find_rsc(child, rsc_id, NULL,
                                                pcmk_rsc_match_clone_only);

                /* ... but don't use it if it was already associated with a
                 * pending action on another node
                 */
                if (inactive_instance != NULL) {
                    const pcmk_node_t *pending_node = NULL;

                    pending_node = inactive_instance->priv->pending_node;
                    if ((pending_node != NULL)
                        && !pcmk__same_node(pending_node, node)) {
                        inactive_instance = NULL;
                    }
                }
            }
        }
    }

    // No active match: fall back to the remembered inactive instance, if any
    if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
        pcmk__rsc_trace(parent, "Resource %s, empty slot",
                        inactive_instance->id);
        rsc = inactive_instance;
    }

    /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or
     * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we
     * don't want to consume a valid instance number for unclean nodes. Such
     * instances may appear to be active according to the history, but should be
     * considered inactive, so we can start an instance elsewhere. Treat such
     * instances as orphans.
     *
     * An exception is instances running on guest nodes -- since guest node
     * "fencing" is actually just a resource stop, requires shouldn't apply.
     *
     * @TODO Ideally, we'd use an inactive instance number if it is not needed
     * for any clean instances. However, we don't know that at this point.
     */
    if ((rsc != NULL)
        && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)
        && (!node->details->online || node->details->unclean)
        && !pcmk__is_guest_or_bundle_node(node)
        && !pe__is_universal_clone(parent, scheduler)) {

        rsc = NULL;
    }

    if (rsc == NULL) {
        rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler);
        pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id);
    }
    return rsc;
}

/*
 * Find the resource that a resource history entry on a node belongs to.
 *
 * Looks up rsc_id directly, then as the ":0" instance of a non-unique clone.
 * For anonymous clones the result is remapped to the appropriate instance for
 * this node (bundle replica or clone instance). If the resulting resource's ID
 * differs from rsc_id, rsc_id is recorded as its history ID.
 *
 * Returns the matching resource, or NULL if none is known or the configured
 * resource with that ID is no longer a primitive.
 */
static pcmk_resource_t *
unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node,
                     const char *rsc_id)
{
    pcmk_resource_t *rsc = NULL;
    pcmk_resource_t *parent = NULL;

    crm_trace("looking for %s", rsc_id);
    rsc = pe_find_resource(scheduler->priv->resources, rsc_id);

    if (rsc == NULL) {
        /* If we didn't find the resource by its name in the operation history,
         * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0,
         * we create a single :0 orphan to match against here.
         */
        char *clone0_id = clone_zero(rsc_id);
        pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources,
                                                   clone0_id);

        if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) {
            rsc = clone0;
            parent = uber_parent(clone0);
            crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
        } else {
            crm_trace("%s is not known as %s either (orphan)",
                      rsc_id, clone0_id);
        }
        free(clone0_id);

    } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) {
        crm_trace("Resource history for %s is orphaned "
                  "because it is no longer primitive", rsc_id);
        return NULL;

    } else {
        parent = uber_parent(rsc);
    }

    if (pcmk__is_anonymous_clone(parent)) {

        if (pcmk__is_bundled(parent)) {
            rsc = pe__find_bundle_replica(parent->priv->parent, node);
        } else {
            // History may carry an instance suffix; match on the base name
            char *base = clone_strip(rsc_id);

            rsc = find_anonymous_clone(scheduler, node, parent, base);
            free(base);
            CRM_ASSERT(rsc != NULL);
        }
    }

    // Remember the name used in history when it differs from the resource ID
    if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none)
        && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) {

        pcmk__str_update(&(rsc->priv->history_id), rsc_id);
        pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
                        rsc_id, pcmk__node_name(node), rsc->id,
                        pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : "");
    }
    return rsc;
}

/*
 * Create a placeholder resource for a history entry with no configured
 * resource.
 *
 * If the scheduler is not set to stop removed resources, the placeholder is
 * marked unmanaged; otherwise it is given a -PCMK_SCORE_INFINITY location
 * constraint via resource_location().
 *
 * Returns the new resource, or NULL if it could not be created.
 */
static pcmk_resource_t *
process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node,
                        pcmk_scheduler_t *scheduler)
{
    pcmk_resource_t *rsc = NULL;
    const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID);

    crm_debug("Detected orphan resource %s on %s",
              rsc_id, pcmk__node_name(node));
    rsc = create_fake_resource(rsc_id, rsc_entry, scheduler);
    if (rsc == NULL) {
        return NULL;
    }

    if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) {
        pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed);

    } else {
        // rsc was already checked for NULL above, so this cannot trigger
        CRM_CHECK(rsc != NULL, return NULL);
        pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id);
        resource_location(rsc, NULL, -PCMK_SCORE_INFINITY,
                          "__orphan_do_not_run__", scheduler);
    }
    return rsc;
}

static void
process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node,
                  enum pcmk__on_fail on_fail)
{
    pcmk_node_t *tmpnode = NULL;
    char *reason = NULL;
    enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore;
    pcmk_scheduler_t *scheduler = NULL;
    bool known_active = false;

    CRM_ASSERT(rsc);
    scheduler = rsc->priv->scheduler;
    known_active = (rsc->priv->orig_role > pcmk_role_stopped);
    pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
                    rsc->id, pcmk_role_text(rsc->priv->orig_role),
                    pcmk__node_name(node), pcmk__on_fail_text(on_fail));

    /* process current state */
    if (rsc->priv->orig_role != pcmk_role_unknown) {
        pcmk_resource_t *iter = rsc;

        while (iter) {
            if (g_hash_table_lookup(iter->priv->probed_nodes,
                                    node->priv->id) == NULL) {
                pcmk_node_t *n = pe__copy_node(node);

                pcmk__rsc_trace(rsc, "%s (%s in history) known on %s",
                                rsc->id,
                                pcmk__s(rsc->priv->history_id, "the same"),
                                pcmk__node_name(n));
                g_hash_table_insert(iter->priv->probed_nodes,
                                    (gpointer) n->priv->id, n);
            }
            if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) {
                break;
            }
            iter = iter->priv->parent;
        }
    }

    /* If a managed resource is believed to be running, but node is down ...
*/ if (known_active && !node->details->online && !node->details->maintenance && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pcmk__is_guest_or_bundle_node(node)) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); should_fence = TRUE; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { if (pcmk__is_remote_node(node) && (node->priv->remote != NULL) && !pcmk_is_set(node->priv->remote->flags, pcmk__rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ pcmk__clear_node_flags(node, pcmk__node_seen); reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(scheduler, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). 
*/ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = pcmk__on_fail_ignore; } switch (on_fail) { case pcmk__on_fail_ignore: /* nothing to do */ break; case pcmk__on_fail_demote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); demote_action(rsc, node, FALSE); break; case pcmk__on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(scheduler, node, reason, FALSE); free(reason); break; case pcmk__on_fail_standby_node: pcmk__set_node_flags(node, pcmk__node_standby|pcmk__node_fail_standby); break; case pcmk__on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked); break; case pcmk__on_fail_ban: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -PCMK_SCORE_INFINITY, "__action_migration_auto__", scheduler); break; case pcmk__on_fail_stop: pe__set_next_role(rsc, pcmk_role_stopped, PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP); break; case pcmk__on_fail_restart: if (known_active) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_restart_container: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. 
*/ scheduler->stop_needed = g_list_prepend(scheduler->stop_needed, rsc->priv->launcher); } else if (rsc->priv->launcher != NULL) { stop_action(rsc->priv->launcher, node, FALSE); } else if (known_active) { stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_reset_remote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { tmpnode = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); } if (pcmk__is_remote_node(tmpnode) && !pcmk_is_set(tmpnode->priv->flags, pcmk__node_remote_fenced)) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(scheduler, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (known_active) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->priv->remote_reconnect_ms > 0U) { pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } /* Ensure a remote connection failure forces an unclean Pacemaker Remote * node to be fenced. By marking the node as seen, the failure will result * in a fencing operation regardless if we're going to attempt to reconnect * in this transition. 
*/ if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); if (tmpnode && tmpnode->details->unclean) { pcmk__set_node_flags(tmpnode, pcmk__node_seen); } } if (known_active) { if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Removed resource %s is active on %s and will be " "stopped when possible", rsc->id, pcmk__node_name(node)); } else { crm_notice("Removed resource %s must be stopped manually on %s " "because " PCMK_OPT_STOP_ORPHAN_RESOURCES " is set to false", rsc->id, pcmk__node_name(node)); } } native_add_running(rsc, node, scheduler, (save_on_fail != pcmk__on_fail_ignore)); switch (on_fail) { case pcmk__on_fail_ignore: break; case pcmk__on_fail_demote: case pcmk__on_fail_block: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); break; default: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); break; } } else if ((rsc->priv->history_id != NULL) && (strchr(rsc->priv->history_id, ':') != NULL)) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)", rsc->priv->history_id, rsc->id); free(rsc->priv->history_id); rsc->priv->history_id = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *stop = (pcmk_action_t *) gIter->data; pcmk__set_action_flags(stop, pcmk__action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. 
*/ if ((rsc->priv->orig_role == pcmk_role_stopped) && (rsc->priv->active_nodes != NULL) && (rsc->priv->partial_migration_target != NULL) && pcmk__same_node(rsc->priv->partial_migration_source, node)) { rsc->priv->orig_role = pcmk_role_started; } } /* create active recurring operations as optional */ static void process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = pcmk__xe_id(rsc_op); counter++; if (node->details->online == FALSE) { pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pcmk__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pcmk__node_name(node)); continue; } else if (counter < start_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d", id, pcmk__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pcmk__node_name(node)); continue; } status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pcmk__rsc_trace(rsc, "Skipping %s on %s: status", id, pcmk__node_name(node)); continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node)); custom_action(rsc, key, task, node, TRUE, scheduler); } } void 
calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, PCMK_XA_OPERATION); status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. 
node shutdown time) if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((scheduler->shutdown_lock > 0) && (get_effective_time(scheduler) > (lock_time + scheduler->shutdown_lock))) { pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pcmk__node_name(node)); pe__clear_resource_history(rsc, node); } else { rsc->priv->lock_node = node; rsc->priv->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry \c PCMK__XE_LRM_RESOURCE XML being unpacked * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ static pcmk_resource_t * unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = pcmk__xe_id(lrm_resource); pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum pcmk__on_fail on_fail = pcmk__on_fail_ignore; enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE " entry: No " PCMK_XA_ID); crm_log_xml_info(lrm_resource, "missing-id"); return NULL; } crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id, pcmk__node_name(node)); /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort * them */ for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; 
} } /* find the resource */ rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->priv->orig_role; rsc->priv->orig_role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if ((rsc->priv->next_role == pcmk_role_unknown) || (req_role < rsc->priv->next_role)) { pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE); } else if (req_role > rsc->priv->next_role) { pcmk__rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, pcmk_role_text(rsc->priv->next_role), pcmk_role_text(req_role)); } } if (saved_role > rsc->priv->orig_role) { rsc->priv->orig_role = saved_role; } return rsc; } static void handle_removed_launched_resources(const xmlNode *lrm_rsc_list, pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pcmk_resource_t *rsc; pcmk_resource_t *launcher = NULL; const char *rsc_id; const char *launcher_id = NULL; if (!pcmk__xe_is(rsc_entry, 
PCMK__XE_LRM_RESOURCE)) { continue; } launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER); rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); if ((launcher_id == NULL) || (rsc_id == NULL)) { continue; } launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher == NULL) { continue; } rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if ((rsc == NULL) || (rsc->priv->launcher != NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { continue; } pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s", rsc->id, launcher_id); rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler) { bool found_removed_launched_resource = false; // Drill down to PCMK__XE_LRM_RESOURCES section xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL); if (xml == NULL) { return; } xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL); if (xml == NULL) { return; } // Unpack each PCMK__XE_LRM_RESOURCE entry for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) { pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { found_removed_launched_resource = true; } } /* Now that all resource state has been unpacked for this node, map any * removed launched resources to their launchers. 
*/ if (found_removed_launched_resource) { handle_removed_launched_resources(xml, scheduler); } } static void set_active(pcmk_resource_t *rsc) { const pcmk_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) { rsc->priv->orig_role = pcmk_role_unpromoted; } else { rsc->priv->orig_role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pcmk_node_t *node = value; int *score = user_data; node->assign->score = *score; } #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'", NULL); /* Need to check against transition_magic too? 
*/ if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc); crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! 
* \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; char *xpath = NULL; xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" SUB_XPATH_LRM_RSC_OP "[@" PCMK__XA_RC_CODE "!='%d']", node_name, rsc->id, PCMK_OCF_UNKNOWN); search = xpath_search(rsc->priv->scheduler->input, xpath); result = (numXpathResults(search) == 0); freeXpathObject(search); free(xpath); return result; } /*! * \internal * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0); } /*! 
* \internal * \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); op != NULL; op = pcmk__xe_next_same(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, PCMK_XA_OPERATION); if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL) && pe__is_newer_op(op, xml_op) > 0) { return true; } } return false; } /*! * \internal * \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to; const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. 
*/ if ((xml_op != migrate_to) && (migrate_to != NULL) && pcmk__str_eq(node_name, source, pcmk__str_casei)) { xml_op = migrate_to; } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler); } /*! * \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE); *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { pcmk__config_err("Ignoring resource history entry %s without " PCMK__META_MIGRATE_SOURCE " and " PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_SOURCE "='%s' does not match %s", pcmk__xe_id(entry), *source_name, pcmk__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource 
history entry %s because " PCMK__META_MIGRATE_TARGET "='%s' does not match %s", pcmk__xe_id(entry), *target_name, pcmk__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pcmk__node_name(node)); rsc->priv->orig_role = pcmk_role_stopped; rsc->priv->dangling_migration_sources = g_list_prepend(rsc->priv->dangling_migration_sources, (gpointer) node); } /*! * \internal * \brief Update resource role etc. after a successful migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. 
* * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. * * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, scheduler); // Check for a migrate_from action from this source on the target migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, -1, scheduler); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc); crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. 
*/ target_newer_state = newer_state_after_migrate(history->rsc->id, target, history->xml, migrate_from, scheduler); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ history->rsc->priv->orig_role = pcmk_role_started; target_node = pcmk_find_node(scheduler, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { native_add_running(history->rsc, target_node, scheduler, TRUE); } else { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. */ if ((target_node != NULL) && target_node->details->online && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { pcmk_node_t *source_node = pcmk_find_node(scheduler, source); native_add_running(history->rsc, target_node, scheduler, FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. 
Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ history->rsc->priv->partial_migration_target = target_node; history->rsc->priv->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for migrate_from on the target target_migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ && !newer_state_after_migrate(history->rsc->id, target, history->xml, target_migrate_from, scheduler)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). 
*/ pcmk_node_t *target_node = pcmk_find_node(scheduler, target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, scheduler, FALSE); } } else if (!non_monitor_after(history->rsc->id, source, history->xml, scheduler)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later history->rsc->priv->dangling_migration_sources = g_list_prepend(history->rsc->priv->dangling_migration_sources, (gpointer) history->node); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_from action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for a migrate_to on the source source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. 
*/ && !newer_state_after_migrate(history->rsc->id, source, source_migrate_to, history->xml, scheduler)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ pcmk_node_t *source_node = pcmk_find_node(scheduler, source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, scheduler, TRUE); } } } /*! * \internal * \brief Add an action to cluster's list of failed actions * * \param[in,out] history Parsed action result history */ static void record_failed_op(struct action_history *history) { const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (!(history->node->details->online)) { return; } for (const xmlNode *xIter = scheduler->priv->failed->children; xIter != NULL; xIter = xIter->next) { const char *key = pcmk__xe_history_key(xIter); const char *uname = crm_element_value(xIter, PCMK_XA_UNAME); if (pcmk__str_eq(history->key, key, pcmk__str_none) && pcmk__str_eq(uname, history->node->priv->name, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", history->key, pcmk__node_name(history->node)); return; } } crm_trace("Adding entry for %s on %s to failed action list", history->key, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id); pcmk__xml_copy(scheduler->priv->failed, history->xml); } static char * last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = pcmk__str_copy(p); } free(when_s); } if (result == NULL) { result = pcmk__str_copy("unknown_time"); } return result; } /*! 
* \internal * \brief Ban a resource (or its clone if an anonymous instance) from all nodes * * \param[in,out] rsc Resource to ban */ static void ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -PCMK_SCORE_INFINITY; const pcmk_scheduler_t *scheduler = rsc->priv->scheduler; if (rsc->priv->parent != NULL) { pcmk_resource_t *parent = uber_parent(rsc); if (pcmk__is_anonymous_clone(parent)) { /* For anonymous clones, if an operation with * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the * entire clone must stop. */ rsc = parent; } } // Ban the resource from all nodes crm_notice("%s will not be started under current conditions", rsc->id); if (rsc->priv->allowed_nodes != NULL) { g_hash_table_destroy(rsc->priv->allowed_nodes); } rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes); g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score); } /*! * \internal * \brief Get configured failure handling and role after failure for an action * * \param[in,out] history Unpacked action history entry * \param[out] on_fail Where to set configured failure handling * \param[out] fail_role Where to set to role after failure */ static void unpack_failure_handling(struct action_history *history, enum pcmk__on_fail *on_fail, enum rsc_role_e *fail_role) { xmlNode *config = pcmk__find_action_config(history->rsc, history->task, history->interval_ms, true); GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, history->task, history->interval_ms, config); const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL); *on_fail = pcmk__parse_on_fail(history->rsc, history->task, history->interval_ms, on_fail_str); *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, meta); g_hash_table_destroy(meta); } /*! 
* \internal * \brief Update resource role, failure handling, etc., after a failed action * * \param[in,out] history Parsed action result history * \param[in] config_on_fail Action failure handling from configuration * \param[in] fail_role Resource's role after failure of this action * \param[out] last_failure This will be set to the history XML * \param[in,out] on_fail Actual handling of action result */ static void unpack_rsc_op_failure(struct action_history *history, enum pcmk__on_fail config_on_fail, enum rsc_role_e fail_role, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { bool is_probe = false; char *last_change_s = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; *last_failure = history->xml; is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); } else { pcmk__sched_warn(scheduler, "Unexpected result (%s%s%s) was recorded for %s of " "%s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? 
"probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); if (is_probe && (history->exit_status != PCMK_OCF_OK) && (history->exit_status != PCMK_OCF_NOT_RUNNING) && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the " PCMK_XA_RESOURCE_DISCOVERY " option for location " "constraints", history->rsc->id, pcmk__node_name(history->node)); } record_failed_op(history); } free(last_change_s); if (*on_fail < config_on_fail) { pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s", pcmk__on_fail_text(*on_fail), pcmk__on_fail_text(config_on_fail), history->key); *on_fail = config_on_fail; } if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY, "__stop_fail__", scheduler); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { if (config_on_fail == pcmk__on_fail_block) { history->rsc->priv->orig_role = pcmk_role_promoted; pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with " PCMK_META_ON_FAIL "=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { history->rsc->priv->orig_role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. 
*/ history->rsc->priv->orig_role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); history->rsc->priv->orig_role = pcmk_role_stopped; } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } pcmk__rsc_trace(history->rsc, "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s", history->rsc->id, pcmk_role_text(history->rsc->priv->orig_role), pcmk__btoa(history->node->details->unclean), pcmk__on_fail_text(config_on_fail), pcmk_role_text(fail_role)); if ((fail_role != pcmk_role_started) && (history->rsc->priv->next_role < fail_role)) { pe__set_next_role(history->rsc, fail_role, "failure"); } if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } } /*! * \internal * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * * \param[in,out] history Parsed action history entry */ static void block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->priv->scheduler, history->node)) { return; // Failed stops are recoverable via fencing } last_change_s = last_change_str(history->xml); pcmk__sched_err(history->node->priv->scheduler, "No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " QB_XS " rc=%d id=%s", history->rsc->id, history->task, pcmk__node_name(history->node), services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? 
"" : ": "), pcmk__s(history->exit_reason, ""), last_change_s, history->exit_status, history->id); free(last_change_s); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked); } /*! * \internal * \brief Update action history's execution status and why * * \param[in,out] history Parsed action history entry * \param[out] why Where to store reason for update * \param[in] value New value * \param[in] reason Description of why value was changed */ static inline void remap_because(struct action_history *history, const char **why, int value, const char *reason) { if (history->execution_status != value) { history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. The status provided by the * executor is not directly usable since the executor does not know what was expected. * * \param[in,out] history Parsed action history entry * \param[in,out] on_fail What should be done about the result * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the scheduler data's list of failed * operations to highlight it for the user. * * \note This may update the resource's current and next role. 
*/
static void
remap_operation(struct action_history *history,
                enum pcmk__on_fail *on_fail, bool expired)
{
    bool is_probe = false;

    // Remember the values as recorded, for the trace message at the end
    int orig_exit_status = history->exit_status;
    int orig_exec_status = history->execution_status;

    const char *why = NULL; // Set only if something was actually remapped
    const char *task = history->task;

    // Remap degraded results to their successful counterparts
    history->exit_status = pcmk__effective_rc(history->exit_status);
    if (history->exit_status != orig_exit_status) {
        why = "degraded result";
        if (!expired && (!history->node->details->shutdown
                         || history->node->details->online)) {
            record_failed_op(history);
        }
    }

    // A maskable probe failure (non-bundled) is treated as done/"not running"
    if (!pcmk__is_bundled(history->rsc)
        && pcmk_xe_mask_probe_failure(history->xml)
        && ((history->execution_status != PCMK_EXEC_DONE)
            || (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
        history->execution_status = PCMK_EXEC_DONE;
        history->exit_status = PCMK_OCF_NOT_RUNNING;
        why = "equivalent probe result";
    }

    /* If the executor reported an execution status of anything but done or
     * error, consider that final. But for done or error, we know better whether
     * it should be treated as a failure or not, because we know the expected
     * result.
     */
    switch (history->execution_status) {
        case PCMK_EXEC_DONE:
        case PCMK_EXEC_ERROR:
            break;

        // These should be treated as node-fatal
        case PCMK_EXEC_NO_FENCE_DEVICE:
        case PCMK_EXEC_NO_SECRETS:
            remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
                          "node-fatal error");
            goto remap_done;

        default:
            goto remap_done;
    }

    // From here on, only "done" and "error" execution statuses are handled
    is_probe = pcmk_xe_is_probe(history->xml);
    if (is_probe) {
        task = "probe";
    }

    if (history->expected_exit_status < 0) {
        /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
         * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
         * expected exit status in the transition key, which (along with the
         * similar case of a corrupted transition key in the CIB) will be
         * reported to this function as -1. Pacemaker 2.0+ does not support
         * rolling upgrades from those versions or processing of saved CIB files
         * from those versions, so we do not need to care much about this case.
         */
        remap_because(history, &why, PCMK_EXEC_ERROR,
                      "obsolete history format");
        pcmk__config_warn("Expected result not found for %s on %s "
                          "(corrupt or obsolete CIB?)",
                          history->key, pcmk__node_name(history->node));

    } else if (history->exit_status == history->expected_exit_status) {
        remap_because(history, &why, PCMK_EXEC_DONE, "expected result");

    } else {
        remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
        pcmk__rsc_debug(history->rsc,
                        "%s on %s: expected %d (%s), got %d (%s%s%s)",
                        history->key, pcmk__node_name(history->node),
                        history->expected_exit_status,
                        services_ocf_exitcode_str(history->expected_exit_status),
                        history->exit_status,
                        services_ocf_exitcode_str(history->exit_status),
                        (pcmk__str_empty(history->exit_reason)? "" : ": "),
                        pcmk__s(history->exit_reason, ""));
    }

    // Refine the status (and possibly the role) by the specific exit status
    switch (history->exit_status) {
        case PCMK_OCF_OK:
            // A probe expecting "not running" that found the resource active
            if (is_probe
                && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
                char *last_change_s = last_change_str(history->xml);

                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
                pcmk__rsc_info(history->rsc,
                               "Probe found %s active on %s at %s",
                               history->rsc->id,
                               pcmk__node_name(history->node),
                               last_change_s);
                free(last_change_s);
            }
            break;

        case PCMK_OCF_NOT_RUNNING:
            if (is_probe
                || (history->expected_exit_status == history->exit_status)
                || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) {

                /* For probes, recurring monitors for the Stopped role, and
                 * unmanaged resources, "not running" is not considered a
                 * failure.
                 */
                remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
                history->rsc->priv->orig_role = pcmk_role_stopped;
                *on_fail = pcmk__on_fail_ignore;
                pe__set_next_role(history->rsc, pcmk_role_unknown,
                                  "not running");
            }
            break;

        case PCMK_OCF_RUNNING_PROMOTED:
            // A probe that unexpectedly found the resource promoted
            if (is_probe
                && (history->exit_status != history->expected_exit_status)) {
                char *last_change_s = last_change_str(history->xml);

                remap_because(history, &why, PCMK_EXEC_DONE, "probe");
                pcmk__rsc_info(history->rsc,
                               "Probe found %s active and promoted on %s at %s",
                               history->rsc->id,
                               pcmk__node_name(history->node),
                               last_change_s);
                free(last_change_s);
            }

            // An expired, unexpected result does not set the promoted role
            if (!expired
                || (history->exit_status == history->expected_exit_status)) {
                history->rsc->priv->orig_role = pcmk_role_promoted;
            }
            break;

        case PCMK_OCF_FAILED_PROMOTED:
            if (!expired) {
                history->rsc->priv->orig_role = pcmk_role_promoted;
            }
            remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
            break;

        case PCMK_OCF_NOT_CONFIGURED:
            remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
            break;

        case PCMK_OCF_UNIMPLEMENT_FEATURE:
            {
                guint interval_ms = 0;

                // The interval is re-read from the XML for this check
                crm_element_value_ms(history->xml, PCMK_META_INTERVAL,
                                     &interval_ms);

                if (interval_ms == 0) {
                    // Non-recurring action: hard error
                    if (!expired) {
                        block_if_unrecoverable(history);
                    }
                    remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
                                  "exit status");
                } else {
                    // Recurring action: merely unsupported
                    remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
                                  "exit status");
                }
            }
            break;

        case PCMK_OCF_NOT_INSTALLED:
        case PCMK_OCF_INVALID_PARAM:
        case PCMK_OCF_INSUFFICIENT_PRIV:
            if (!expired) {
                block_if_unrecoverable(history);
            }
            remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
            break;

        default:
            // Any other exit status reported as "done" is treated as an error
            if (history->execution_status == PCMK_EXEC_DONE) {
                char *last_change_s = last_change_str(history->xml);

                crm_info("Treating unknown exit status %d from %s of %s "
                         "on %s at %s as failure",
                         history->exit_status, task, history->rsc->id,
                         pcmk__node_name(history->node), last_change_s);
                remap_because(history, &why, PCMK_EXEC_ERROR,
                              "unknown exit status");
                free(last_change_s);
            }
            break;
    }

remap_done:
    // Trace the remapping only if something changed
    if (why != NULL) {
        pcmk__rsc_trace(history->rsc,
                        "Remapped %s result from [%s: %s] to [%s: %s] "
                        "because of %s",
                        history->key, pcmk_exec_status_str(orig_exec_status),
                        crm_exit_str(orig_exit_status),
                        pcmk_exec_status_str(history->execution_status),
                        crm_exit_str(history->exit_status), why);
    }
}

/*!
 * \internal
 * \brief Check whether a last failure should be cleared because the
 *        resource's parameters changed
 *
 * Only start and monitor actions are considered. If
 * pe__bundle_needs_remote_name() is true for the resource, the check is
 * registered with pe__add_param_check() for later, and FALSE is returned now.
 *
 * \param[in]     xml_op  History entry of the failed action
 * \param[in]     task    Name of the failed action
 * \param[in,out] rsc     Resource that the history entry is for
 * \param[in,out] node    Node that the history entry is for
 *
 * \return TRUE if \p task is a start or monitor and the digest comparison
 *         reports anything other than a match or an unknown digest, otherwise
 *         FALSE
 */
static bool
should_clear_for_param_change(const xmlNode *xml_op, const char *task,
                              pcmk_resource_t *rsc, pcmk_node_t *node)
{
    if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
        if (pe__bundle_needs_remote_name(rsc)) {
            /* We haven't allocated resources yet, so we can't reliably
             * substitute addr parameters for the REMOTE_CONTAINER_HACK.
             * When that's needed, defer the check until later.
             */
            pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure,
                                rsc->priv->scheduler);

        } else {
            pcmk__op_digest_t *digest_data = NULL;

            digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
                                                rsc->priv->scheduler);
            switch (digest_data->rc) {
                case pcmk__digest_unknown:
                    crm_trace("Resource %s history entry %s on %s"
                              " has no digest to compare",
                              rsc->id, pcmk__xe_history_key(xml_op),
                              node->priv->id);
                    break;
                case pcmk__digest_match:
                    break;
                default:
                    // Any other comparison result counts as a change
                    return TRUE;
            }
        }
    }
    return FALSE;
}

/*!
 * \internal
 * \brief Order an action after fencing of a remote node
 *
 * The remote node is looked up by the ID of its connection resource. If no
 * such node is found, no ordering is created.
 *
 * \param[in,out] action       Action to order after fencing
 * \param[in]     remote_conn  Remote node's connection resource
 * \param[in,out] scheduler    Scheduler data
 */
static void
order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn,
                           pcmk_scheduler_t *scheduler)
{
    pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id);

    if (remote_node) {
        pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
                                           FALSE, scheduler);

        order_actions(fence, action, pcmk__ar_first_implies_then);
    }
}

/*!
 * \internal
 * \brief Check whether a failure timeout should be ignored for an action
 *
 * \param[in] rsc              Resource that the action history is for
 * \param[in] task             Name of the action
 * \param[in] interval_ms      Action interval in milliseconds
 * \param[in] is_last_failure  Whether the history entry is a last_failure
 *                             entry (affects only whether a message is logged)
 *
 * \return TRUE if \p rsc has a reconnect interval, fencing is enabled, the
 *         action is a recurring monitor, and the node named like \p rsc exists
 *         without the pcmk__node_remote_fenced flag set, otherwise FALSE
 */
static bool
should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task,
                              guint interval_ms, bool is_last_failure)
{
    /* Clearing failures of recurring monitors has special concerns. The
     * executor reports only changes in the monitor result, so if the
     * monitor is still active and still getting the same failure result,
     * that will go undetected after the failure is cleared.
     *
     * Also, the operation history will have the time when the recurring
     * monitor result changed to the given code, not the time when the
     * result last happened.
     *
     * @TODO We probably should clear such failures only when the failure
     * timeout has passed since the last occurrence of the failed result.
     * However we don't record that information. We could maybe approximate
     * that by clearing only if there is a more recent successful monitor or
     * stop result, but we don't even have that information at this point
     * since we are still unpacking the resource's operation history.
     *
     * This is especially important for remote connection resources with a
     * reconnect interval, so in that case, we skip clearing failures
     * if the remote node hasn't been fenced.
     */
    if ((rsc->priv->remote_reconnect_ms > 0U)
        && pcmk_is_set(rsc->priv->scheduler->flags,
                       pcmk__sched_fencing_enabled)
        && (interval_ms != 0)
        && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {

        pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler,
                                                  rsc->id);

        if (remote_node && !pcmk_is_set(remote_node->priv->flags,
                                        pcmk__node_remote_fenced)) {
            if (is_last_failure) {
                crm_info("Waiting to clear monitor failure for remote node %s"
                         " until fencing has occurred", rsc->id);
            }
            return TRUE;
        }
    }
    return FALSE;
}

/*!
 * \internal
 * \brief Check operation age and schedule failure clearing when appropriate
 *
 * This function has two distinct purposes. The first is to check whether an
 * operation history entry is expired (i.e. the resource has a failure timeout,
 * the entry is older than the timeout, and the resource either has no fail
 * count or its fail count is entirely older than the timeout). The second is to
 * schedule fail count clearing when appropriate (i.e.
the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in,out] history Parsed action result history * * \return true if operation history entry is expired, otherwise false */ static bool check_operation_expiry(struct action_history *history) { bool expired = false; bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; int unexpired_fail_count = 0; const char *clear_reason = NULL; const guint expiration_sec = history->rsc->priv->failure_expiration_ms / 1000; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) { pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not expired: " "Not Installed does not expire", history->id, pcmk__node_name(history->node)); return false; // "Not installed" must always be cleared manually } if ((expiration_sec > 0) && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE, &last_run) == 0)) { /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a * timestamp */ time_t now = get_effective_time(scheduler); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + expiration_sec)) && !should_ignore_failure_timeout(history->rsc, history->task, history->interval_ms, is_last_failure)) { expired = true; } // Does the resource as a whole have an unexpired fail count? 
unexpired_fail_count = pe_get_failcount(history->node, history->rsc, &last_failure, pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d " "expiration=%s last-failure@%lld", history->id, (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, pcmk__readable_interval(expiration_sec * 1000), (long long) last_failure); last_failure += expiration_sec + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, scheduler, "fail count expiration"); } } if (expired) { if (pe_get_failcount(history->node, history->rsc, NULL, pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Unexpired fail count", history->id, pcmk__node_name(history->node)); expired = false; } } else if (is_last_failure && (history->rsc->priv->remote_reconnect_ms > 0U)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. 
*/ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(history->xml, history->task, history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { pcmk_action_t *clear_op = NULL; // Schedule clearing of the fail count clear_op = pe__clear_failcount(history->rsc, history->node, clear_reason, scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && (history->rsc->priv->remote_reconnect_ms > 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", history->task, history->rsc->id); order_after_remote_fencing(clear_op, history->rsc, scheduler); } } if (expired && (history->interval_ms == 0) && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Probe result", history->id, pcmk__node_name(history->node)); expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } /*! 
* \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry * \param[in] exit_status Exit status to base new state on * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void update_resource_state(struct action_history *history, int exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { bool clear_past_failure = false; if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { history->rsc->priv->orig_role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } if (history->rsc->priv->orig_role < pcmk_role_started) { set_active(history->rsc); } } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_stopped; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_promoted; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { if (*on_fail == pcmk__on_fail_demote) { /* Demote clears an error only if * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE */ clear_past_failure = true; } history->rsc->priv->orig_role = pcmk_role_unpromoted; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, 
PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pcmk__node_name(history->node)); set_active(history->rsc); } if (!clear_past_failure) { return; } switch (*on_fail) { case pcmk__on_fail_stop: case pcmk__on_fail_ban: case pcmk__on_fail_standby_node: case pcmk__on_fail_fence_node: pcmk__rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, pcmk__on_fail_text(*on_fail), history->task); break; case pcmk__on_fail_block: case pcmk__on_fail_ignore: case pcmk__on_fail_demote: case pcmk__on_fail_restart: case pcmk__on_fail_restart_container: *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; case pcmk__on_fail_reset_remote: if (history->rsc->priv->remote_reconnect_ms == 0U) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; } } /*! * \internal * \brief Check whether a given history entry matters for resource state * * \param[in] history Parsed action history entry * * \return true if action can affect resource state, otherwise false */ static inline bool can_affect_state(struct action_history *history) { #if 0 /* @COMPAT It might be better to parse only actions we know we're interested * in, rather than exclude a couple we don't. However that would be a * behavioral change that should be done at a major or minor series release. * Currently, unknown operations can affect whether a resource is considered * active and/or failed. 
*/ return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); #endif } /*! * \internal * \brief Unpack execution/exit status and exit reason from a history entry * * \param[in,out] history Action history entry to unpack * * \return Standard Pacemaker return code */ static int unpack_action_result(struct action_history *history) { if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS, &(history->execution_status)) < 0) || (history->execution_status < PCMK_EXEC_PENDING) || (history->execution_status > PCMK_EXEC_MAX) || (history->execution_status == PCMK_EXEC_CANCELLED)) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_OP_STATUS " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_OP_STATUS), "")); return pcmk_rc_unpack_error; } if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE, &(history->exit_status)) < 0) || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { #if 0 /* @COMPAT We should ignore malformed entries, but since that would * change behavior, it should be done at a major or minor series * release. */ pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_RC_CODE " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_RC_CODE), "")); return pcmk_rc_unpack_error; #else history->exit_status = CRM_EX_ERROR; #endif } history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON); return pcmk_rc_ok; } /*! 
* \internal * \brief Process an action history entry whose result expired * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the * entry needs no further processing) */ static int process_expired_result(struct action_history *history, int orig_exit_status) { if (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { if (history->rsc->priv->orig_role <= pcmk_role_stopped) { history->rsc->priv->orig_role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", history->id, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->exit_status == history->expected_exit_status) { return pcmk_rc_undetermined; // Only failures expire } if (history->interval_ms == 0) { crm_notice("Ignoring resource history entry %s for %s of %s on %s: " "Expired failure", history->id, history->task, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->node->details->online && !history->node->details->unclean) { /* Reschedule the recurring action. schedule_cancel() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so pcmk__check_action_config() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). 
*/ crm_notice("Rescheduling %s-interval %s of %s on %s " "after failure expired", pcmk__readable_interval(history->interval_ms), history->task, history->rsc->id, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST, "calculated-failure-timeout"); return pcmk_rc_ok; } return pcmk_rc_undetermined; } /*! * \internal * \brief Process a masked probe failure * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { pcmk_resource_t *ban_rsc = history->rsc; if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) { ban_rsc = uber_parent(history->rsc); } crm_notice("Treating probe result '%s' for %s on %s as 'not running'", services_ocf_exitcode_str(orig_exit_status), history->rsc->id, pcmk__node_name(history->node)); update_resource_state(history, history->expected_exit_status, last_failure, on_fail); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); record_failed_op(history); resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY, "masked-probe-failure", ban_rsc->priv->scheduler); } /*! * \internal Check whether a given failure is for a given pending action * * \param[in] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known * * \return true if \p last_failure is failure of pending action in \p history, * otherwise false * \note Both \p history and \p last_failure must come from the same * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be * the same. 
*/ static bool failure_is_newer(const struct action_history *history, const xmlNode *last_failure) { guint failure_interval_ms = 0U; long long failure_change = 0LL; long long this_change = 0LL; if (last_failure == NULL) { return false; // Resource has no last_failure entry } if (!pcmk__str_eq(history->task, crm_element_value(last_failure, PCMK_XA_OPERATION), pcmk__str_none)) { return false; // last_failure is for different action } if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL, &failure_interval_ms) != pcmk_ok) || (history->interval_ms != failure_interval_ms)) { return false; // last_failure is for action with different interval } if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE), &this_change, 0LL) != pcmk_rc_ok) || (pcmk__scan_ll(crm_element_value(last_failure, PCMK_XA_LAST_RC_CHANGE), &failure_change, 0LL) != pcmk_rc_ok) || (failure_change < this_change)) { return false; // Failure is not known to be newer } return true; } /*! * \internal * \brief Update a resource's role etc. for a pending action * * \param[in,out] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known */ static void process_pending_action(struct action_history *history, const xmlNode *last_failure) { /* For recurring monitors, a failure is recorded only in RSC_last_failure_0, * and there might be a RSC_monitor_INTERVAL entry with the last successful * or pending result. * * If last_failure contains the failure of the pending recurring monitor * we're processing here, and is newer, the action is no longer pending. * (Pending results have call ID -1, which sorts last, so the last failure * if any should be known.) 
*/ if (failure_is_newer(history, last_failure)) { return; } if (strcmp(history->task, PCMK_ACTION_START) == 0) { pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending); set_active(history->rsc); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. */ const char *migrate_target = NULL; pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, PCMK__META_MIGRATE_TARGET); target = pcmk_find_node(history->rsc->priv->scheduler, migrate_target); if (target != NULL) { stop_action(history->rsc, target, FALSE); } } if (history->rsc->priv->pending_action != NULL) { /* There should never be multiple pending actions, but as a failsafe, * just remember the first one processed for display purposes. */ return; } if (pcmk_is_probe(history->task, history->interval_ms)) { /* Pending probes are currently never displayed, even if pending * operations are requested. If we ever want to change that, * enable the below and the corresponding part of * native.c:native_pending_action(). 
*/ #if 0 history->rsc->private->pending_action = strdup("probe"); history->rsc->private->pending_node = history->node; #endif } else { history->rsc->priv->pending_action = strdup(history->task); history->rsc->priv->pending_node = history->node; } } static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { int old_rc = 0; bool expired = false; pcmk_resource_t *parent = rsc; enum rsc_role_e fail_role = pcmk_role_unknown; enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart; struct action_history history = { .rsc = rsc, .node = node, .xml = xml_op, .execution_status = PCMK_EXEC_UNKNOWN, }; CRM_CHECK(rsc && node && xml_op, return); history.id = pcmk__xe_id(xml_op); if (history.id == NULL) { pcmk__config_err("Ignoring resource history entry for %s on %s " "without ID", rsc->id, pcmk__node_name(node)); return; } // Task and interval history.task = crm_element_value(xml_op, PCMK_XA_OPERATION); if (history.task == NULL) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "without " PCMK_XA_OPERATION, history.id, rsc->id, pcmk__node_name(node)); return; } crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms)); if (!can_affect_state(&history)) { pcmk__rsc_trace(rsc, "Ignoring resource history entry %s for %s on %s " "with irrelevant action '%s'", history.id, rsc->id, pcmk__node_name(node), history.task); return; } if (unpack_action_result(&history) != pcmk_rc_ok) { return; // Error already logged } history.expected_exit_status = pe__target_rc_from_xml(xml_op); history.key = pcmk__xe_history_key(xml_op); crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id)); pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", history.id, history.task, history.call_id, pcmk__node_name(node), pcmk_exec_status_str(history.execution_status), crm_exit_str(history.exit_status)); if (node->details->unclean) { pcmk__rsc_trace(rsc, "%s is running on %s, 
which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pcmk__node_name(node)); } expired = check_operation_expiry(&history); old_rc = history.exit_status; remap_operation(&history, on_fail, expired); if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; } if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { parent = uber_parent(rsc); } switch (history.execution_status) { case PCMK_EXEC_PENDING: process_pending_action(&history, *last_failure); goto done; case PCMK_EXEC_DONE: update_resource_state(&history, history.exit_status, *last_failure, on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: unpack_failure_handling(&history, &failure_strategy, &fail_role); if (failure_strategy == pcmk__on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " QB_XS " status=%d rc=%d id=%s", history.task, rsc->id, pcmk__node_name(node), history.execution_status, history.exit_status, history.id); /* Also for printing it as "FAILED" by marking it as * pcmk__rsc_failed later */ *on_fail = pcmk__on_fail_ban; } resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pcmk__is_pacemaker_remote_node(node) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. 
*/ pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling default: // No other value should be possible at this point break; } unpack_failure_handling(&history, &failure_strategy, &fail_role); if ((failure_strategy == pcmk__on_fail_ignore) || ((failure_strategy == pcmk__on_fail_restart_container) && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " QB_XS " %s", history.task, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), rsc->id, pcmk__node_name(node), last_change_s, history.id); free(last_change_s); update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name); pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure); record_failed_op(&history); if ((failure_strategy == pcmk__on_fail_restart_container) && (*on_fail <= pcmk__on_fail_restart)) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { log_level = LOG_NOTICE; } do_crm_log(log_level, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s) " QB_XS " %s", parent->id, pcmk__node_name(node), services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? 
"" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { pcmk__sched_err(rsc->priv->scheduler, "Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " QB_XS " %s", parent->id, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, NULL, -PCMK_SCORE_INFINITY, "fatal-error", rsc->priv->scheduler); } } done: pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", rsc->id, pcmk__node_name(node), history.id, pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role)); } static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_UNAME, node->priv->name); pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id); if ((scheduler->dc_node == NULL) && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) { scheduler->dc_node = node; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_TRUE); } else if (!pcmk__same_node(node, scheduler->dc_node)) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_FALSE); } cluster_name = g_hash_table_lookup(scheduler->priv->options, PCMK_OPT_CLUSTER_NAME); if (cluster_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME, cluster_name); } pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, node->priv->attrs, NULL, overwrite, scheduler); pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data, 
node->priv->utilization, NULL, FALSE, scheduler); if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL, pcmk__rsc_node_current) == NULL) { const char *site_name = pcmk__node_attr(node, "site-name", NULL, pcmk__rsc_node_current); if (site_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, site_name); } else if (cluster_name) { /* Default to cluster-name if unset */ pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, cluster_name); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) { crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc); crm_xml_add(rsc_op, PCMK_XA_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler) { GList *output = NULL; GList 
*intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS, NULL, NULL); pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; CRM_CHECK(status != NULL, return NULL); for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) { const char *uname = crm_element_value(node_state, PCMK_XA_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pcmk_find_node(scheduler, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pcmk__is_pacemaker_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); } else { determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL); tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL, NULL); for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) { const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; }