diff --git a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
index 0d67ecd90b..0ad6c2ee2d 100644
--- a/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
+++ b/doc/Clusters_from_Scratch/en-US/Ch-Stonith.txt
@@ -1,140 +1,140 @@
= Configure STONITH =
== What is STONITH? ==
STONITH (Shoot The Other Node In The Head, a.k.a. fencing) protects your data from
being corrupted by rogue nodes or unintended concurrent access.
Just because a node is unresponsive doesn't mean it has stopped
accessing your data. The only way to be 100% sure that your data is
safe, is to use STONITH to ensure that the node is truly
offline before allowing the data to be accessed from another node.
STONITH also has a role to play in the event that a clustered service
cannot be stopped. In this case, the cluster uses STONITH to force the
whole node offline, thereby making it safe to start the service
elsewhere.
== Choose a STONITH Device ==
It is crucial that your STONITH device can allow the cluster to
differentiate between a node failure and a network failure.
The biggest mistake people make in choosing a STONITH device is to
use a remote power switch (such as many on-board IPMI controllers) that
shares power with the node it controls. In such cases, the cluster
cannot be sure if the node is really offline, or active and suffering
from a network fault.
Likewise, any device that relies on the machine being active (such as
SSH-based "devices" used during testing) is inappropriate.
== Configure the Cluster for STONITH ==
. Configure the STONITH device itself to be able to fence your nodes and accept
fencing requests.
. Install the STONITH agent(s). To see what packages are available, run `yum
search fence-agents fence-virt`. Be sure to install the package(s) on all
cluster nodes.
. Find the correct STONITH agent script: `pcs stonith list`
. Find the parameters associated with the device: +pcs stonith describe pass:[agent_name]+
. Create a local copy of the CIB: `pcs cluster cib stonith_cfg`
. Create the fencing resource: +pcs -f stonith_cfg stonith create pass:[stonith_id
stonith_device_type [stonith_device_options]]+
. Enable STONITH in the cluster: `pcs -f stonith_cfg property set stonith-enabled=true`
. If the device does not know how to fence nodes based on their uname,
you may also need to set the special *pcmk_host_map* parameter. See
`man stonithd` for details.
. If the device does not support the *list* command, you may also need
to set the special *pcmk_host_list* and/or *pcmk_host_check*
parameters. See `man stonithd` for details.
. If the device does not expect the victim to be specified with the
*port* parameter, you may also need to set the special
*pcmk_host_argument* parameter. See `man stonithd` for details.
. Commit the new configuration: `pcs cluster cib-push stonith_cfg`
. Once the STONITH resource is running, test it (you might want to stop
the cluster on that machine first): +stonith_admin --reboot pass:[nodename]+
== Example ==
For this example, assume we have a chassis containing four nodes
and an IPMI device active on 10.0.0.1. Following the steps above
would go something like this:
Step 1: Configure the IP address, authentication credentials, etc. in the IPMI device itself.
Step 2: Install the *fence-agents-ipmilan* package on both nodes.
Step 3: Choose the *fence_ipmilan* STONITH agent.
Step 4: Obtain the agent's possible parameters:
----
[root@pcmk-1 ~]# pcs stonith describe fence_ipmilan
Stonith options for: fence_ipmilan
ipport: TCP/UDP port to use for connection with device
inet6_only: Forces agent to use IPv6 addresses only
ipaddr (required): IP Address or Hostname
passwd_script: Script to retrieve password
method: Method to fence (onoff|cycle)
inet4_only: Forces agent to use IPv4 addresses only
passwd: Login password or passphrase
lanplus: Use Lanplus to improve security of connection
auth: IPMI Lan Auth type.
cipher: Ciphersuite to use (same as ipmitool -C parameter)
privlvl: Privilege level on IPMI device
action (required): Fencing Action
login: Login Name
verbose: Verbose mode
debug: Write debug information to given file
version: Display version information and exit
help: Display help and exit
power_wait: Wait X seconds after issuing ON/OFF
login_timeout: Wait X seconds for cmd prompt after login
power_timeout: Test X seconds for status change after ON/OFF
delay: Wait X seconds before fencing is started
ipmitool_path: Path to ipmitool binary
shell_timeout: Wait X seconds for cmd prompt after issuing command
retry_on: Count of attempts to retry power on
sudo: Use sudo (without password) when calling 3rd party sotfware.
- stonith-timeout: How long to wait for the STONITH action to complete per a stonith device.
+ stonith-timeout: How long to wait for the STONITH action (reboot, on, off) to complete per a stonith device.
priority: The priority of the stonith resource. Devices are tried in order of highest priority to lowest.
pcmk_host_map: A mapping of host names to ports numbers for devices that do not support host names.
pcmk_host_list: A list of machines controlled by this device (Optional unless pcmk_host_check=static-list).
pcmk_host_check: How to determine which machines are controlled by the device.
----
Step 5: `pcs cluster cib stonith_cfg`
Step 6: Here are example parameters for creating our STONITH resource:
----
# pcs -f stonith_cfg stonith create ipmi-fencing fence_ipmilan \
pcmk_host_list="pcmk-1 pcmk-2" ipaddr=10.0.0.1 login=testuser \
passwd=acd123 op monitor interval=60s
# pcs -f stonith_cfg stonith
ipmi-fencing (stonith:fence_ipmilan): Stopped
----
Steps 7-10: Enable STONITH in the cluster:
----
# pcs -f stonith_cfg property set stonith-enabled=true
# pcs -f stonith_cfg property
Cluster Properties:
cluster-infrastructure: corosync
cluster-name: mycluster
dc-version: 1.1.12-a9c8177
have-watchdog: false
stonith-enabled: true
----
Step 11: `pcs cluster cib-push stonith_cfg`
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
index 026d329b80..f8bca12d0a 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
@@ -1,398 +1,398 @@
= Cluster-Wide Configuration =
== CIB Properties ==
Certain settings are defined by CIB properties (that is, attributes of the
+cib+ tag) rather than with the rest of the cluster configuration in the
+configuration+ section.
The reason is simply a matter of parsing. These options are used by the
configuration database which is, by design, mostly ignorant of the content it
holds. So the decision was made to place them in an easy-to-find location.
.CIB Properties
[width="95%",cols="2m,5<",options="header",align="center"]
|=========================================================
|Field |Description
| admin_epoch |
indexterm:[Configuration Version,Cluster]
indexterm:[Cluster,Option,Configuration Version]
indexterm:[admin_epoch,Cluster Option]
indexterm:[Cluster,Option,admin_epoch]
When a node joins the cluster, the cluster performs a check to see
which node has the best configuration. It asks the node with the highest
(+admin_epoch+, +epoch+, +num_updates+) tuple to replace the configuration on
all the nodes -- which makes setting them, and setting them correctly, very
important. +admin_epoch+ is never modified by the cluster; you can use this
to make the configurations on any inactive nodes obsolete. _Never set this
value to zero_. In such cases, the cluster cannot tell the difference between
your configuration and the "empty" one used when nothing is found on disk.
| epoch |
indexterm:[epoch,Cluster Option]
indexterm:[Cluster,Option,epoch]
The cluster increments this every time the configuration is updated (usually by
the administrator).
| num_updates |
indexterm:[num_updates,Cluster Option]
indexterm:[Cluster,Option,num_updates]
The cluster increments this every time the configuration or status is updated
(usually by the cluster) and resets it to 0 when epoch changes.
| validate-with |
indexterm:[validate-with,Cluster Option]
indexterm:[Cluster,Option,validate-with]
Determines the type of XML validation that will be done on the configuration.
If set to +none+, the cluster will not verify that updates conform to the
DTD (nor reject ones that don't). This option can be useful when
operating a mixed-version cluster during an upgrade.
|cib-last-written |
indexterm:[cib-last-written,Cluster Property]
indexterm:[Cluster,Property,cib-last-written]
Indicates when the configuration was last written to disk. Maintained by the
cluster; for informational purposes only.
|have-quorum |
indexterm:[have-quorum,Cluster Property]
indexterm:[Cluster,Property,have-quorum]
Indicates if the cluster has quorum. If false, this may mean that the
cluster cannot start resources or fence other nodes (see
+no-quorum-policy+ below). Maintained by the cluster.
|dc-uuid |
indexterm:[dc-uuid,Cluster Property]
indexterm:[Cluster,Property,dc-uuid]
Indicates which cluster node is the current leader. Used by the
cluster when placing resources and determining the order of some
events. Maintained by the cluster.
|=========================================================
=== Working with CIB Properties ===
Although these fields can be written to by the user, in
most cases the cluster will overwrite any values specified by the
user with the "correct" ones.
To change the ones that can be specified by the user,
for example +admin_epoch+, one should use:
----
# cibadmin --modify --crm_xml '<cib admin_epoch="42"/>'
----
A complete set of CIB properties will look something like this:
.Attributes set for a cib object
======
[source,XML]
-------
-------
======
== Cluster Options ==
Cluster options, as you might expect, control how the cluster behaves
when confronted with certain situations.
They are grouped into sets within the +crm_config+ section, and, in advanced
configurations, there may be more than one set. (This will be described later
in the section on <<ch-rules>> where we will show how to have the cluster use
different sets of options during working hours than during weekends.) For now,
we will describe the simple case where each option is present at most once.
You can obtain an up-to-date list of cluster options, including
their default values, by running the `man pengine` and `man crmd` commands.
.Cluster Options
[width="95%",cols="5m,2,11>).
| enable-startup-probes | TRUE |
indexterm:[enable-startup-probes,Cluster Option]
indexterm:[Cluster,Option,enable-startup-probes]
Should the cluster check for active resources during startup?
| maintenance-mode | FALSE |
indexterm:[maintenance-mode,Cluster Option]
indexterm:[Cluster,Option,maintenance-mode]
Should the cluster refrain from monitoring, starting and stopping resources?
| stonith-enabled | TRUE |
indexterm:[stonith-enabled,Cluster Option]
indexterm:[Cluster,Option,stonith-enabled]
Should failed nodes and nodes with resources that can't be stopped be
shot? If you value your data, set up a STONITH device and enable this.
If true, or unset, the cluster will refuse to start resources unless
one or more STONITH resources have been configured.
| stonith-action | reboot |
indexterm:[stonith-action,Cluster Option]
indexterm:[Cluster,Option,stonith-action]
Action to send to STONITH device. Allowed values are +reboot+ and +off+.
The value +poweroff+ is also allowed, but is only used for
legacy devices.
| stonith-timeout | 60s |
indexterm:[stonith-timeout,Cluster Option]
indexterm:[Cluster,Option,stonith-timeout]
-How long to wait for STONITH actions to complete
+How long to wait for STONITH actions (reboot, on, off) to complete
| cluster-delay | 60s |
indexterm:[cluster-delay,Cluster Option]
indexterm:[Cluster,Option,cluster-delay]
Estimated maximum round-trip delay over the network (excluding action
execution). If the TE requires an action to be executed on another node,
it will consider the action failed if it does not get a response
from the other node in this time (after considering the action's
own timeout). The "correct" value will depend on the speed and load of your
network and cluster nodes.
| dc-deadtime | 20s |
indexterm:[dc-deadtime,Cluster Option]
indexterm:[Cluster,Option,dc-deadtime]
How long to wait for a response from other nodes during startup.
The "correct" value will depend on the speed/load of your network and the type of switches used.
| cluster-recheck-interval | 15min |
indexterm:[cluster-recheck-interval,Cluster Option]
indexterm:[Cluster,Option,cluster-recheck-interval]
Polling interval for time-based changes to options, resource parameters and constraints.
The Cluster is primarily event-driven, but your configuration can have
elements that take effect based on the time of day. To ensure these changes
take effect, we can optionally poll the cluster's status for changes. A value
of 0 disables polling. Positive values are an interval (in seconds unless other
SI units are specified, e.g. 5min).
| pe-error-series-max | -1 |
indexterm:[pe-error-series-max,Cluster Option]
indexterm:[Cluster,Option,pe-error-series-max]
The number of PE inputs resulting in ERRORs to save. Used when reporting problems.
A value of -1 means unlimited (report all).
| pe-warn-series-max | -1 |
indexterm:[pe-warn-series-max,Cluster Option]
indexterm:[Cluster,Option,pe-warn-series-max]
The number of PE inputs resulting in WARNINGs to save. Used when reporting problems.
A value of -1 means unlimited (report all).
| pe-input-series-max | -1 |
indexterm:[pe-input-series-max,Cluster Option]
indexterm:[Cluster,Option,pe-input-series-max]
The number of "normal" PE inputs to save. Used when reporting problems.
A value of -1 means unlimited (report all).
| remove-after-stop | FALSE |
indexterm:[remove-after-stop,Cluster Option]
indexterm:[Cluster,Option,remove-after-stop]
_Advanced Use Only:_ Should the cluster remove resources from the LRM after
they are stopped? Values other than the default are, at best, poorly tested and
potentially dangerous.
| startup-fencing | TRUE |
indexterm:[startup-fencing,Cluster Option]
indexterm:[Cluster,Option,startup-fencing]
_Advanced Use Only:_ Should the cluster shoot unseen nodes?
Not using the default is very unsafe!
| election-timeout | 2min |
indexterm:[election-timeout,Cluster Option]
indexterm:[Cluster,Option,election-timeout]
_Advanced Use Only:_ If you need to adjust this value, it probably indicates
the presence of a bug.
| shutdown-escalation | 20min |
indexterm:[shutdown-escalation,Cluster Option]
indexterm:[Cluster,Option,shutdown-escalation]
_Advanced Use Only:_ If you need to adjust this value, it probably indicates
the presence of a bug.
| crmd-integration-timeout | 3min |
indexterm:[crmd-integration-timeout,Cluster Option]
indexterm:[Cluster,Option,crmd-integration-timeout]
_Advanced Use Only:_ If you need to adjust this value, it probably indicates
the presence of a bug.
| crmd-finalization-timeout | 30min |
indexterm:[crmd-finalization-timeout,Cluster Option]
indexterm:[Cluster,Option,crmd-finalization-timeout]
_Advanced Use Only:_ If you need to adjust this value, it probably indicates
the presence of a bug.
| crmd-transition-delay | 0s |
indexterm:[crmd-transition-delay,Cluster Option]
indexterm:[Cluster,Option,crmd-transition-delay]
_Advanced Use Only:_ Delay cluster recovery for the configured interval to
allow for additional/related events to occur. Useful if your configuration is
sensitive to the order in which ping updates arrive.
Enabling this option will slow down cluster recovery under
all conditions.
|default-resource-stickiness | 0 |
indexterm:[default-resource-stickiness,Cluster Option]
indexterm:[Cluster,Option,default-resource-stickiness]
_Deprecated:_ See <<s-resource-defaults>> instead
| is-managed-default | TRUE |
indexterm:[is-managed-default,Cluster Option]
indexterm:[Cluster,Option,is-managed-default]
_Deprecated:_ See <<s-resource-defaults>> instead
| default-action-timeout | 20s |
indexterm:[default-action-timeout,Cluster Option]
indexterm:[Cluster,Option,default-action-timeout]
_Deprecated:_ See <<s-operation-defaults>> instead
|=========================================================
=== Querying and Setting Cluster Options ===
indexterm:[Querying,Cluster Option]
indexterm:[Setting,Cluster Option]
indexterm:[Cluster,Querying Options]
indexterm:[Cluster,Setting Options]
Cluster options can be queried and modified using the `crm_attribute` tool. To
get the current value of +cluster-delay+, you can run:
----
# crm_attribute --query --name cluster-delay
----
which is more simply written as
----
# crm_attribute -G -n cluster-delay
----
If a value is found, you'll see a result like this:
----
# crm_attribute -G -n cluster-delay
scope=crm_config name=cluster-delay value=60s
----
If no value is found, the tool will display an error:
----
# crm_attribute -G -n clusta-deway
scope=crm_config name=clusta-deway value=(null)
Error performing operation: No such device or address
----
To use a different value (for example, 30 seconds), simply run:
----
# crm_attribute --name cluster-delay --update 30s
----
To go back to the cluster's default value, you can delete the value, for example:
----
# crm_attribute --name cluster-delay --delete
Deleted crm_config option: id=cib-bootstrap-options-cluster-delay name=cluster-delay
----
=== When Options are Listed More Than Once ===
If you ever see something like the following, it means that the option you're modifying is present more than once.
.Deleting an option that is listed twice
=======
------
# crm_attribute --name batch-limit --delete
Multiple attributes match name=batch-limit in crm_config:
Value: 50 (set=cib-bootstrap-options, id=cib-bootstrap-options-batch-limit)
Value: 100 (set=custom, id=custom-batch-limit)
Please choose from one of the matches above and supply the 'id' with --id
------
=======
In such cases, follow the on-screen instructions to perform the
requested action. To determine which value is currently being used by
the cluster, refer to <<ch-rules>>.
diff --git a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt
index f644f805a4..02525d6f08 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt
@@ -1,829 +1,833 @@
= STONITH =
////
We prefer [[ch-stonith]], but older versions of asciidoc don't deal well
with that construct for chapter headings
////
anchor:ch-stonith[Chapter 13, STONITH]
indexterm:[STONITH, Configuration]
== What Is STONITH? ==
STONITH (an acronym for "Shoot The Other Node In The Head"), also called
'fencing', protects your data from being corrupted by rogue nodes or concurrent
access.
Just because a node is unresponsive, this doesn't mean it isn't
accessing your data. The only way to be 100% sure that your data is
safe, is to use STONITH so we can be certain that the node is truly
offline, before allowing the data to be accessed from another node.
STONITH also has a role to play in the event that a clustered service
cannot be stopped. In this case, the cluster uses STONITH to force the
whole node offline, thereby making it safe to start the service
elsewhere.
== What STONITH Device Should You Use? ==
It is crucial that the STONITH device can allow the cluster to
differentiate between a node failure and a network one.
The biggest mistake people make in choosing a STONITH device is to
use a remote power switch (such as many on-board IPMI controllers) that
shares power with the node it controls. In such cases, the cluster
cannot be sure if the node is really offline, or active and suffering
from a network fault.
Likewise, any device that relies on the machine being active (such as
SSH-based "devices" used during testing) is inappropriate.
== Special Treatment of STONITH Resources ==
STONITH resources are somewhat special in Pacemaker.
STONITH may be initiated by pacemaker or by other parts of the cluster
(such as resources like DRBD or DLM). To accommodate this, pacemaker
does not require the STONITH resource to be in the 'started' state
in order to be used, thus allowing reliable use of STONITH devices in such a
case.
[NOTE]
====
In pacemaker versions 1.1.9 and earlier, this feature either did not exist or
did not work well. Only "running" STONITH resources could be used by Pacemaker
for fencing, and if another component tried to fence a node while Pacemaker was
moving STONITH resources, the fencing could fail.
====
All nodes have access to STONITH devices' definitions and instantiate them
on-the-fly when needed, but preference is given to 'verified' instances, which
are the ones that are 'started' according to the cluster's knowledge.
In the case of a cluster split, the partition with a verified instance
will have a slight advantage, because the STONITH daemon in the other partition
will have to hear from all its current peers before choosing a node to
perform the fencing.
Fencing resources do work the same as regular resources in some respects:
* +target-role+ can be used to enable or disable the resource
* Location constraints can be used to prevent a specific node from using the resource
[IMPORTANT]
===========
Currently there is a limitation that fencing resources may only have
one set of meta-attributes and one set of instance attributes. This
can be revisited if it becomes a significant limitation for people.
===========
.Properties of Fencing Resources
[width="95%",cols="5m,2,3,10
----
====
Based on that, we would create a STONITH resource fragment that might look
like this:
.An IPMI-based STONITH Resource
====
[source,XML]
----
----
====
Finally, we need to enable STONITH:
----
# crm_attribute -t crm_config -n stonith-enabled -v true
----
== Advanced STONITH Configurations ==
Some people consider that having one fencing device is a single point
of failure footnote:[Not true, since a node or resource must fail
before fencing even has a chance to]; others prefer removing the node
from the storage and network instead of turning it off.
Whatever the reason, Pacemaker supports fencing nodes with multiple
devices through a feature called 'fencing topologies'.
Simply create the individual devices as you normally would, then
define one or more +fencing-level+ entries in the +fencing-topology+ section of
the configuration.
* Each fencing level is attempted in order of ascending +index+.
* If a device fails, processing terminates for the current level.
No further devices in that level are exercised, and the next level is attempted instead.
* If the operation succeeds for all the listed devices in a level, the level is deemed to have passed.
* The operation is finished when a level has passed (success), or all levels have been attempted (failed).
* If the operation failed, the next step is determined by the Policy Engine and/or `crmd`.
Some possible uses of topologies include:
* Try poison-pill and fall back to power
* Try disk and network, and fall back to power if either fails
* Initiate a kdump and then power off the node
.Properties of Fencing Levels
[width="95%",cols="1m,6<",options="header",align="center"]
|=========================================================
|Field
|Description
|id
|A unique name for the level
indexterm:[id,fencing-level]
indexterm:[Fencing,fencing-level,id]
|target
|The node to which this level applies
indexterm:[target,fencing-level]
indexterm:[Fencing,fencing-level,target]
|index
|The order in which to attempt the levels.
Levels are attempted in ascending order 'until one succeeds'.
indexterm:[index,fencing-level]
indexterm:[Fencing,fencing-level,index]
|devices
|A comma-separated list of devices that must all be tried for this level
indexterm:[devices,fencing-level]
indexterm:[Fencing,fencing-level,devices]
|=========================================================
.Fencing topology with different devices for different nodes
====
[source,XML]
----
...
...
----
====
=== Example Dual-Layer, Dual-Device Fencing Topologies ===
The following example illustrates an advanced use of +fencing-topology+ in a cluster with the following properties:
* 3 nodes (2 active prod-mysql nodes, 1 prod_mysql-rep in standby for quorum purposes)
* the active nodes have an IPMI-controlled power board reached at 192.0.2.1 and 192.0.2.2
* the active nodes also have two independent PSUs (Power Supply Units)
connected to two independent PDUs (Power Distribution Units) reached at
198.51.100.1 (port 10 and port 11) and 203.0.113.1 (port 10 and port 11)
* the first fencing method uses the `fence_ipmi` agent
* the second fencing method uses the `fence_apc_snmp` agent targeting 2 fencing devices (one per PSU, either port 10 or 11)
* fencing is only implemented for the active nodes and has location constraints
* fencing topology is set to try IPMI fencing first then default to a "sure-kill" dual PDU fencing
In a normal failure scenario, STONITH will first select +fence_ipmi+ to try to kill the faulty node.
Using a fencing topology, if that first method fails, STONITH will then move on to selecting +fence_apc_snmp+ twice:
* once for the first PDU
* again for the second PDU
The fence action is considered successful only if both PDUs report the required status. If any of them fails, STONITH loops back to the first fencing method, +fence_ipmi+, and so on until the node is fenced or the fencing action is cancelled.
.First fencing method: single IPMI device
Each cluster node has its own dedicated IPMI channel that can be called for fencing using the following primitives:
[source,XML]
----
----
.Second fencing method: dual PDU devices
Each cluster node also has two distinct power channels controlled by two
distinct PDUs. That means a total of 4 fencing devices configured as follows:
- Node 1, PDU 1, PSU 1 @ port 10
- Node 1, PDU 2, PSU 2 @ port 10
- Node 2, PDU 1, PSU 1 @ port 11
- Node 2, PDU 2, PSU 2 @ port 11
The matching fencing agents are configured as follows:
[source,XML]
----
----
.Location Constraints
To prevent STONITH from trying to run a fencing agent on the same node it is
supposed to fence, constraints are placed on all the fencing primitives:
[source,XML]
----
----
.Fencing topology
Now that all the fencing resources are defined, it's time to create the right topology.
We want to first fence using IPMI and if that does not work, fence both PDUs to effectively and surely kill the node.
[source,XML]
----
----
Please note, in +fencing-topology+, the lowest +index+ value determines the priority of the first fencing method.
.Final configuration
Put together, the configuration looks like this:
[source,XML]
----
...
...
----
diff --git a/fencing/main.c b/fencing/main.c
index 385c7e606c..4f917636a1 100644
--- a/fencing/main.c
+++ b/fencing/main.c
@@ -1,1460 +1,1452 @@
/*
* Copyright (C) 2009 Andrew Beekhof
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Name of the local node; stamped onto client requests as F_STONITH_CLIENTNODE */
char *stonith_our_uname = NULL;
/* NOTE(review): presumably the local node's cluster UUID -- not used in the visible code, confirm */
char *stonith_our_uuid = NULL;
/* NOTE(review): looks like a watchdog timeout in milliseconds (0 by default) -- confirm where it is set */
long stonith_watchdog_timeout_ms = 0;

/* NOTE(review): presumably the daemon's GLib main loop -- confirm */
GMainLoop *mainloop = NULL;

/* NOTE(review): run-mode switches; their exact effect is not visible in this part of the file */
gboolean stand_alone = FALSE;
gboolean no_cib_connect = FALSE;
/* Checked by the IPC accept callback (new clients are refused when set) and by
 * the heartbeat disconnect handler (chooses the log message) */
gboolean stonith_shutdown_flag = FALSE;

/* NOTE(review): presumably the libqb IPC server handle -- confirm */
qb_ipcs_service_t *ipcs = NULL;
/* NOTE(review): presumably a local copy of the CIB as XML -- confirm */
xmlNode *local_cib = NULL;

/* NOTE(review): CIB connection state private to this file -- usage not visible here */
static cib_t *cib_api = NULL;
static void *cib_library = NULL;

/* Forward declarations; stonith_shutdown(0) is invoked by the cluster-layer
 * disconnect handlers defined before its definition */
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
/*
 * IPC accept callback: decide whether a new client connection is allowed.
 *
 * Returns 0 once the client has been registered with crm_client_new(),
 * -EPERM when the daemon is shutting down, and -EIO when registration fails.
 */
static int32_t
st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
{
    if (!stonith_shutdown_flag) {
        return (crm_client_new(c, uid, gid) != NULL) ? 0 : -EIO;
    }

    crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
    return -EPERM;
}
/*
 * IPC "connection created" callback: nothing to set up, just leave a trace
 * entry identifying the connection.
 */
static void
st_ipc_created(qb_ipcs_connection_t * c)
{
    crm_trace("Connection created for %p", c);
}
/*
 * IPC dispatch callback: handle one message received from a local client.
 *
 * The payload is decoded with crm_ipcs_recv(). A CRM_OP_RM_NODE_CACHE request
 * is tagged with this client's details and sent out via
 * send_cluster_message(); any other request is tagged the same way and handed
 * to stonith_command().
 *
 * Always returns 0, whatever happened to the message.
 * NOTE(review): what the IPC layer does with a non-zero return is not visible
 * from this file -- confirm before returning anything else.
 */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
{
    uint32_t msg_id = 0;
    uint32_t flags = 0;
    int options = 0;
    xmlNode *msg = NULL;
    const char *task = NULL;
    crm_client_t *c = crm_client_get(qbc);

    if (c == NULL) {
        crm_info("Invalid client: %p", qbc);
        return 0;
    }

    msg = crm_ipcs_recv(c, data, size, &msg_id, &flags);
    if (msg == NULL) {
        /* Nothing usable arrived: tell the client and give up on this message */
        crm_ipcs_send_ack(c, msg_id, flags, "nack", __FUNCTION__, __LINE__);
        return 0;
    }

    task = crm_element_value(msg, F_CRM_TASK);
    if (safe_str_eq(task, CRM_OP_RM_NODE_CACHE)) {
        /* Re-label the request as a stonith-ng message before sending it on */
        crm_xml_add(msg, F_TYPE, T_STONITH_NG);
        crm_xml_add(msg, F_STONITH_OPERATION, task);
        crm_xml_add(msg, F_STONITH_CLIENTID, c->id);
        crm_xml_add(msg, F_STONITH_CLIENTNAME, crm_client_name(c));
        crm_xml_add(msg, F_STONITH_CLIENTNODE, stonith_our_uname);

        send_cluster_message(NULL, crm_msg_stonith_ng, msg, FALSE);
        free_xml(msg);
        return 0;
    }

    if (c->name == NULL) {
        /* First request from this client: derive "<name>.<pid>" for it */
        const char *label = crm_element_value(msg, F_STONITH_CLIENTNAME);

        c->name = crm_strdup_printf("%s.%u", (label != NULL) ? label : "unknown", c->pid);
    }

    crm_element_value_int(msg, F_STONITH_CALLOPTS, &options);
    crm_trace("Flags %u/%u for command %u from %s", flags, options, msg_id, crm_client_name(c));

    if (is_set(options, st_opt_sync_call)) {
        CRM_ASSERT(flags & crm_ipc_client_response);
        /* A non-zero request_id here means the client already has a
         * synchronous call in flight */
        CRM_LOG_ASSERT(c->request_id == 0);
        /* Only the most recent synchronous call gets the reply */
        c->request_id = msg_id;
    }

    crm_xml_add(msg, F_STONITH_CLIENTID, c->id);
    crm_xml_add(msg, F_STONITH_CLIENTNAME, crm_client_name(c));
    crm_xml_add(msg, F_STONITH_CLIENTNODE, stonith_our_uname);

    crm_log_xml_trace(msg, "Client[inbound]");
    stonith_command(c, msg_id, flags, msg, NULL);

    free_xml(msg);
    return 0;
}
/*
 * IPC "connection closed" callback: release the client record attached to
 * the connection, if there is one.
 *
 * Always returns 0, which tells the IPC layer to go ahead and destroy the
 * connection.
 */
static int32_t
st_ipc_closed(qb_ipcs_connection_t * c)
{
    crm_client_t *client = crm_client_get(c);

    if (client != NULL) {
        crm_trace("Connection %p closed", c);
        crm_client_destroy(client);
    }

    return 0;
}
/*
 * IPC "connection destroyed" callback: log the event, then run the same
 * cleanup as a close. The close handler's result is not needed here.
 */
static void
st_ipc_destroy(qb_ipcs_connection_t * c)
{
    crm_trace("Connection %p destroyed", c);
    (void) st_ipc_closed(c);
}
/*
 * Handle a message received from a cluster peer.
 *
 * Messages whose operation is "poke" are dropped; anything else is traced and
 * passed to stonith_command() together with the sender taken from F_ORIG.
 * private_data is unused.
 */
static void
stonith_peer_callback(xmlNode * msg, void *private_data)
{
    const char *sender = crm_element_value(msg, F_ORIG);
    const char *operation = crm_element_value(msg, F_STONITH_OPERATION);

    if (!crm_str_eq(operation, "poke", TRUE)) {
        crm_log_xml_trace(msg, "Peer[inbound]");
        stonith_command(NULL, 0, 0, msg, sender);
    }
}
#if SUPPORT_HEARTBEAT
/*
 * Heartbeat message callback: convert the HA_Message to XML, run it through
 * the common peer handler, then free the converted copy.
 */
static void
stonith_peer_hb_callback(HA_Message * msg, void *private_data)
{
    xmlNode *converted = convert_ha_message(NULL, msg, __FUNCTION__);

    stonith_peer_callback(converted, private_data);
    free_xml(converted);
}
/*
 * Heartbeat disconnect callback: log whether the disconnect was expected
 * (shutdown already flagged) or not, then call stonith_shutdown(0) either way.
 */
static void
stonith_peer_hb_destroy(gpointer user_data)
{
    if (!stonith_shutdown_flag) {
        crm_err("Heartbeat connection lost! Exiting.");
    } else {
        crm_info("Heartbeat disconnection complete... exiting");
    }

    stonith_shutdown(0);
}
#endif
#if SUPPORT_COROSYNC
/*
 * Corosync CPG delivery callback.
 *
 * The raw message is unpacked with pcmk_message_common_cs(). If it is of kind
 * crm_class_cluster, the text is parsed as XML, tagged with its sender
 * (F_ORIG) and passed to stonith_peer_callback(). Text that does not parse is
 * logged as an error and dropped.
 */
static void
stonith_peer_ais_callback(cpg_handle_t handle,
                          const struct cpg_name *groupName,
                          uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
{
    uint32_t kind = 0;
    const char *sender = NULL;
    xmlNode *parsed = NULL;
    char *text = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &sender);

    if (text == NULL) {
        return;
    }

    if (kind == crm_class_cluster) {
        parsed = string2xml(text);
        if (parsed == NULL) {
            crm_err("Invalid XML: '%.120s'", text);
            goto done;
        }
        crm_xml_add(parsed, F_ORIG, sender);
        stonith_peer_callback(parsed, NULL);
    }

    free_xml(parsed);

done:
    free(text);
}
/*
 * Called when the Corosync connection is torn down: log the loss and
 * begin shutting the daemon down.
 */
static void
stonith_peer_cs_destroy(gpointer user_data)
{
    crm_err("Corosync connection terminated");

    stonith_shutdown(0);
}
#endif
/*
 * Deliver a reply or event to a locally connected client.
 *
 * notify_src  XML payload to send
 * client_id   id used to look the client up; if no such client exists the
 *             message is dropped (trace-logged only)
 * sync_reply  TRUE to answer the client's outstanding request (its
 *             request_id is consumed and reset to 0), FALSE to send an
 *             asynchronous server event
 * from_peer   only affects the wording of the trace messages
 *
 * A failed send is reported with a warning.
 */
void
do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer)
{
    crm_client_t *recipient = NULL;
    int send_rc = pcmk_ok;
    int request_id = 0;

    crm_trace("Sending response");
    recipient = crm_client_get_by_id(client_id);

    crm_trace("Sending callback to request originator");
    if (recipient == NULL) {
        /* Nothing to deliver to, and therefore nothing to warn about either */
        crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set.");
        return;
    }

    if (!sync_reply) {
        crm_trace("Sending an event to %s %s",
                  recipient->name, from_peer ? "(originator of delegated request)" : "");
    } else {
        CRM_LOG_ASSERT(recipient->request_id);
        request_id = recipient->request_id;
        recipient->request_id = 0;

        crm_trace("Sending response %d to %s %s",
                  request_id, recipient->name, from_peer ? "(originator of delegated request)" : "");
    }

    send_rc = crm_ipcs_send(recipient, request_id, notify_src,
                            sync_reply ? crm_ipc_flags_none : crm_ipc_server_event);

    if (send_rc < pcmk_ok) {
        crm_warn("%sSync reply to %s failed: %s",
                 sync_reply ? "" : "A-", recipient->name, pcmk_strerror(send_rc));
    }
}
/*
 * Map a notification type name to the bit it occupies in a client's
 * option mask.  Returns 0 for names that have no associated bit.
 */
long long
get_stonith_flag(const char *name)
{
    long long bit = 0;

    if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
        bit = 0x01;

    } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
        bit = 0x04;

    } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
        bit = 0x10;
    }

    return bit;
}
/*
 * GHashTable iteration callback: send one notification to one client.
 *
 * key        unused
 * value      the crm_client_t being visited
 * user_data  the notification XML; its F_SUBTYPE selects which clients
 *            are interested
 *
 * Clients without an IPC channel, and clients whose option mask does not
 * contain the bit for this notification type, are skipped.
 */
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
    crm_client_t *client = value;
    xmlNode *update_msg = user_data;
    const char *type = NULL;
    int sent = 0;

    CRM_CHECK(client != NULL, return);
    CRM_CHECK(update_msg != NULL, return);

    type = crm_element_value(update_msg, F_SUBTYPE);
    CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);

    if (client->ipcs == NULL) {
        crm_trace("Skipping client with NULL channel");
        return;
    }

    if ((client->options & get_stonith_flag(type)) == 0) {
        /* This client did not subscribe to this kind of event */
        return;
    }

    sent = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error);
    if (sent > 0) {
        crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client),
                  client->id);
    } else {
        crm_warn("%s notification of client %s.%.6s failed: %s (%d)",
                 type, crm_client_name(client), client->id, pcmk_strerror(sent), sent);
    }
}
/*
 * Tell a local client the timeout that applies to one of its calls.
 *
 * Nothing is sent when timeout is 0, when either id is NULL, or when no
 * client with the given id is connected.  Otherwise a
 * T_STONITH_TIMEOUT_VALUE message carrying the call id and timeout is
 * sent to the client as a server event.
 */
void
do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout)
{
    crm_client_t *recipient = NULL;
    xmlNode *update = NULL;

    if (timeout == 0 || call_id == NULL || client_id == NULL) {
        return;
    }

    recipient = crm_client_get_by_id(client_id);
    if (recipient == NULL) {
        return;
    }

    update = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(update, F_TYPE, T_STONITH_TIMEOUT_VALUE);
    crm_xml_add(update, F_STONITH_CALLID, call_id);
    crm_xml_add_int(update, F_STONITH_TIMEOUT, timeout);

    crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id);

    /* recipient is known to be non-NULL at this point */
    crm_ipcs_send(recipient, 0, update, crm_ipc_server_event);

    free_xml(update);
}
/*
 * Build a "notify" message and offer it to every connected client.
 *
 * options  unused
 * type     notification type; stored as both F_SUBTYPE and
 *          F_STONITH_OPERATION (a NULL type is flagged by CRM_CHECK but
 *          processing continues)
 * result   return code recorded in F_STONITH_RC
 * data     optional payload attached as F_STONITH_CALLDATA
 */
void
do_stonith_notify(int options, const char *type, int result, xmlNode * data)
{
    /* TODO: Standardize the contents of data */
    xmlNode *notification = create_xml_node(NULL, "notify");

    CRM_CHECK(type != NULL,;);

    crm_xml_add(notification, F_TYPE, T_STONITH_NOTIFY);
    crm_xml_add(notification, F_SUBTYPE, type);
    crm_xml_add(notification, F_STONITH_OPERATION, type);
    crm_xml_add_int(notification, F_STONITH_RC, result);

    if (data) {
        add_message_xml(notification, F_STONITH_CALLDATA, data);
    }

    crm_trace("Notifying clients");
    g_hash_table_foreach(client_connections, stonith_notify_client, notification);
    free_xml(notification);
    crm_trace("Notify complete");
}
/*
 * Split a comma-separated device string into a stonith_key_value_t list.
 *
 * Each segment between commas (including empty segments) is added as a
 * value with a NULL key.  Returns NULL when devices is NULL; the caller
 * owns the returned list.
 */
static stonith_key_value_t *
parse_device_list(const char *devices)
{
    stonith_key_value_t *result = NULL;
    int total = 0;
    int start = 0;
    int pos = 0;

    if (devices == NULL) {
        return result;
    }

    total = strlen(devices);

    /* pos deliberately reaches the terminating NUL so the last segment is emitted */
    while (pos <= total) {
        if (devices[pos] == ',' || devices[pos] == 0) {
            int seg_len = pos - start;
            char *segment = calloc(1, 2 + seg_len);

            /* the size bound stops the copy at the separator */
            snprintf(segment, 1 + seg_len, "%s", devices + start);
            result = stonith_key_value_add(result, NULL, segment);
            free(segment);

            start = pos + 1;
        }
        pos++;
    }

    return result;
}
/*
 * Remove fencing level `level` for target `node` and notify clients.
 *
 * Builds the removal request, applies it with stonith_level_remove(), and
 * then sends a STONITH_OP_LEVEL_DEL notification carrying the description
 * returned by the removal and the current number of topology entries.
 */
static void
topology_remove_helper(const char *node, int level)
{
    char *description = NULL;
    int removal_rc = 0;
    xmlNode *request = create_xml_node(NULL, F_STONITH_LEVEL);
    xmlNode *event = create_xml_node(NULL, STONITH_OP_LEVEL_DEL);

    crm_xml_add(request, F_STONITH_ORIGIN, __FUNCTION__);
    crm_xml_add_int(request, XML_ATTR_ID, level);
    crm_xml_add(request, F_STONITH_TARGET, node);

    removal_rc = stonith_level_remove(request, &description);

    /* The table size is read after the removal so it reflects the new state */
    crm_xml_add(event, F_STONITH_DEVICE, description);
    crm_xml_add_int(event, F_STONITH_ACTIVE, g_hash_table_size(topology));

    do_stonith_notify(0, STONITH_OP_LEVEL_DEL, removal_rc, event);

    free_xml(event);
    free_xml(request);
    free(description);
}
/*
 * Register fencing level `level` for target `node` with the given devices
 * and notify clients.
 *
 * The registration request is applied with stonith_level_register(); a
 * STONITH_OP_LEVEL_ADD notification then reports the returned description
 * and the current number of topology entries.
 */
static void
topology_register_helper(const char *node, int level, stonith_key_value_t * device_list)
{
    char *description = NULL;
    int register_rc = 0;
    xmlNode *event = create_xml_node(NULL, STONITH_OP_LEVEL_ADD);
    xmlNode *request = create_level_registration_xml(node, level, device_list);

    register_rc = stonith_level_register(request, &description);

    /* The table size is read after the registration so it reflects the new state */
    crm_xml_add(event, F_STONITH_DEVICE, description);
    crm_xml_add_int(event, F_STONITH_ACTIVE, g_hash_table_size(topology));

    do_stonith_notify(0, STONITH_OP_LEVEL_ADD, register_rc, event);

    free_xml(event);
    free_xml(request);
    free(description);
}
/*
 * Unregister every stonith-class resource found in an XPath result set.
 *
 * Results whose agent class is not "stonith" (or that are unexpectedly
 * NULL) are ignored.
 */
static void
remove_cib_device(xmlXPathObjectPtr xpathObj)
{
    int count = numXpathResults(xpathObj);
    int idx = 0;

    while (idx < count) {
        const char *agent_class = NULL;
        xmlNode *match = getXpathResult(xpathObj, idx);

        idx++;

        CRM_LOG_ASSERT(match != NULL);
        if (match != NULL) {
            agent_class = crm_element_value(match, XML_AGENT_ATTR_CLASS);
        }

        if (!safe_str_neq(agent_class, "stonith")) {
            stonith_device_remove(crm_element_value(match, XML_ATTR_ID), TRUE);
        }
    }
}
/*
 * Apply one fencing-level element to the in-memory topology.
 *
 * match   the fencing-level XML element; a NULL match is flagged and ignored
 * remove  when true, the existing level is removed before the (new)
 *         definition is registered
 */
static void
handle_topology_change(xmlNode *match, bool remove)
{
    int level = 0;
    const char *target = NULL;
    const char *device_str = NULL;
    stonith_key_value_t *device_kvs = NULL;

    CRM_LOG_ASSERT(match != NULL);
    if (match == NULL) {
        return;
    }

    crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &level);
    target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
    device_str = crm_element_value(match, XML_ATTR_STONITH_DEVICES);
    device_kvs = parse_device_list(device_str);

    crm_trace("Updating %s[%d] (%s) to %s", target, level, ID(match), device_str);

    if (remove) {
        topology_remove_helper(target, level);
    }
    topology_register_helper(target, level, device_kvs);

    stonith_key_value_freeall(device_kvs, 1, 1);
}
/*
 * Process the deletions contained in an XPath result set of fencing levels.
 *
 * Only results carrying XML_DIFF_MARKER are treated as deletions; each must
 * name a target and a positive level index, otherwise an error is logged.
 * Modifications are dealt with during the 'addition' stage instead.
 */
static void
remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
    int count = numXpathResults(xpathObj);
    int idx = 0;

    for (idx = 0; idx < count; idx++) {
        int level = 0;
        const char *target = NULL;
        xmlNode *match = getXpathResult(xpathObj, idx);

        CRM_LOG_ASSERT(match != NULL);
        if (match == NULL || crm_element_value(match, XML_DIFF_MARKER) == NULL) {
            /* Not a deletion */
            continue;
        }

        target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
        crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &level);

        if (target == NULL) {
            crm_err("Invalid fencing target in element %s", ID(match));

        } else if (level <= 0) {
            crm_err("Invalid level for %s in element %s", target, ID(match));

        } else {
            topology_remove_helper(target, level);
        }
    }
}
/*
 * (Re-)register every fencing level in an XPath result set.
 *
 * Each entry is applied with removal of any previous definition first.
 * The force argument is currently unused.
 */
static void
register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force)
{
    int remaining = numXpathResults(xpathObj);
    int idx = 0;

    while (remaining > 0) {
        handle_topology_change(getXpathResult(xpathObj, idx), TRUE);
        idx++;
        remaining--;
    }
}
/*
 * Fencing topology: full refresh.
 *
 * Discards the current topology table (when one exists) and replaces it
 * with an empty one, then registers every fencing level found in the
 * locally cached CIB.  The msg argument is unused.
 */
static void
fencing_topology_init(xmlNode * msg)
{
    xmlXPathObjectPtr all_levels = NULL;

    crm_trace("Full topology refresh");

    if (topology != NULL) {
        g_hash_table_destroy(topology);
        topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
    }

    /* Grab everything */
    all_levels = xpath_search(local_cib, "//" XML_TAG_FENCING_LEVEL);
    register_fencing_topology(all_levels, TRUE);
    freeXpathObject(all_levels);
}
/*
 * Name under which a resource is known as a fencing device: its clone_name
 * when set, otherwise its id.
 *
 * The argument and the whole expansion are parenthesized so the macro is
 * safe with any pointer expression (e.g. rsc_name(&obj)) and inside larger
 * expressions.  Note that x is evaluated twice.
 */
#define rsc_name(x) ((x)->clone_name ? (x)->clone_name : (x)->id)
/*
 * Bring the registered fencing device for one resource in line with the CIB.
 *
 * Resources with children are handled by recursing into the children.
 * For a leaf resource of class "stonith", the device is (re-)registered
 * when this node (stonith_our_uname) is allowed to run it with a
 * non-negative score; in every other case that reaches update_done, a
 * device registered under the same name is removed.
 */
static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set)
{
node_t *node = NULL;
const char *value = NULL;
const char *rclass = NULL;
node_t *parent = NULL;
/* Stays TRUE unless the device ends up being registered below */
gboolean remove = TRUE;
/* TODO: Mark each installed device and remove if untouched when this process finishes */
if(rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
cib_device_update(gIter->data, data_set);
/* For clones and masters, stop after the first child has been processed */
if(rsc->variant == pe_clone || rsc->variant == pe_master) {
crm_trace("Only processing one copy of the clone %s", rsc->id);
break;
}
}
return;
}
/* Only resources whose agent class is "stonith" are of interest */
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if(safe_str_neq(rclass, "stonith")) {
return;
}
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
if(value && strcmp(RSC_STOPPED, value) == 0) {
crm_info("Device %s has been disabled", rsc->id);
goto update_done;
} else if(stonith_our_uname) {
/* Find our own entry among the nodes the resource is allowed on;
 * node is left NULL if we are not listed */
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if(node && strcmp(node->details->uname, stonith_our_uname) == 0) {
break;
}
node = NULL;
}
}
/* When the resource belongs to a group, also find our entry among the
 * group's allowed nodes so its score can be checked below */
if (rsc->parent && rsc->parent->variant == pe_group && stonith_our_uname) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->parent->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&parent)) {
if(parent && strcmp(parent->details->uname, stonith_our_uname) == 0) {
break;
}
parent = NULL;
}
}
if(node == NULL) {
/* We are not among the allowed nodes: trace the ones that are, then
 * fall through to device removal */
GHashTableIter iter;
crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname);
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
crm_trace("Available: %s = %d", node->details->uname, node->weight);
}
goto update_done;
} else if(node->weight < 0 || (parent && parent->weight < 0)) {
/* A negative score for the resource or its group disables the device here */
char *score = score2char((node->weight < 0) ? node->weight : parent->weight);
crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score);
free(score);
goto update_done;
} else {
xmlNode *data;
GHashTableIter gIter;
stonith_key_value_t *params = NULL;
const char *name = NULL;
const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE);
const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
const char *rsc_provides = NULL;
crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight);
get_rsc_attributes(rsc->parameters, rsc, node, data_set);
get_meta_attributes(rsc->meta, rsc, node, data_set);
rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES);
/* Copy every complete name/value pair of the resource parameters into
 * the device's parameter list */
g_hash_table_iter_init(&gIter, rsc->parameters);
while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) {
if (!name || !value) {
continue;
}
params = stonith_key_value_add(params, name, value);
crm_trace(" %s=%s", name, value);
}
remove = FALSE;
data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides);
stonith_device_register(data, NULL, TRUE);
stonith_key_value_freeall(params, 1, 1);
free_xml(data);
}
update_done:
/* Drop a previously registered device that is no longer usable here */
if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) {
stonith_device_remove(rsc_name(rsc), TRUE);
}
}
extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);
extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set);
/*
 * Recompute the set of registered fencing devices from the cached CIB.
 *
 * A working set is built on top of local_cib (borrowed, not copied), the
 * cluster status and calculations are run on it, and every top-level
 * resource is then handed to cib_device_update().
 */
static void
cib_devices_update(void)
{
    pe_working_set_t data_set;
    GListPtr item = NULL;

    crm_info("Updating devices to version %s.%s.%s",
             crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN),
             crm_element_value(local_cib, XML_ATTR_GENERATION),
             crm_element_value(local_cib, XML_ATTR_NUMUPDATES));

    set_working_set_defaults(&data_set);
    data_set.localhost = stonith_our_uname;
    data_set.flags |= pe_flag_quick_location;
    data_set.now = crm_time_new(NULL);
    data_set.input = local_cib;

    cluster_status(&data_set);
    do_calculations(&data_set, NULL, NULL);

    item = data_set.resources;
    while (item != NULL) {
        cib_device_update(item->data, &data_set);
        item = item->next;
    }

    /* Wasn't a copy: detach the cached CIB before the working set is cleaned up */
    data_set.input = NULL;
    cleanup_alloc_calculations(&data_set);
}
/*
 * React to a format-2 CIB patchset by updating the registered devices.
 *
 * For every change in the patchset:
 *  - changes without an operation or path, and "move" operations, are ignored
 *  - a "delete" whose path mentions a primitive removes the device whose id
 *    is embedded in the path (primitive[@id='...']); a path from which no id
 *    can be extracted is logged and ignored instead of being dereferenced
 *  - any other change under the resources section, or touching a location
 *    constraint, triggers a full recalculation via cib_devices_update()
 *
 * event is unused; msg is the notification carrying the patchset.
 */
static void
update_cib_stonith_devices_v2(const char *event, xmlNode * msg)
{
    xmlNode *change = NULL;
    char *reason = NULL;
    bool needs_update = FALSE;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
        const char *op = crm_element_value(change, XML_DIFF_OP);
        const char *xpath = crm_element_value(change, XML_DIFF_PATH);
        const char *shortpath = NULL;

        /* xpath is passed to strstr() below, so it must not be NULL */
        if (op == NULL || xpath == NULL || strcmp(op, "move") == 0) {
            continue;

        } else if (safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) {
            static const char id_prefix[] = "primitive[@id=\'";
            char *rsc_id = NULL;
            char *search = NULL;
            char *mutable = strdup(xpath);

            /* Each step is only attempted if the previous one succeeded */
            if (mutable != NULL) {
                rsc_id = strstr(mutable, id_prefix);
            }
            if (rsc_id != NULL) {
                rsc_id += strlen(id_prefix);
                search = strchr(rsc_id, '\'');
            }

            if (search != NULL) {
                /* Terminate the id at its closing quote */
                search[0] = 0;
                stonith_device_remove(rsc_id, TRUE);
            } else {
                crm_info("Ignoring malformed CIB update (resource deletion): %s", xpath);
            }
            free(mutable);

        } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES)
                   || strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) {
            /* Safest to recompute everything; the first such change decides the reason */
            shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath);
            reason = crm_strdup_printf("%s %s", op, shortpath+1);
            needs_update = TRUE;
            break;
        }
    }

    if (needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    }
    free(reason);
}
/*
 * React to a format-1 CIB diff by updating the registered devices.
 *
 * Three searches are run against the diff in msg:
 *  1. any location constraint  -> schedule a full recalculation
 *  2. removed resources        -> unregister the stonith ones immediately
 *  3. added resources          -> schedule a full recalculation if any of
 *                                 them is of class "stonith"
 *
 * event is unused.
 */
static void
update_cib_stonith_devices_v1(const char *event, xmlNode * msg)
{
    xmlXPathObjectPtr results = NULL;
    gboolean needs_update = FALSE;
    const char *reason = "none";
    int count = 0;
    int idx = 0;

    /* process new constraints */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION);
    count = numXpathResults(results);
    if (count > 0) {
        /* Safest and simplest to always recompute */
        needs_update = TRUE;
        reason = "new location constraint";

        for (idx = 0; idx < count; idx++) {
            crm_log_xml_trace(getXpathResult(results, idx), "new constraint");
        }
    }
    freeXpathObject(results);

    /* process deletions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE);
    if (numXpathResults(results) > 0) {
        remove_cib_device(results);
    }
    freeXpathObject(results);

    /* process additions */
    results = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE);
    count = numXpathResults(results);
    for (idx = 0; idx < count; idx++) {
        xmlNode *match = getXpathResult(results, idx);
        const char *rsc_id = crm_element_value(match, XML_ATTR_ID);
        const char *standard = crm_element_value(match, XML_AGENT_ATTR_CLASS);

        if (safe_str_neq(standard, "stonith")) {
            continue;
        }

        crm_trace("Fencing resource %s was added or modified", rsc_id);
        reason = "new resource";
        needs_update = TRUE;
    }
    freeXpathObject(results);

    if (needs_update) {
        crm_info("Updating device list from the cib: %s", reason);
        cib_devices_update();
    }
}
/*
 * Dispatch a CIB update to the device handler matching the patch format.
 *
 * The format defaults to 1 when the patchset carries no "format"
 * attribute; formats other than 1 and 2 are reported and ignored.
 */
static void
update_cib_stonith_devices(const char *event, xmlNode * msg)
{
    int format = 1;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if (format == 1) {
        update_cib_stonith_devices_v1(event, msg);

    } else if (format == 2) {
        update_cib_stonith_devices_v2(event, msg);

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
/*
 * Update the in-memory fencing topology from a CIB update notification.
 *
 * Format 1 diffs: deletions are processed first, then additions/changes.
 *
 * Format 2 patchsets: each change is inspected in turn.
 *  - changes without an operation or a path are skipped (the path is
 *    passed to strstr(), so it must not be NULL)
 *  - changes at the configuration level that carry a fencing topology, and
 *    changes to the topology section itself (as opposed to something below
 *    a single level), end processing; for "create" and "delete" the whole
 *    topology is rebuilt first
 *  - changes unrelated to the fencing topology are skipped
 *  - below a level: "create" and "modify" are applied individually,
 *    "move" is ignored, and "delete" falls back to a full rebuild because
 *    the path and id alone are not enough to remove a specific entry
 *
 * Other formats are reported and ignored.
 */
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
    int format = 1;
    const char *xpath;
    xmlXPathObjectPtr xpathObj = NULL;
    xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);

    CRM_ASSERT(patchset);
    crm_element_value_int(patchset, "format", &format);

    if(format == 1) {
        /* Process deletions (only) */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);
        remove_fencing_topology(xpathObj);
        freeXpathObject(xpathObj);

        /* Process additions and changes */
        xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
        xpathObj = xpath_search(msg, xpath);
        register_fencing_topology(xpathObj, FALSE);
        freeXpathObject(xpathObj);

    } else if(format == 2) {
        xmlNode *change = NULL;

        for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) {
            const char *op = crm_element_value(change, XML_DIFF_OP);
            /* Named differently from the function-level xpath to avoid shadowing it */
            const char *path = crm_element_value(change, XML_DIFF_PATH);
            xmlNode *f_topology = get_message_xml(change, XML_TAG_FENCING_TOPOLOGY);

            if(op == NULL || path == NULL) {
                continue;

            } else if (strstr(path, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) && f_topology != NULL) {
                if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation", op);
                    fencing_topology_init(NULL);
                }
                return;

            } else if (strstr(path, "/" XML_TAG_FENCING_TOPOLOGY "/") == NULL) {
                continue;

            } else if(strstr(path, "/" XML_TAG_FENCING_LEVEL "/") == NULL) {
                if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) {
                    crm_info("Re-initializing fencing topology after top-level %s operation", op);
                    fencing_topology_init(NULL);
                }
                return;
            }

            crm_trace("Handling %s operation for %s", op, path);
            if(strcmp(op, "move") == 0) {
                continue;

            } else if(strcmp(op, "create") == 0) {
                handle_topology_change(change->children, FALSE);

            } else if(strcmp(op, "modify") == 0) {
                xmlNode *match = first_named_child(change, XML_DIFF_RESULT);

                if(match) {
                    handle_topology_change(match->children, TRUE);
                }

            } else if(strcmp(op, "delete") == 0) {
                /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */
                crm_info("Re-initializing fencing topology after %s operation", op);
                fencing_topology_init(NULL);
                return;
            }
        }

    } else {
        crm_warn("Unknown patch format: %d", format);
    }
}
static bool have_cib_devices = FALSE;
/*
 * CIB diff notification callback: keep the local CIB copy current and
 * propagate relevant changes to the device list and fencing topology.
 *
 * Steps:
 *  1. Ignore notifications until the initial full CIB has been received
 *     (have_cib_devices), and notifications without a message.
 *  2. Apply the patchset to local_cib; if that fails the copy is dropped.
 *  3. With no local copy, fetch the full CIB and force a full refresh.
 *  4. Pick up the stonith-enabled and (when the watchdog option is on)
 *     stonith-watchdog-timeout properties.
 *  5. Depending on whether stonith is enabled and whether a refresh is
 *     pending: ignore the update, do a full refresh, or apply the update
 *     incrementally.
 */
static void
update_cib_cache_cb(const char *event, xmlNode * msg)
{
int rc = pcmk_ok;
xmlNode *stonith_enabled_xml = NULL;
xmlNode *stonith_watchdog_xml = NULL;
const char *stonith_enabled_s = NULL;
/* Persists across calls; FALSE means a full refresh is due the next time
 * stonith is found to be enabled */
static gboolean stonith_enabled_saved = TRUE;
if(!have_cib_devices) {
crm_trace("Skipping updates until we get a full dump");
return;
} else if(msg == NULL) {
crm_trace("Missing %s update", event);
return;
}
/* Maintain a local copy of the CIB so that we have full access to the device definitions and location constraints */
if (local_cib != NULL) {
/* NOTE(review): this rc shadows the function-level rc declared above */
int rc = pcmk_ok;
xmlNode *patchset = NULL;
/* Updates that report a non-OK result are not applied */
crm_element_value_int(msg, F_CIB_RC, &rc);
if (rc != pcmk_ok) {
return;
}
patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT);
xml_log_patchset(LOG_TRACE, "Config update", patchset);
rc = xml_apply_patchset(local_cib, patchset, TRUE);
switch (rc) {
case pcmk_ok:
case -pcmk_err_old_data:
break;
case -pcmk_err_diff_resync:
case -pcmk_err_diff_failed:
/* Drop the copy; it is re-requested in full below */
crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
break;
default:
crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc);
free_xml(local_cib);
local_cib = NULL;
}
}
if (local_cib == NULL) {
crm_trace("Re-requesting the full cib");
rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call);
if(rc != pcmk_ok) {
crm_err("Couldnt retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc);
return;
}
CRM_ASSERT(local_cib != NULL);
stonith_enabled_saved = FALSE; /* Trigger a full refresh below */
}
stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE);
if (stonith_enabled_xml) {
stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE);
}
if(daemon_option_enabled(crm_system_name, "watchdog")) {
const char *value = NULL;
long timeout_ms = 0;
/* NOTE(review): value was just initialised to NULL, so this test is always true */
if(value == NULL) {
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
if (stonith_watchdog_xml) {
value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE);
}
}
if(value) {
timeout_ms = crm_get_msec(value);
}
/* With no configured value, timeout_ms stays 0 */
if(timeout_ms != stonith_watchdog_timeout_ms) {
crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
stonith_watchdog_timeout_ms = timeout_ms;
}
}
/* An absent stonith-enabled property is treated the same as an enabled one */
if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) {
crm_trace("Ignoring cib updates while stonith is disabled");
stonith_enabled_saved = FALSE;
return;
} else if (stonith_enabled_saved == FALSE) {
crm_info("Updating stonith device and topology lists now that stonith is enabled");
stonith_enabled_saved = TRUE;
fencing_topology_init(NULL);
cib_devices_update();
} else {
/* Normal case: apply just this update */
update_fencing_topology(event, msg);
update_cib_stonith_devices(event, msg);
}
}
/*
 * Callback for the initial full CIB query issued by setup_cib().
 *
 * Stores a private copy of the query output as the local CIB cache, marks
 * the cache as available so that later diff notifications are processed,
 * and builds the topology and device lists from it.
 */
static void
init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    crm_info("Updating device list from the cib: init");

    local_cib = copy_xml(output);
    have_cib_devices = TRUE;

    fencing_topology_init(msg);
    cib_devices_update();
}
/*
 * Begin shutting the daemon down (also installed as the SIGTERM handler).
 *
 * If the main loop is running it is asked to quit, and the caller of the
 * main loop performs the cleanup.  Otherwise cleanup happens here and the
 * process exits immediately.  nsig is unused.
 */
static void
stonith_shutdown(int nsig)
{
    stonith_shutdown_flag = TRUE;
    crm_info("Terminating with %d clients", crm_hash_table_size(client_connections));

    if (mainloop != NULL && g_main_is_running(mainloop)) {
        g_main_quit(mainloop);
        return;
    }

    stonith_cleanup();
    crm_exit(pcmk_ok);
}
/*
 * Called when the connection to the CIB goes away.
 *
 * During a shutdown this is expected and only logged.  Otherwise the loss
 * is reported, the CIB API is signed off (if present) and the daemon
 * shuts down.
 */
static void
cib_connection_destroy(gpointer user_data)
{
    if (stonith_shutdown_flag) {
        crm_info("Connection to the CIB closed.");
        return;
    }

    crm_notice("Connection to the CIB terminated. Shutting down.");

    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }

    stonith_shutdown(0);
}
/*
 * Release the daemon's global resources: sign off from the CIB, destroy
 * the IPC server, tear down peer and client bookkeeping, and free the
 * local node name and the cached CIB.
 */
static void
stonith_cleanup(void)
{
    if (cib_api != NULL) {
        cib_api->cmds->signoff(cib_api);
    }

    if (ipcs != NULL) {
        qb_ipcs_destroy(ipcs);
    }

    crm_peer_destroy();
    crm_client_cleanup();

    free(stonith_our_uname);
    free_xml(local_cib);
}
/* *INDENT-OFF* */
/*
 * Command-line options accepted by the daemon; the last field of each entry
 * is the flag character that main() switches on.
 * NOTE(review): the field layout looks like getopt_long's
 * {name, has_arg, flag, val} - confirm against struct crm_option.
 */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
/* The only option here whose second field is 1; main() reads its value from optarg */
{"logfile", 1, 0, 'l'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
/* All-zero entry ends the table */
{0, 0, 0, 0}
};
/* *INDENT-ON* */
/*
 * Connect to the CIB and start tracking configuration changes.
 *
 * The cib_new() constructor is resolved from the CIB library at run time
 * (once) and used to create cib_api.  Sign-on is retried while the CIB
 * reports -ENOTCONN, for up to five attempts, sleeping one second longer
 * before each retry.  On success, diff notifications are routed to
 * update_cib_cache_cb(), a full query is issued whose result is delivered
 * to init_cib_cache_cb(), and cib_connection_destroy() is installed as
 * the disconnect handler.
 */
static void
setup_cib(void)
{
    static cib_t *(*new_cib) (void) = NULL;
    int retries = 0;
    int rc;

    if (new_cib == NULL) {
        new_cib = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE);
    }
    if (new_cib != NULL) {
        cib_api = (*new_cib) ();
    }
    if (cib_api == NULL) {
        crm_err("No connection to the CIB");
        return;
    }

    for (;;) {
        sleep(retries);
        rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command);
        if (rc != -ENOTCONN || ++retries >= 5) {
            break;
        }
    }

    if (rc != pcmk_ok) {
        crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc);
        return;
    }

    if (cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)
        != pcmk_ok) {
        crm_err("Could not set CIB notification callback");
        return;
    }

    /* The value returned by the query identifies the call the callback is registered for */
    rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local);
    cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb",
                                     init_cib_cache_cb);
    cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy);
    crm_notice("Watching for stonith topology changes");
}
/*
 * Handler table for the daemon's IPC server; main() passes it to
 * stonith_ipc_server_init().
 */
struct qb_ipcs_service_handlers ipc_callbacks = {
.connection_accept = st_ipc_accept,
.connection_created = st_ipc_created,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
/*
 * Peer status callback.
 *
 * Process-list changes are translated into a membership state (member if
 * the peer has the CPG process flag set, lost otherwise) and nothing more
 * is done.  For every other kind of change a "poke" message is broadcast
 * to the cluster.
 */
static void
st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
    xmlNode *poke = NULL;

    if (type == crm_status_processes) {
        const char *state = is_set(node->processes, crm_proc_cpg) ? CRM_NODE_MEMBER : CRM_NODE_LOST;

        crm_update_peer_state(__FUNCTION__, node, state, 0);
        return;
    }

    /*
     * This is a hack until we can send to a nodeid and/or we fix node name lookups
     * These messages are ignored in stonith_peer_callback()
     */
    poke = create_xml_node(NULL, "stonith_command");
    crm_xml_add(poke, F_XML_TAGNAME, "stonith_command");
    crm_xml_add(poke, F_TYPE, T_STONITH_NG);
    crm_xml_add(poke, F_STONITH_OPERATION, "poke");

    crm_debug("Broadcasting our uname because of node %u", node->id);
    send_cluster_message(NULL, crm_msg_stonith_ng, poke, FALSE);

    free_xml(poke);
}
/*
 * Daemon entry point.
 *
 * Parses the command line, answers the "metadata" request by printing the
 * description of the instance attributes handled by stonithd, and otherwise
 * connects to the cluster (unless running stand-alone), optionally to the
 * CIB, starts the IPC server and runs the main loop until shutdown.
 */
int
main(int argc, char **argv)
{
int flag;
int rc = 0;
int lpc = 0;
int argerr = 0;
int option_index = 0;
crm_cluster_t cluster;
/* Actions for which per-device override attributes are advertised in the metadata below */
const char *actions[] = { "reboot", "off", "list", "monitor", "status" };
crm_log_preinit("stonith-ng", argc, argv);
/* NOTE(review): this description talks about summarising cluster state, which
 * does not look like it describes this daemon - confirm the intended text */
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch (flag) {
case 'V':
crm_bump_log_level(argc, argv);
break;
case 'l':
crm_add_logfile(optarg);
break;
case 's':
stand_alone = TRUE;
break;
case 'c':
/* Join the cluster but do not connect to the CIB */
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
/* A single "metadata" argument prints the metadata and exits without starting the daemon */
if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
printf("\n");
printf("\n");
printf(" 1.0\n");
printf
(" This is a fake resource that details the instance attributes handled by stonithd.\n");
printf(" Options available for all stonith resources\n");
printf(" \n");
/* NOTE(review): the following lines start with '-', which looks like leftover
 * unified-diff removal markers rather than C - confirm before building */
- printf(" \n");
- printf
- (" How long to wait for the STONITH action to complete per a stonith device.\n");
- printf
- (" Overrides the stonith-timeout cluster property\n");
- printf(" \n");
- printf(" \n");
-
printf(" \n");
printf
(" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTARG);
printf
(" Advanced use only: An alternate parameter to supply instead of 'port'\n");
printf
(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" \n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTMAP);
printf
(" A mapping of host names to ports numbers for devices that do not support host names.\n");
printf
(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTLIST);
printf
(" A list of machines controlled by this device (Optional unless %s=static-list).\n",
STONITH_ATTR_HOSTCHECK);
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTCHECK);
printf
(" How to determine which machines are controlled by the device.\n");
printf
(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n",
STONITH_ATTR_HOSTLIST);
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_DELAY_MAX);
printf
(" Enable random delay for stonith actions and specify the maximum of random delay\n");
printf
(" This prevents double fencing when using slow devices such as sbd.\n"
"Use this to enable random delay for stonith actions and specify the maximum of random delay.\n");
printf(" \n");
printf(" \n");
/* Emit the same group of per-action attributes (alternate command, timeout, retries) for each action */
for (lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" \n", actions[lpc]);
printf
(" Advanced use only: An alternate command to run instead of '%s'\n",
actions[lpc]);
printf
(" Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.\n",
actions[lpc]);
printf(" \n", actions[lpc]);
printf(" \n");
printf(" \n", actions[lpc]);
printf
(" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n",
actions[lpc]);
printf
(" Some devices need much more/less time to complete than normal.\n"
"Use this to specify an alternate, device-specific, timeout for '%s' actions.\n",
actions[lpc]);
printf(" \n");
printf(" \n");
printf(" \n", actions[lpc]);
printf
(" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n",
actions[lpc]);
printf(" Some devices do not support multiple connections."
" Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining."
" Use this option to alter the number of times Pacemaker retries '%s' actions before giving up."
"\n", actions[lpc]);
printf(" \n");
printf(" \n");
}
printf(" \n");
printf("\n");
return 0;
}
/* Any leftover positional argument counts as a usage error */
if (optind != argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
if (stand_alone == FALSE) {
/* Install the cluster-layer callbacks before connecting */
#if SUPPORT_HEARTBEAT
cluster.hb_conn = NULL;
cluster.hb_dispatch = stonith_peer_hb_callback;
cluster.destroy = stonith_peer_hb_destroy;
#endif
if (is_openais_cluster()) {
#if SUPPORT_COROSYNC
cluster.destroy = stonith_peer_cs_destroy;
cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback;
cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership;
#endif
}
if (crm_cluster_connect(&cluster) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
crm_exit(DAEMON_RESPAWN_STOP);
}
/* Our node name and uuid are the ones filled in on the cluster object */
stonith_our_uname = cluster.uname;
stonith_our_uuid = cluster.uuid;
#if SUPPORT_HEARTBEAT
if (is_heartbeat_cluster()) {
/* crm_cluster_connect() registered us for crm_system_name, which
* usually is the only F_TYPE used by the respective sub system.
* Stonith needs to register two additional F_TYPE callbacks,
* because it can :-/ */
if (HA_OK !=
cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_NOTIFY,
cluster.hb_dispatch, cluster.hb_conn)) {
crm_crit("Cannot set msg callback %s: %s", T_STONITH_NOTIFY, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn));
crm_exit(DAEMON_RESPAWN_STOP);
}
if (HA_OK !=
cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_TIMEOUT_VALUE,
cluster.hb_dispatch, cluster.hb_conn)) {
crm_crit("Cannot set msg callback %s: %s", T_STONITH_TIMEOUT_VALUE, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn));
crm_exit(DAEMON_RESPAWN_STOP);
}
}
#endif
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
/* Stand-alone mode: no cluster connection, so use a fixed node name */
stonith_our_uname = strdup("localhost");
}
crm_set_status_callback(&st_peer_update_callback);
/* Global tables of registered devices and of fencing topology entries */
device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device);
topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry);
/* With the watchdog option enabled, register an internal "watchdog" device
 * whose host list contains only this node */
if(daemon_option_enabled(crm_system_name, "watchdog")) {
xmlNode *xml;
stonith_key_value_t *params = NULL;
params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname);
xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL);
stonith_device_register(xml, NULL, FALSE);
stonith_key_value_freeall(params, 1, 1);
free_xml(xml);
}
stonith_ipc_server_init(&ipcs, &ipc_callbacks);
#if SUPPORT_STONITH_CONFIG
if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) {
standalone_cfg_commit();
}
#endif
/* Create the mainloop and run it... */
mainloop = g_main_new(FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_run(mainloop);
/* The main loop has ended: release everything before exiting */
stonith_cleanup();
#if SUPPORT_HEARTBEAT
if (cluster.hb_conn) {
cluster.hb_conn->llc_ops->delete(cluster.hb_conn);
}
#endif
crm_info("Done");
return crm_exit(rc);
}
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index 42eda33348..e6123843ab 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -1,428 +1,428 @@
/*
* Copyright (C) 2004 Andrew Beekhof
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include
#include
#include
#include
#include
#include
#include
/* Global status flags, both starting out FALSE.  Nothing in this part of the
 * file sets or reads them; presumably they record that an error/warning was
 * logged while processing -- TODO confirm against the code that sets them. */
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
static gboolean
check_health(const char *value)
{
if (safe_str_eq(value, "none")) {
return TRUE;
} else if (safe_str_eq(value, "custom")) {
return TRUE;
} else if (safe_str_eq(value, "only-green")) {
return TRUE;
} else if (safe_str_eq(value, "progressive")) {
return TRUE;
} else if (safe_str_eq(value, "migrate-on-red")) {
return TRUE;
}
return FALSE;
}
/* Validator for "stonith-action": TRUE when value is "reboot", "poweroff" or
 * "off" (compared with safe_str_eq), FALSE otherwise. */
static gboolean
check_stonith_action(const char *value)
{
    /* Evaluated left to right; stops at the first match */
    if (safe_str_eq(value, "reboot")
        || safe_str_eq(value, "poweroff")
        || safe_str_eq(value, "off")) {
        return TRUE;
    }
    return FALSE;
}
/* Validator for "placement-strategy": TRUE when value matches one of the
 * accepted strategy names (compared with safe_str_eq), FALSE otherwise. */
static gboolean
check_placement_strategy(const char *value)
{
    static const char *const accepted[] = {
        "default",
        "utilization",
        "minimal",
        "balanced",
    };
    const int n_accepted = (int) (sizeof(accepted) / sizeof(accepted[0]));
    int lpc = 0;

    for (lpc = 0; lpc < n_accepted; lpc++) {
        if (safe_str_eq(value, accepted[lpc])) {
            return TRUE;
        }
    }
    return FALSE;
}
/* *INDENT-OFF* */
/* Table of cluster options handled by this file.  It is passed, together with
 * its element count, to config_metadata() by pe_metadata(), to
 * verify_all_options() by verify_pe_options() and to get_cluster_pref() by
 * pe_pref(). */
pe_cluster_option pe_opts[] = {
/* Each entry supplies up to eight initializers, in this order (labels are
 * descriptive; the struct's field names are not visible here):
 * name, old-name, type, allowed values, default, validate function,
 * short description, long description */
{ "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum,
"What to do when the cluster does not have quorum", NULL },
{ "symmetric-cluster", "symmetric_cluster", "boolean", NULL, "true", &check_boolean,
"All resources can run anywhere by default", NULL },
{ "default-resource-stickiness", "default_resource_stickiness", "integer", NULL, "0", &check_number, "", NULL },
{ "is-managed-default", "is_managed_default", "boolean", NULL, "true", &check_boolean,
"Should the cluster start/stop resources as required", NULL },
{ "maintenance-mode", NULL, "boolean", NULL, "false", &check_boolean,
"Should the cluster monitor resources and start/stop them as required", NULL },
{ "start-failure-is-fatal", NULL, "boolean", NULL, "true", &check_boolean, "Always treat start failures as fatal",
"This was the old default. However when set to FALSE, the cluster will instead use the resource's failcount and value for resource-failure-stickiness" },
{ "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean,
"Should the cluster check for active resources during startup", NULL },
/* Stonith Options */
{ "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean,
"Failed nodes are STONITH'd", NULL },
{ "stonith-action", "stonith_action", "enum", "reboot, poweroff, off", "reboot", &check_stonith_action,
"Action to send to STONITH device", NULL },
{ "stonith-timeout", NULL, "time", NULL, "60s", &check_timer,
- "How long to wait for the STONITH action to complete", NULL },
+ "How long to wait for the STONITH action (reboot,on,off) to complete", NULL },
{ XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, "false", &check_boolean,
"Enable watchdog integration", "Set automatically by the cluster if SBD is detected. User configured values are ignored." },
{ "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean,
"STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" },
/* Timeouts etc */
{ "cluster-delay", "transition_idle_timeout", "time", NULL, "60s", &check_time,
"Round trip delay over the network (excluding action execution)",
"The \"correct\" value will depend on the speed and load of your network and cluster nodes." },
{ "batch-limit", NULL, "integer", NULL, "0", &check_number,
"The number of jobs that the TE is allowed to execute in parallel",
"The \"correct\" value will depend on the speed and load of your network and cluster nodes." },
/* No long description is given for the next entry; the omitted trailing
 * initializer is implicitly zero-initialized. */
{ "migration-limit", NULL, "integer", NULL, "-1", &check_number,
"The number of migration jobs that the TE is allowed to execute in parallel on a node"},
{ "default-action-timeout", "default_action_timeout", "time", NULL, "20s", &check_time,
"How long to wait for actions to complete", NULL },
/* Orphans and stopping */
{ "stop-all-resources", NULL, "boolean", NULL, "false", &check_boolean,
"Should the cluster stop all active resources (except those needed for fencing)", NULL },
{ "stop-orphan-resources", "stop_orphan_resources", "boolean", NULL, "true", &check_boolean,
"Should deleted resources be stopped", NULL },
{ "stop-orphan-actions", "stop_orphan_actions", "boolean", NULL, "true", &check_boolean,
"Should deleted actions be cancelled", NULL },
{ "remove-after-stop", "remove_after_stop", "boolean", NULL, "false", &check_boolean,
"Remove resources from the LRM after they are stopped",
"Always set this to false. Other values are, at best, poorly tested and potentially dangerous." },
/* { "", "", , "0", "", NULL }, */
/* Storing inputs */
{ "pe-error-series-max", NULL, "integer", NULL, "-1", &check_number,
"The number of PE inputs resulting in ERRORs to save", "Zero to disable, -1 to store unlimited." },
{ "pe-warn-series-max", NULL, "integer", NULL, "5000", &check_number,
"The number of PE inputs resulting in WARNINGs to save", "Zero to disable, -1 to store unlimited." },
{ "pe-input-series-max", NULL, "integer", NULL, "4000", &check_number,
"The number of other PE inputs to save", "Zero to disable, -1 to store unlimited." },
/* Node health */
{ "node-health-strategy", NULL, "enum", "none, migrate-on-red, only-green, progressive, custom", "none", &check_health,
"The strategy combining node attributes to determine overall node health.",
"Requires external entities to create node attributes (named with the prefix '#health') with values: 'red', 'yellow' or 'green'."},
{ "node-health-green", NULL, "integer", NULL, "0", &check_number,
"The score 'green' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
{ "node-health-yellow", NULL, "integer", NULL, "0", &check_number,
"The score 'yellow' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
{ "node-health-red", NULL, "integer", NULL, "-INFINITY", &check_number,
"The score 'red' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
/*Placement Strategy*/
{ "placement-strategy", NULL, "enum", "default, utilization, minimal, balanced", "default", &check_placement_strategy,
"The strategy to determine resource placement", NULL},
};
/* *INDENT-ON* */
/* Hand the Policy Engine option table, plus its descriptive strings, to
 * config_metadata(). */
void
pe_metadata(void)
{
    const char *name = "Policy Engine";
    const char *version = "1.0";
    const char *desc_short = "Policy Engine Options";
    const char *desc_long =
        "This is a fake resource that details the options that can be configured for the Policy Engine.";

    config_metadata(name, version, desc_short, desc_long, pe_opts, DIMOF(pe_opts));
}
/* Run verify_all_options() over the supplied option table using every entry
 * of pe_opts. */
void
verify_pe_options(GHashTable * options)
{
    pe_cluster_option *known_options = pe_opts;

    verify_all_options(options, known_options, DIMOF(pe_opts));
}
/* Look up the option called name via get_cluster_pref(), using pe_opts as the
 * option table, and return whatever that lookup yields. */
const char *
pe_pref(GHashTable * options, const char *name)
{
    const char *pref = get_cluster_pref(options, pe_opts, DIMOF(pe_opts), name);

    return pref;
}
/* Map an action_fail_response value to its textual name.
 * Returns "" for any value not listed in the switch. */
const char *
fail2text(enum action_fail_response fail)
{
    switch (fail) {
        case action_fail_ignore:
            return "ignore";
        case action_fail_block:
            return "block";
        case action_fail_recover:
            return "recover";
        case action_fail_migrate:
            return "migrate";
        case action_fail_stop:
            return "stop";
        case action_fail_fence:
            return "fence";
        case action_fail_standby:
            return "standby";
        case action_fail_restart_container:
            return "restart-container";
        case action_fail_reset_remote:
            return "reset-remote";
    }
    /* Not one of the values handled above */
    return "";
}
/* Translate an action name into the matching action_tasks value.
 * Names that match none of the mapped actions yield no_action; when
 * SUPPORT_TRACING is enabled, a trace message is logged for names that are
 * not in the list of known-but-unmapped actions either. */
enum action_tasks
text2task(const char *task)
{
    if (safe_str_eq(task, CRMD_ACTION_STOP)) {
        return stop_rsc;
    }
    if (safe_str_eq(task, CRMD_ACTION_STOPPED)) {
        return stopped_rsc;
    }
    if (safe_str_eq(task, CRMD_ACTION_START)) {
        return start_rsc;
    }
    if (safe_str_eq(task, CRMD_ACTION_STARTED)) {
        return started_rsc;
    }
    if (safe_str_eq(task, CRM_OP_SHUTDOWN)) {
        return shutdown_crm;
    }
    if (safe_str_eq(task, CRM_OP_FENCE)) {
        return stonith_node;
    }
    if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
        return monitor_rsc;
    }
    if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
        return action_notify;
    }
    if (safe_str_eq(task, CRMD_ACTION_NOTIFIED)) {
        return action_notified;
    }
    if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
        return action_promote;
    }
    if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
        return action_demote;
    }
    if (safe_str_eq(task, CRMD_ACTION_PROMOTED)) {
        return action_promoted;
    }
    if (safe_str_eq(task, CRMD_ACTION_DEMOTED)) {
        return action_demoted;
    }

#if SUPPORT_TRACING
    /* Known names with no action_tasks equivalent: return without tracing.
     * CRMD_ACTION_STATUS can never match here because it already returned
     * monitor_rsc above. */
    if (safe_str_eq(task, CRMD_ACTION_CANCEL)
        || safe_str_eq(task, CRMD_ACTION_DELETE)
        || safe_str_eq(task, CRMD_ACTION_STATUS)
        || safe_str_eq(task, CRM_OP_PROBED)
        || safe_str_eq(task, CRM_OP_LRM_REFRESH)
        || safe_str_eq(task, CRMD_ACTION_MIGRATE)
        || safe_str_eq(task, CRMD_ACTION_MIGRATED)
        || safe_str_eq(task, "fail")
        || safe_str_eq(task, "stonith_up")
        || safe_str_eq(task, "stonith_complete")
        || safe_str_eq(task, "all_stopped")) {
        return no_action;
    }
    crm_trace("Unsupported action: %s", task);
#endif

    return no_action;
}
/* Map an action_tasks value to its textual name.
 * Returns "" for any value not listed in the switch. */
const char *
task2text(enum action_tasks task)
{
    switch (task) {
        case no_action:
            return "no_action";
        case stop_rsc:
            return CRMD_ACTION_STOP;
        case stopped_rsc:
            return CRMD_ACTION_STOPPED;
        case start_rsc:
            return CRMD_ACTION_START;
        case started_rsc:
            return CRMD_ACTION_STARTED;
        case shutdown_crm:
            return CRM_OP_SHUTDOWN;
        case stonith_node:
            return CRM_OP_FENCE;
        case monitor_rsc:
            return CRMD_ACTION_STATUS;
        case action_notify:
            return CRMD_ACTION_NOTIFY;
        case action_notified:
            return CRMD_ACTION_NOTIFIED;
        case action_promote:
            return CRMD_ACTION_PROMOTE;
        case action_promoted:
            return CRMD_ACTION_PROMOTED;
        case action_demote:
            return CRMD_ACTION_DEMOTE;
        case action_demoted:
            return CRMD_ACTION_DEMOTED;
    }
    /* Not one of the values handled above */
    return "";
}
/* Map a resource role to its textual name.
 * Roles below RSC_ROLE_UNKNOWN or not below RSC_ROLE_MAX fail a CRM_CHECK and
 * yield RSC_ROLE_UNKNOWN_S, as does any role not listed in the switch. */
const char *
role2text(enum rsc_role_e role)
{
    const char *text = RSC_ROLE_UNKNOWN_S;

    CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
    CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);

    switch (role) {
        case RSC_ROLE_UNKNOWN:
            text = RSC_ROLE_UNKNOWN_S;
            break;
        case RSC_ROLE_STOPPED:
            text = RSC_ROLE_STOPPED_S;
            break;
        case RSC_ROLE_STARTED:
            text = RSC_ROLE_STARTED_S;
            break;
        case RSC_ROLE_SLAVE:
            text = RSC_ROLE_SLAVE_S;
            break;
        case RSC_ROLE_MASTER:
            text = RSC_ROLE_MASTER_S;
            break;
    }
    return text;
}
/* Translate a role name into the matching rsc_role_e value.
 * role must not be NULL (enforced with CRM_ASSERT).  A name that matches no
 * known role is logged with crm_err and mapped to RSC_ROLE_UNKNOWN. */
enum rsc_role_e
text2role(const char *role)
{
    CRM_ASSERT(role != NULL);

    if (safe_str_eq(role, RSC_ROLE_STOPPED_S)) {
        return RSC_ROLE_STOPPED;
    }
    if (safe_str_eq(role, RSC_ROLE_STARTED_S)) {
        return RSC_ROLE_STARTED;
    }
    if (safe_str_eq(role, RSC_ROLE_SLAVE_S)) {
        return RSC_ROLE_SLAVE;
    }
    if (safe_str_eq(role, RSC_ROLE_MASTER_S)) {
        return RSC_ROLE_MASTER;
    }
    if (safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) {
        /* The explicit "unknown" name is valid input: no error is logged */
        return RSC_ROLE_UNKNOWN;
    }

    crm_err("Unknown role: %s", role);
    return RSC_ROLE_UNKNOWN;
}
/* Add two scores, saturating at -INFINITY and INFINITY.
 *
 * w1, w2: the scores to combine.
 * Returns -INFINITY if either operand is <= -INFINITY (this takes precedence
 * even when the other operand is >= INFINITY), INFINITY if either operand is
 * >= INFINITY, and otherwise w1 + w2 clamped to [-INFINITY, INFINITY].
 *
 * INFINITY is assumed to be the project's integer score limit rather than the
 * <math.h> floating-point macro -- TODO confirm against the project headers.
 */
int
merge_weights(int w1, int w2)
{
    long long sum = 0;
    int result = 0;

    if (w1 <= -INFINITY || w2 <= -INFINITY) {
        if (w1 >= INFINITY || w2 >= INFINITY) {
            crm_trace("-INFINITY + INFINITY == -INFINITY");
        }
        return -INFINITY;

    } else if (w1 >= INFINITY || w2 >= INFINITY) {
        return INFINITY;
    }

    /* Add only after the range checks, and in a wider type: adding two
     * arbitrary ints up front could overflow, which is undefined behaviour
     * for signed types and cannot be detected reliably after the fact. */
    sum = (long long) w1 + (long long) w2;

    /* detect +/- INFINITY */
    if (sum >= INFINITY) {
        result = INFINITY;

    } else if (sum <= -INFINITY) {
        result = -INFINITY;

    } else {
        result = (int) sum;
    }

    crm_trace("%d + %d = %d", w1, w2, result);
    return result;
}
/* Store a copy of name/value in hash unless a lookup of name already finds
 * something.
 * Nothing is stored when name or value is NULL, or when value is "#default".
 * Both key and value are duplicated with strdup() before insertion. */
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
    CRM_CHECK(hash != NULL, return);

    crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));

    if (name == NULL || value == NULL) {
        return;
    }
    if (safe_str_eq(value, "#default")) {
        return;
    }
    if (g_hash_table_lookup(hash, name) != NULL) {
        /* Existing entries win: the first value stored for a name is kept */
        return;
    }

    g_hash_table_insert(hash, strdup(name), strdup(value));
}