diff --git a/doc/Pacemaker_Explained/en-US/Ch-Options.txt b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
index e314980a05..71c58884dd 100644
--- a/doc/Pacemaker_Explained/en-US/Ch-Options.txt
+++ b/doc/Pacemaker_Explained/en-US/Ch-Options.txt
@@ -1,276 +1,278 @@
= Cluster Options =
== Special Options ==
indexterm:[Special Cluster Options]
indexterm:[Cluster Options,Special Options]
These fields are placed at the top level, rather than with the rest
of the cluster options, purely as a matter of parsing: they are used
by the configuration database which is, by design, mostly ignorant of
the content it holds. So the decision was made to put them in an
easy-to-find location.
=== Configuration Version ===
indexterm:[Configuration Version, Cluster Option]
indexterm:[Cluster Options,Configuration Version]
When a node joins the cluster, the cluster performs a check to see
who has the best configuration, based on the fields below. It then
asks the node with the highest (+admin_epoch+, +epoch+, +num_updates+)
tuple to replace the configuration on all the nodes. The tuple is
compared element by element, left to right, so for example (1, 0, 0)
is considered newer than (0, 42, 99). This makes setting these
fields, and setting them correctly, very important.
.Configuration Version Properties
[width="95%",cols="1m,5<",options="header",align="center"]
|=========================================================
|Field |Description
| admin_epoch |
indexterm:[admin_epoch Cluster Option]
indexterm:[Cluster Options,admin_epoch]
Never modified by the cluster. Use this to make the configurations on
any inactive nodes obsolete.
_Never set this value to zero_; in such cases the cluster cannot tell
the difference between your configuration and the "empty" one used
when nothing is found on disk.
| epoch |
indexterm:[epoch Cluster Option]
indexterm:[Cluster Options,epoch]
Incremented every time the configuration is updated (usually by the admin)
| num_updates |
indexterm:[num_updates Cluster Option]
indexterm:[Cluster Options,num_updates]
Incremented every time the configuration or status is updated (usually by the cluster)
|=========================================================
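For illustration, the current tuple can be read directly off the
+cib+ element. A hypothetical session (the attribute values here are
made up; yours will differ):
=======
pass:[# cibadmin --query | head -n 1]
<cib admin_epoch="1" epoch="12" num_updates="65" ... >
=======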
=== Other Fields ===
.Properties Controlling Validation
[width="95%",cols="1m,5<",options="header",align="center"]
|=========================================================
|Field |Description
| validate-with |
indexterm:[validate-with Cluster Option]
indexterm:[Cluster Options,validate-with]
Determines the type of validation being done on the configuration. If
set to "none", the cluster will not verify that updates conform to the
DTD (nor reject ones that don't). This option can be useful when
operating a mixed version cluster during an upgrade.
|=========================================================
=== Fields Maintained by the Cluster ===
.Properties Maintained by the Cluster
[width="95%",cols="1m,5<",options="header",align="center"]
|=========================================================
|Field |Description
|cib-last-written |
indexterm:[cib-last-written Cluster Fields]
indexterm:[Cluster Fields,cib-last-written]
Indicates when the configuration was last written to disk. For informational purposes only.
|dc-uuid |
indexterm:[dc-uuid Cluster Fields]
indexterm:[Cluster Fields,dc-uuid]
Indicates which cluster node is the current leader. Used by the
cluster when placing resources and determining the order of some
events.
|have-quorum |
indexterm:[have-quorum Cluster Fields]
indexterm:[Cluster Fields,have-quorum]
Indicates if the cluster has quorum. If false, this may mean that the
cluster cannot start resources or fence other nodes. See
+no-quorum-policy+ below.
|=========================================================
Note that although these fields can be written to by the admin, in
most cases the cluster will overwrite any values specified by the
admin with the "correct" ones. To change the +admin_epoch+, for
example, one would use:
pass:[cibadmin --modify --crm_xml '<cib admin_epoch="42"/>']
A complete set of fields will look something like this:
.An example of the fields set for a cib object
[source,XML]
-------
<!-- illustrative values only; your cluster will differ -->
<cib validate-with="pacemaker-1.2" admin_epoch="1" epoch="12"
     num_updates="65" dc-uuid="1" have-quorum="true"
     cib-last-written="Fri Jun 22 12:14:54 2012"/>
-------
== Cluster Options ==
Cluster options, as you might expect, control how the cluster behaves
when confronted with certain situations.
They are grouped into sets and, in advanced configurations, there may
be more than one.
footnote:[This will be described later in the section on
<> where we will show how to have the cluster use
different sets of options during working hours (when downtime is
usually to be avoided at all costs) than it does during the weekends
(when resources can be moved to their preferred hosts without
bothering end users)]
For now we will describe the simple case where each option is present at most once.
=== Available Cluster Options ===
.Cluster Options
[width="95%",cols="5m,2m,13",options="header",align="center"]
|=========================================================
|Option |Default |Description
| batch-limit | 30 |
indexterm:[batch-limit Cluster Options]
indexterm:[Cluster Options,batch-limit]
The number of jobs that the TE (Transition Engine) is allowed to
execute in parallel. The "correct" value will depend on the speed and
load of your network and cluster nodes.
| migration-limit | -1 (unlimited) |
indexterm:[migration-limit Cluster Options]
indexterm:[Cluster Options,migration-limit]
The number of migration jobs that the TE is allowed to execute in
parallel on a node.
| no-quorum-policy | stop |
indexterm:[no-quorum-policy Cluster Options]
indexterm:[Cluster Options,no-quorum-policy]
What to do when the cluster does not have quorum. Allowed values:
* ignore - continue all resource management
* freeze - continue resource management, but don't recover resources from nodes not in the affected partition
* stop - stop all resources in the affected cluster partition
* suicide - fence all nodes in the affected cluster partition
| symmetric-cluster | TRUE |
indexterm:[symmetric-cluster Cluster Options]
indexterm:[Cluster Options,symmetric-cluster]
Can all resources run on any node by default?
| stonith-enabled | TRUE |
indexterm:[stonith-enabled Cluster Options]
indexterm:[Cluster Options,stonith-enabled]
Should failed nodes and nodes with resources that can't be stopped be
shot? If you value your data, set up a STONITH device and enable this.
If true, or unset, the cluster will refuse to start resources unless
one or more STONITH resources have also been configured.
| stonith-action | reboot |
indexterm:[stonith-action Cluster Options]
indexterm:[Cluster Options,stonith-action]
-Action to send to STONITH device. Allowed values: reboot, poweroff.
+Action to send to STONITH device. Allowed values: reboot, off.
+The value 'poweroff' is also allowed, but is only used for
+legacy devices.
| cluster-delay | 60s |
indexterm:[cluster-delay Cluster Options]
indexterm:[Cluster Options,cluster-delay]
Round trip delay over the network (excluding action execution). The
"correct" value will depend on the speed and load of your network and
cluster nodes.
| stop-orphan-resources | TRUE |
indexterm:[stop-orphan-resources Cluster Options]
indexterm:[Cluster Options,stop-orphan-resources]
Should deleted resources be stopped?
| stop-orphan-actions | TRUE |
indexterm:[stop-orphan-actions Cluster Options]
indexterm:[Cluster Options,stop-orphan-actions]
Should deleted actions be cancelled?
| start-failure-is-fatal | TRUE |
indexterm:[start-failure-is-fatal Cluster Options]
indexterm:[Cluster Options,start-failure-is-fatal]
When set to FALSE, the cluster will instead use the resource's
+failcount+ and the value of +resource-failure-stickiness+.
| pe-error-series-max | -1 (all) |
indexterm:[pe-error-series-max Cluster Options]
indexterm:[Cluster Options,pe-error-series-max]
The number of PE inputs resulting in ERRORs to save. Used when reporting problems.
| pe-warn-series-max | -1 (all) |
indexterm:[pe-warn-series-max Cluster Options]
indexterm:[Cluster Options,pe-warn-series-max]
The number of PE inputs resulting in WARNINGs to save. Used when reporting problems.
| pe-input-series-max | -1 (all) |
indexterm:[pe-input-series-max Cluster Options]
indexterm:[Cluster Options,pe-input-series-max]
The number of "normal" PE inputs to save. Used when reporting problems.
|=========================================================
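As a quick illustration (a hypothetical invocation of the
pass:[crm_attribute] tool described in the next section), the quorum
policy could be relaxed on a two-node cluster with:
=======
pass:[# crm_attribute --attr-name no-quorum-policy --attr-value ignore]
=======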
You can always obtain an up-to-date list of cluster options, including
their default values, by running the pass:[pengine metadata] command.
=== Querying and Setting Cluster Options ===
indexterm:[Querying Cluster Options]
indexterm:[Setting Cluster Options]
indexterm:[Cluster Options,Querying]
indexterm:[Cluster Options,Setting]
Cluster options can be queried and modified using the
pass:[crm_attribute] tool. To get the current
value of +cluster-delay+, simply use:
pass:[crm_attribute --attr-name cluster-delay --get-value]
which is more simply written as
pass:[crm_attribute --get-value -n cluster-delay]
If a value is found, you'll see a result like this:
=======
pass:[ # crm_attribute --get-value -n cluster-delay]
name=cluster-delay value=60s
=======
However, if no value is found, the tool will display an error:
=======
pass:[# crm_attribute --get-value -n clusta-deway]
name=clusta-deway value=(null)
Error performing operation: The object/attribute does not exist
=======
To use a different value, e.g. +30s+, simply run:
pass:[crm_attribute --attr-name cluster-delay --attr-value 30s]
To go back to the cluster's default value you can delete the value, for example with this command:
pass:[crm_attribute --attr-name cluster-delay --delete-attr]
=== When Options are Listed More Than Once ===
If you ever see something like the following, it means that the option you're modifying is present more than once.
.Deleting an option that is listed twice
=======
pass:[# crm_attribute --attr-name batch-limit --delete-attr]
Multiple attributes match name=batch-limit in crm_config:
Value: 50 (set=cib-bootstrap-options, id=cib-bootstrap-options-batch-limit)
Value: 100 (set=custom, id=custom-batch-limit)
Please choose from one of the matches above and supply the 'id' with --attr-id
=======
In such cases, follow the on-screen instructions to perform the
requested action. To determine which value is currently being used by
the cluster, please refer to the section on <>.
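For instance, continuing the hypothetical session above, supplying the
+id+ of the duplicate entry removes only that copy:
=======
pass:[# crm_attribute --attr-name batch-limit --delete-attr --attr-id custom-batch-limit]
=======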
diff --git a/fencing/main.c b/fencing/main.c
index 3ff5b48e85..cc721ffaf5 100644
--- a/fencing/main.c
+++ b/fencing/main.c
@@ -1,891 +1,891 @@
/*
* Copyright (C) 2009 Andrew Beekhof
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
char *stonith_our_uname = NULL;
GMainLoop *mainloop = NULL;
GHashTable *client_list = NULL;
gboolean stand_alone = FALSE;
gboolean no_cib_connect = FALSE;
gboolean stonith_shutdown_flag = FALSE;
qb_ipcs_service_t *ipcs = NULL;
#if SUPPORT_HEARTBEAT
ll_cluster_t *hb_conn = NULL;
#endif
static void stonith_shutdown(int nsig);
static void stonith_cleanup(void);
static int32_t
st_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Connecting %p for uid=%d gid=%d", c, uid, gid);
if(stonith_shutdown_flag) {
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c));
return -EPERM;
}
return 0;
}
static void
st_ipc_created(qb_ipcs_connection_t *c)
{
stonith_client_t *new_client = NULL;
#if 0
struct qb_ipcs_stats srv_stats;
qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE);
qb_log(LOG_INFO, "Connection created (active:%d, closed:%d)",
srv_stats.active_connections,
srv_stats.closed_connections);
#endif
new_client = calloc(1, sizeof(stonith_client_t));
new_client->channel = c;
new_client->channel_name = strdup("ipc");
CRM_CHECK(new_client->id == NULL, free(new_client->id));
new_client->id = crm_generate_uuid();
crm_trace("Created channel %p for client %s", c, new_client->id);
/* make sure we can find ourselves later for sync calls
* redirected to the master instance
*/
g_hash_table_insert(client_list, new_client->id, new_client);
qb_ipcs_context_set(c, new_client);
CRM_ASSERT(qb_ipcs_context_get(c) != NULL);
}
/* Exit code means? */
static int32_t
st_ipc_dispatch(qb_ipcs_connection_t *c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
xmlNode *request = NULL;
stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c);
request = crm_ipcs_recv(c, data, size, &id, &flags);
if (request == NULL) {
crm_ipcs_send_ack(c, id, "nack", __FUNCTION__, __LINE__);
return 0;
}
CRM_CHECK(client != NULL, goto cleanup);
if(client->name == NULL) {
const char *value = crm_element_value(request, F_STONITH_CLIENTNAME);
if(value == NULL) {
client->name = crm_itoa(crm_ipcs_client_pid(c));
} else {
client->name = strdup(value);
}
}
CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p/%s", client, client->name); goto cleanup);
if(flags & crm_ipc_client_response) {
CRM_LOG_ASSERT(client->request_id == 0); /* This means the client has two synchronous events in-flight */
client->request_id = id; /* Reply only to the last one */
}
crm_xml_add(request, F_STONITH_CLIENTID, client->id);
crm_xml_add(request, F_STONITH_CLIENTNAME, client->name);
crm_log_xml_trace(request, "Client[inbound]");
stonith_command(client, id, flags, request, NULL);
cleanup:
if(client == NULL || client->id == NULL) {
crm_log_xml_notice(request, "Invalid client");
}
free_xml(request);
return 0;
}
/* Error code means? */
static int32_t
st_ipc_closed(qb_ipcs_connection_t *c)
{
stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c);
#if 0
qb_ipcs_stats_get(s1, &srv_stats, QB_FALSE);
qb_ipcs_connection_stats_get(c, &stats, QB_FALSE);
qb_log(LOG_INFO, "Connection to pid:%d destroyed (active:%d, closed:%d)",
stats.client_pid,
srv_stats.active_connections,
srv_stats.closed_connections);
qb_log(LOG_DEBUG, " Requests %"PRIu64"", stats.requests);
qb_log(LOG_DEBUG, " Responses %"PRIu64"", stats.responses);
qb_log(LOG_DEBUG, " Events %"PRIu64"", stats.events);
qb_log(LOG_DEBUG, " Send retries %"PRIu64"", stats.send_retries);
qb_log(LOG_DEBUG, " Recv retries %"PRIu64"", stats.recv_retries);
qb_log(LOG_DEBUG, " FC state %d", stats.flow_control_state);
qb_log(LOG_DEBUG, " FC count %"PRIu64"", stats.flow_control_count);
#endif
if (client == NULL) {
crm_err("No client");
return 0;
}
crm_trace("Cleaning up after client disconnect: %p/%s/%s", client, crm_str(client->name), client->id);
if(client->id != NULL) {
g_hash_table_remove(client_list, client->id);
}
/* 0 means: yes, go ahead and destroy the connection */
return 0;
}
static void
st_ipc_destroy(qb_ipcs_connection_t *c)
{
stonith_client_t *client = (stonith_client_t*)qb_ipcs_context_get(c);
/* Make sure the connection is fully cleaned up */
st_ipc_closed(c);
if(client == NULL) {
crm_trace("Nothing to destroy");
return;
}
crm_trace("Destroying %s (%p)", client->name, client);
free(client->name);
free(client->id);
free(client);
crm_trace("Done");
return;
}
static void
stonith_peer_callback(xmlNode * msg, void* private_data)
{
const char *remote = crm_element_value(msg, F_ORIG);
crm_log_xml_trace(msg, "Peer[inbound]");
stonith_command(NULL, 0, 0, msg, remote);
}
#if SUPPORT_HEARTBEAT
static void
stonith_peer_hb_callback(HA_Message * msg, void* private_data)
{
xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__);
stonith_peer_callback(xml, private_data);
free_xml(xml);
}
static void
stonith_peer_hb_destroy(gpointer user_data)
{
if(stonith_shutdown_flag) {
crm_info("Heartbeat disconnection complete... exiting");
} else {
crm_err("Heartbeat connection lost! Exiting.");
}
stonith_shutdown(0);
}
#endif
#if SUPPORT_COROSYNC
static gboolean stonith_peer_ais_callback(
AIS_Message *wrapper, char *data, int sender)
{
xmlNode *xml = NULL;
if(wrapper->header.id == crm_class_cluster) {
xml = string2xml(data);
if(xml == NULL) {
goto bail;
}
crm_xml_add(xml, F_ORIG, wrapper->sender.uname);
crm_xml_add_int(xml, F_SEQ, wrapper->id);
stonith_peer_callback(xml, NULL);
}
free_xml(xml);
return TRUE;
bail:
crm_err("Invalid XML: '%.120s'", data);
return TRUE;
}
static void
stonith_peer_ais_destroy(gpointer user_data)
{
crm_err("AIS connection terminated");
stonith_shutdown(0);
}
#endif
void do_local_reply(xmlNode *notify_src, const char *client_id,
gboolean sync_reply, gboolean from_peer)
{
/* send callback to originating child */
stonith_client_t *client_obj = NULL;
int local_rc = pcmk_ok;
crm_trace("Sending response");
if(client_id != NULL) {
client_obj = g_hash_table_lookup(client_list, client_id);
} else {
crm_trace("No client to sent the response to."
" F_STONITH_CLIENTID not set.");
}
crm_trace("Sending callback to request originator");
if(client_obj == NULL) {
local_rc = -1;
} else {
int rid = 0;
if(sync_reply) {
CRM_LOG_ASSERT(client_obj->request_id);
rid = client_obj->request_id;
client_obj->request_id = 0;
crm_trace("Sending response %d to %s %s",
rid, client_obj->name, from_peer?"(originator of delegated request)":"");
} else {
crm_trace("Sending an event to %s %s",
client_obj->name, from_peer?"(originator of delegated request)":"");
}
local_rc = crm_ipcs_send(client_obj->channel, rid, notify_src, !sync_reply);
}
if(local_rc < pcmk_ok && client_obj != NULL) {
crm_warn("%sSync reply to %s failed: %s",
sync_reply?"":"A-",
client_obj?client_obj->name:"", pcmk_strerror(local_rc));
}
}
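/* Map a notification type to the bit in a client's 'flags' mask;
 * stonith_notify_client() tests this mask before sending each event.
 */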
long long get_stonith_flag(const char *name)
{
if(safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) {
return 0x01;
} else if(safe_str_eq(name, STONITH_OP_DEVICE_ADD)) {
return 0x04;
} else if(safe_str_eq(name, STONITH_OP_DEVICE_DEL)) {
return 0x10;
}
return 0;
}
static void
stonith_notify_client(gpointer key, gpointer value, gpointer user_data)
{
xmlNode *update_msg = user_data;
stonith_client_t *client = value;
const char *type = NULL;
CRM_CHECK(client != NULL, return);
CRM_CHECK(update_msg != NULL, return);
type = crm_element_value(update_msg, F_SUBTYPE);
CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return);
if(client->channel == NULL) {
crm_trace("Skipping client with NULL channel");
return;
} else if(client->name == NULL) {
crm_trace("Skipping unnammed client / comamnd channel");
return;
}
if(client->flags & get_stonith_flag(type)) {
crm_trace("Sending %s-notification to client %s/%s", type, client->name, client->id);
if(crm_ipcs_send(client->channel, 0, update_msg, crm_ipc_server_event|crm_ipc_server_error) <= 0) {
crm_warn("%s-Notification of client %s/%s failed",
type, client->name, client->id);
}
}
}
void
do_stonith_notify(
int options, const char *type, int result, xmlNode *data,
const char *remote)
{
/* TODO: Standardize the contents of data */
xmlNode *update_msg = create_xml_node(NULL, "notify");
CRM_CHECK(type != NULL, ;);
crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY);
crm_xml_add(update_msg, F_SUBTYPE, type);
crm_xml_add(update_msg, F_STONITH_OPERATION, type);
crm_xml_add_int(update_msg, F_STONITH_RC, result);
if(data != NULL) {
add_message_xml(update_msg, F_STONITH_CALLDATA, data);
}
crm_trace("Notifying clients");
g_hash_table_foreach(client_list, stonith_notify_client, update_msg);
free_xml(update_msg);
crm_trace("Notify complete");
}
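/* Split a comma-separated device string (e.g. "dev1,dev2") into a
 * stonith_key_value_t list. The caller owns the result and must free
 * it with stonith_key_value_freeall().
 */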
static stonith_key_value_t *parse_device_list(const char *devices)
{
int lpc = 0;
int max = 0;
int last = 0;
stonith_key_value_t *output = NULL;
if(devices == NULL) {
return output;
}
max = strlen(devices);
for(lpc = 0; lpc <= max; lpc++) {
if(devices[lpc] == ',' || devices[lpc] == 0) {
char *line = NULL;
line = calloc(1, 2 + lpc - last);
snprintf(line, 1 + lpc - last, "%s", devices+last);
output = stonith_key_value_add(output, NULL, line);
free(line);
last = lpc + 1;
}
}
return output;
}
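/* Remove the given fencing level for 'node' and notify interested
 * clients that the topology changed.
 */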
static void topology_remove_helper(const char *node, int level)
{
int rc;
char *desc = NULL;
xmlNode *data = create_xml_node(NULL, F_STONITH_LEVEL);
xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_DEL);
crm_xml_add(data, "origin", __FUNCTION__);
crm_xml_add_int(data, XML_ATTR_ID, level);
crm_xml_add(data, F_STONITH_TARGET, node);
rc = stonith_level_remove(data, &desc);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology));
do_stonith_notify(0, STONITH_OP_LEVEL_DEL, rc, notify_data, NULL);
free_xml(notify_data);
free_xml(data);
free(desc);
}
static void topology_register_helper(const char *node, int level, stonith_key_value_t *device_list)
{
int rc;
char *desc = NULL;
xmlNode *notify_data = create_xml_node(NULL, STONITH_OP_LEVEL_ADD);
xmlNode *data = create_level_registration_xml(node, level, device_list);
rc = stonith_level_register(data, &desc);
crm_xml_add(notify_data, F_STONITH_DEVICE, desc);
crm_xml_add_int(notify_data, F_STONITH_ACTIVE, g_hash_table_size(topology));
do_stonith_notify(0, STONITH_OP_LEVEL_ADD, rc, notify_data, NULL);
free_xml(notify_data);
free_xml(data);
free(desc);
}
static void remove_fencing_topology(xmlXPathObjectPtr xpathObj)
{
int max = 0, lpc = 0;
if(xpathObj && xpathObj->nodesetval) {
max = xpathObj->nodesetval->nodeNr;
}
for(lpc = 0; lpc < max; lpc++) {
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_CHECK(match != NULL, continue);
if(crm_element_value(match, XML_DIFF_MARKER)) {
/* Deletion */
int index = 0;
const char *target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
if(target == NULL) {
crm_err("Invalid fencing target in element %s", ID(match));
} else if(index <= 0) {
crm_err("Invalid level for %s in element %s", target, ID(match));
} else {
topology_remove_helper(target, index);
}
/* } else { Deal with modifications during the 'addition' stage */
}
}
}
static void register_fencing_topology(xmlXPathObjectPtr xpathObj, gboolean force)
{
int max = 0, lpc = 0;
if(xpathObj && xpathObj->nodesetval) {
max = xpathObj->nodesetval->nodeNr;
}
for(lpc = 0; lpc < max; lpc++) {
int index = 0;
const char *target;
const char *dev_list;
stonith_key_value_t *devices = NULL;
xmlNode *match = getXpathResult(xpathObj, lpc);
CRM_CHECK(match != NULL, continue);
crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index);
target = crm_element_value(match, XML_ATTR_STONITH_TARGET);
dev_list = crm_element_value(match, XML_ATTR_STONITH_DEVICES);
devices = parse_device_list(dev_list);
crm_trace("Updating %s[%d] (%s) to %s", target, index, ID(match), dev_list);
if(target == NULL) {
crm_err("Invalid fencing target in element %s", ID(match));
} else if(index <= 0) {
crm_err("Invalid level for %s in element %s", target, ID(match));
} else if(force == FALSE && crm_element_value(match, XML_DIFF_MARKER)) {
/* Addition */
topology_register_helper(target, index, devices);
} else { /* Modification */
/* Remove then re-add */
topology_remove_helper(target, index);
topology_register_helper(target, index, devices);
}
stonith_key_value_freeall(devices, 1, 1);
}
}
/* Fencing
*/
static void
fencing_topology_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
xmlXPathObjectPtr xpathObj = NULL;
const char *xpath = "//" XML_TAG_FENCING_LEVEL;
crm_trace("Pushing in stonith topology");
/* Grab everything */
xpathObj = xpath_search(msg, xpath);
register_fencing_topology(xpathObj, TRUE);
if(xpathObj) {
xmlXPathFreeObject(xpathObj);
}
}
static void
update_fencing_topology(const char *event, xmlNode * msg)
{
const char *xpath;
xmlXPathObjectPtr xpathObj = NULL;
/* Process deletions (only) */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
remove_fencing_topology(xpathObj);
if(xpathObj) {
xmlXPathFreeObject(xpathObj);
}
/* Process additions and changes */
xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL;
xpathObj = xpath_search(msg, xpath);
register_fencing_topology(xpathObj, FALSE);
if(xpathObj) {
xmlXPathFreeObject(xpathObj);
}
}
static void
stonith_shutdown(int nsig)
{
stonith_shutdown_flag = TRUE;
crm_info("Terminating with %d clients", g_hash_table_size(client_list));
if(mainloop != NULL && g_main_is_running(mainloop)) {
g_main_quit(mainloop);
} else {
stonith_cleanup();
exit(EX_OK);
}
}
cib_t *cib = NULL;
static void
stonith_cleanup(void)
{
if(cib) {
cib->cmds->signoff(cib);
}
qb_ipcs_destroy(ipcs);
crm_peer_destroy();
g_hash_table_destroy(client_list);
free(stonith_our_uname);
#if HAVE_LIBXML2
crm_xml_cleanup();
#endif
}
/* *INDENT-OFF* */
static struct crm_option long_options[] = {
{"stand-alone", 0, 0, 's'},
{"stand-alone-w-cpg", 0, 0, 'c'},
{"verbose", 0, 0, 'V'},
{"version", 0, 0, '$'},
{"help", 0, 0, '?'},
{0, 0, 0, 0}
};
/* *INDENT-ON* */
static void
setup_cib(void)
{
static void *cib_library = NULL;
static cib_t *(*cib_new_fn)(void) = NULL;
static const char *(*cib_err_fn)(int) = NULL;
int rc, retries = 0;
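/* Resolve the CIB API at runtime via dlopen()/dlsym() rather than
 * calling it directly; the static pointers above cache the handles
 * so repeated calls are cheap.
 */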
if(cib_library == NULL) {
cib_library = dlopen(CIB_LIBRARY, RTLD_LAZY);
}
if(cib_library && cib_new_fn == NULL) {
cib_new_fn = dlsym(cib_library, "cib_new");
}
if(cib_library && cib_err_fn == NULL) {
cib_err_fn = dlsym(cib_library, "pcmk_strerror");
}
if(cib_new_fn != NULL) {
cib = (*cib_new_fn)();
}
if(cib == NULL) {
crm_err("No connection to the CIB");
return;
}
do {
sleep(retries);
rc = cib->cmds->signon(cib, CRM_SYSTEM_CRMD, cib_command);
} while(rc == -ENOTCONN && ++retries < 5);
if (rc != pcmk_ok) {
crm_err("Could not connect to the CIB service: %s", (*cib_err_fn)(rc));
} else if (pcmk_ok != cib->cmds->add_notify_callback(
cib, T_CIB_DIFF_NOTIFY, update_fencing_topology)) {
crm_err("Could not set CIB notification callback");
} else {
rc = cib->cmds->query(cib, NULL, NULL, cib_scope_local);
add_cib_op_callback(cib, rc, FALSE, NULL, fencing_topology_callback);
crm_notice("Watching for stonith topology changes");
}
}
struct qb_ipcs_service_handlers ipc_callbacks =
{
.connection_accept = st_ipc_accept,
.connection_created = st_ipc_created,
.msg_process = st_ipc_dispatch,
.connection_closed = st_ipc_closed,
.connection_destroyed = st_ipc_destroy
};
int
main(int argc, char ** argv)
{
int flag;
int rc = 0;
int lpc = 0;
int argerr = 0;
int option_index = 0;
- const char *actions[] = { "reboot", "poweroff", "list", "monitor", "status" };
+ const char *actions[] = { "reboot", "off", "list", "monitor", "status" };
crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE);
crm_set_options(NULL, "mode [options]", long_options,
"Provides a summary of cluster's current state."
"\n\nOutputs varying levels of detail in a number of different formats.\n");
while (1) {
flag = crm_get_option(argc, argv, &option_index);
if (flag == -1) {
break;
}
switch(flag) {
case 'V':
crm_bump_log_level();
break;
case 's':
stand_alone = TRUE;
break;
case 'c':
stand_alone = FALSE;
no_cib_connect = TRUE;
break;
case '$':
case '?':
crm_help(flag, EX_OK);
break;
default:
++argerr;
break;
}
}
if(argc - optind == 1 && safe_str_eq("metadata", argv[optind])) {
printf("\n");
printf("\n");
printf(" 1.0\n");
printf(" This is a fake resource that details the instance attributes handled by stonithd.\n");
printf(" Options available for all stonith resources\n");
printf(" \n");
printf(" \n");
printf(" How long to wait for the STONITH action to complete.\n");
printf(" Overrides the stonith-timeout cluster property\n");
printf(" \n");
printf(" \n");
printf(" \n");
printf(" The priority of the stonith resource. The lower the number, the higher the priority.\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTARG);
printf(" Advanced use only: An alternate parameter to supply instead of 'port'\n");
printf(" Some devices do not support the standard 'port' parameter or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n"
"A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n"
" \n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTMAP);
printf(" A mapping of host names to ports numbers for devices that do not support host names.\n");
printf(" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n");
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTLIST);
printf(" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK);
printf(" \n");
printf(" \n");
printf(" \n", STONITH_ATTR_HOSTCHECK);
printf(" How to determin which machines are controlled by the device.\n");
printf(" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST);
printf(" \n");
printf(" \n");
for(lpc = 0; lpc < DIMOF(actions); lpc++) {
printf(" \n", actions[lpc]);
printf(" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]);
printf(" Some devices do not support the standard commands or may provide additional ones.\n"
"Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]);
printf(" \n", actions[lpc]);
printf(" \n");
}
printf(" \n");
printf("\n");
return 0;
}
if (optind != argc) {
++argerr;
}
if (argerr) {
crm_help('?', EX_USAGE);
}
mainloop_add_signal(SIGTERM, stonith_shutdown);
crm_peer_init();
client_list = g_hash_table_new(crm_str_hash, g_str_equal);
if(stand_alone == FALSE) {
void *dispatch = NULL;
void *destroy = NULL;
#if SUPPORT_HEARTBEAT
dispatch = stonith_peer_hb_callback;
destroy = stonith_peer_hb_destroy;
#endif
if(is_openais_cluster()) {
#if SUPPORT_COROSYNC
destroy = stonith_peer_ais_destroy;
dispatch = stonith_peer_ais_callback;
#endif
}
if(crm_cluster_connect(&stonith_our_uname, NULL, dispatch, destroy,
#if SUPPORT_HEARTBEAT
&hb_conn
#else
NULL
#endif
) == FALSE) {
crm_crit("Cannot sign in to the cluster... terminating");
exit(100);
}
if (no_cib_connect == FALSE) {
setup_cib();
}
} else {
stonith_our_uname = strdup("localhost");
}
device_list = g_hash_table_new_full(
crm_str_hash, g_str_equal, NULL, free_device);
topology = g_hash_table_new_full(
crm_str_hash, g_str_equal, NULL, free_topology_entry);
ipcs = mainloop_add_ipc_server("stonith-ng", QB_IPC_NATIVE, &ipc_callbacks);
#if SUPPORT_STONITH_CONFIG
if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) {
standalone_cfg_commit();
}
#endif
if(ipcs != NULL) {
/* Create the mainloop and run it... */
mainloop = g_main_new(FALSE);
crm_info("Starting %s mainloop", crm_system_name);
g_main_run(mainloop);
} else {
crm_err("Couldnt start all communication channels, exiting.");
}
stonith_cleanup();
#if SUPPORT_HEARTBEAT
if(hb_conn) {
hb_conn->llc_ops->delete(hb_conn);
}
#endif
crm_info("Done");
qb_log_fini();
return rc;
}
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index 8514c095c3..282ae3cb53 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -1,435 +1,438 @@
/*
* Copyright (C) 2004 Andrew Beekhof
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include
#include
#include
#include
#include
#include
#include
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
static gboolean
check_quorum(const char *value)
{
if (safe_str_eq(value, "stop")) {
return TRUE;
} else if (safe_str_eq(value, "freeze")) {
return TRUE;
} else if (safe_str_eq(value, "ignore")) {
return TRUE;
} else if (safe_str_eq(value, "suicide")) {
return TRUE;
}
return FALSE;
}
static gboolean
check_health(const char *value)
{
if (safe_str_eq(value, "none")) {
return TRUE;
} else if (safe_str_eq(value, "custom")) {
return TRUE;
} else if (safe_str_eq(value, "only-green")) {
return TRUE;
} else if (safe_str_eq(value, "progressive")) {
return TRUE;
} else if (safe_str_eq(value, "migrate-on-red")) {
return TRUE;
}
return FALSE;
}
static gboolean
check_stonith_action(const char *value)
{
if (safe_str_eq(value, "reboot")) {
return TRUE;
} else if (safe_str_eq(value, "poweroff")) {
return TRUE;
+
+ } else if (safe_str_eq(value, "off")) {
+ return TRUE;
}
return FALSE;
}
static gboolean
check_placement_strategy(const char *value)
{
if (safe_str_eq(value, "default")) {
return TRUE;
} else if (safe_str_eq(value, "utilization")) {
return TRUE;
} else if (safe_str_eq(value, "minimal")) {
return TRUE;
} else if (safe_str_eq(value, "balanced")) {
return TRUE;
}
return FALSE;
}
/* *INDENT-OFF* */
pe_cluster_option pe_opts[] = {
/* name, old-name, validate, default, description */
{ "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum,
"What to do when the cluster does not have quorum", NULL },
{ "symmetric-cluster", "symmetric_cluster", "boolean", NULL, "true", &check_boolean,
"All resources can run anywhere by default", NULL },
{ "default-resource-stickiness", "default_resource_stickiness", "integer", NULL, "0", &check_number, "", NULL },
{ "is-managed-default", "is_managed_default", "boolean", NULL, "true", &check_boolean,
"Should the cluster start/stop resources as required", NULL },
{ "maintenance-mode", NULL, "boolean", NULL, "false", &check_boolean,
"Should the cluster monitor resources and start/stop them as required", NULL },
{ "start-failure-is-fatal", NULL, "boolean", NULL, "true", &check_boolean, "Always treat start failures as fatal",
"This was the old default. However when set to FALSE, the cluster will instead use the resource's failcount and value for resource-failure-stickiness" },
{ "enable-startup-probes", NULL, "boolean", NULL, "true", &check_boolean,
"Should the cluster check for active resources during startup", NULL },
/* Stonith Options */
{ "stonith-enabled", "stonith_enabled", "boolean", NULL, "true", &check_boolean,
"Failed nodes are STONITH'd", NULL },
- { "stonith-action", "stonith_action", "enum", "reboot, poweroff", "reboot", &check_stonith_action,
+ { "stonith-action", "stonith_action", "enum", "reboot, poweroff, off", "reboot", &check_stonith_action,
"Action to send to STONITH device", NULL },
{ "stonith-timeout", NULL, "time", NULL, "60s", &check_timer,
"How long to wait for the STONITH action to complete", NULL },
{ "startup-fencing", "startup_fencing", "boolean", NULL, "true", &check_boolean,
"STONITH unseen nodes", "Advanced Use Only! Not using the default is very unsafe!" },
/* Timeouts etc */
{ "cluster-delay", "transition_idle_timeout", "time", NULL, "60s", &check_time,
"Round trip delay over the network (excluding action execution)",
"The \"correct\" value will depend on the speed and load of your network and cluster nodes." },
{ "batch-limit", NULL, "integer", NULL, "30", &check_number,
"The number of jobs that the TE is allowed to execute in parallel",
"The \"correct\" value will depend on the speed and load of your network and cluster nodes." },
{ "migration-limit", NULL, "integer", NULL, "-1", &check_number,
"The number of migration jobs that the TE is allowed to execute in parallel on a node"},
{ "default-action-timeout", "default_action_timeout", "time", NULL, "20s", &check_time,
"How long to wait for actions to complete", NULL },
/* Orphans and stopping */
{ "stop-all-resources", NULL, "boolean", NULL, "false", &check_boolean,
"Should the cluster stop all active resources (except those needed for fencing)", NULL },
{ "stop-orphan-resources", "stop_orphan_resources", "boolean", NULL, "true", &check_boolean,
"Should deleted resources be stopped", NULL },
{ "stop-orphan-actions", "stop_orphan_actions", "boolean", NULL, "true", &check_boolean,
"Should deleted actions be cancelled", NULL },
{ "remove-after-stop", "remove_after_stop", "boolean", NULL, "false", &check_boolean,
"Remove resources from the LRM after they are stopped",
"Always set this to false. Other values are, at best, poorly tested and potentially dangerous." },
/* { "", "", , "0", "", NULL }, */
/* Storing inputs */
{ "pe-error-series-max", NULL, "integer", NULL, "-1", &check_number,
"The number of PE inputs resulting in ERRORs to save", "Zero to disable, -1 to store unlimited." },
{ "pe-warn-series-max", NULL, "integer", NULL, "5000", &check_number,
"The number of PE inputs resulting in WARNINGs to save", "Zero to disable, -1 to store unlimited." },
{ "pe-input-series-max", NULL, "integer", NULL, "4000", &check_number,
"The number of other PE inputs to save", "Zero to disable, -1 to store unlimited." },
/* Node health */
{ "node-health-strategy", NULL, "enum", "none, migrate-on-red, only-green, progressive, custom", "none", &check_health,
"The strategy combining node attributes to determine overall node health.",
"Requires external entities to create node attributes (named with the prefix '#health') with values: 'red', 'yellow' or 'green'."},
{ "node-health-green", NULL, "integer", NULL, "0", &check_number,
"The score 'green' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
{ "node-health-yellow", NULL, "integer", NULL, "0", &check_number,
"The score 'yellow' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
{ "node-health-red", NULL, "integer", NULL, "-INFINITY", &check_number,
"The score 'red' translates to in rsc_location constraints",
"Only used when node-health-strategy is set to custom or progressive." },
/*Placement Strategy*/
{ "placement-strategy", NULL, "enum", "default, utilization, minimal, balanced", "default", &check_placement_strategy,
"The strategy to determine resource placement", NULL},
};
/* *INDENT-ON* */
void
pe_metadata(void)
{
config_metadata("Policy Engine", "1.0",
"Policy Engine Options",
"This is a fake resource that details the options that can be configured for the Policy Engine.",
pe_opts, DIMOF(pe_opts));
}
void
verify_pe_options(GHashTable * options)
{
verify_all_options(options, pe_opts, DIMOF(pe_opts));
}
const char *
pe_pref(GHashTable * options, const char *name)
{
return get_cluster_pref(options, pe_opts, DIMOF(pe_opts), name);
}
const char *
fail2text(enum action_fail_response fail)
{
const char *result = "";
switch (fail) {
case action_fail_ignore:
result = "ignore";
break;
case action_fail_block:
result = "block";
break;
case action_fail_recover:
result = "recover";
break;
case action_fail_migrate:
result = "migrate";
break;
case action_fail_stop:
result = "stop";
break;
case action_fail_fence:
result = "fence";
break;
case action_fail_standby:
result = "standby";
break;
}
return result;
}
enum action_tasks
text2task(const char *task)
{
if (safe_str_eq(task, CRMD_ACTION_STOP)) {
return stop_rsc;
} else if (safe_str_eq(task, CRMD_ACTION_STOPPED)) {
return stopped_rsc;
} else if (safe_str_eq(task, CRMD_ACTION_START)) {
return start_rsc;
} else if (safe_str_eq(task, CRMD_ACTION_STARTED)) {
return started_rsc;
} else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) {
return shutdown_crm;
} else if (safe_str_eq(task, CRM_OP_FENCE)) {
return stonith_node;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
return monitor_rsc;
} else if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) {
return action_notify;
} else if (safe_str_eq(task, CRMD_ACTION_NOTIFIED)) {
return action_notified;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
return action_promote;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
return action_demote;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTED)) {
return action_promoted;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTED)) {
return action_demoted;
}
#if SUPPORT_TRACING
if (safe_str_eq(task, CRMD_ACTION_CANCEL)) {
return no_action;
} else if (safe_str_eq(task, CRMD_ACTION_DELETE)) {
return no_action;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
return no_action;
} else if (safe_str_eq(task, CRM_OP_PROBED)) {
return no_action;
} else if (safe_str_eq(task, CRM_OP_LRM_REFRESH)) {
return no_action;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
return no_action;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
return no_action;
} else if (safe_str_eq(task, "fail")) {
return no_action;
} else if (safe_str_eq(task, "stonith_up")) {
return no_action;
} else if (safe_str_eq(task, "stonith_complete")) {
return no_action;
} else if (safe_str_eq(task, "all_stopped")) {
return no_action;
}
crm_trace("Unsupported action: %s", task);
#endif
return no_action;
}
const char *
task2text(enum action_tasks task)
{
const char *result = "";
switch (task) {
case no_action:
result = "no_action";
break;
case stop_rsc:
result = CRMD_ACTION_STOP;
break;
case stopped_rsc:
result = CRMD_ACTION_STOPPED;
break;
case start_rsc:
result = CRMD_ACTION_START;
break;
case started_rsc:
result = CRMD_ACTION_STARTED;
break;
case shutdown_crm:
result = CRM_OP_SHUTDOWN;
break;
case stonith_node:
result = CRM_OP_FENCE;
break;
case monitor_rsc:
result = CRMD_ACTION_STATUS;
break;
case action_notify:
result = CRMD_ACTION_NOTIFY;
break;
case action_notified:
result = CRMD_ACTION_NOTIFIED;
break;
case action_promote:
result = CRMD_ACTION_PROMOTE;
break;
case action_promoted:
result = CRMD_ACTION_PROMOTED;
break;
case action_demote:
result = CRMD_ACTION_DEMOTE;
break;
case action_demoted:
result = CRMD_ACTION_DEMOTED;
break;
}
return result;
}
const char *
role2text(enum rsc_role_e role)
{
CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);
switch (role) {
case RSC_ROLE_UNKNOWN:
return RSC_ROLE_UNKNOWN_S;
case RSC_ROLE_STOPPED:
return RSC_ROLE_STOPPED_S;
case RSC_ROLE_STARTED:
return RSC_ROLE_STARTED_S;
case RSC_ROLE_SLAVE:
return RSC_ROLE_SLAVE_S;
case RSC_ROLE_MASTER:
return RSC_ROLE_MASTER_S;
}
return RSC_ROLE_UNKNOWN_S;
}
enum rsc_role_e
text2role(const char *role)
{
CRM_ASSERT(role != NULL);
if (safe_str_eq(role, RSC_ROLE_STOPPED_S)) {
return RSC_ROLE_STOPPED;
} else if (safe_str_eq(role, RSC_ROLE_STARTED_S)) {
return RSC_ROLE_STARTED;
} else if (safe_str_eq(role, RSC_ROLE_SLAVE_S)) {
return RSC_ROLE_SLAVE;
} else if (safe_str_eq(role, RSC_ROLE_MASTER_S)) {
return RSC_ROLE_MASTER;
} else if (safe_str_eq(role, RSC_ROLE_UNKNOWN_S)) {
return RSC_ROLE_UNKNOWN;
}
crm_err("Unknown role: %s", role);
return RSC_ROLE_UNKNOWN;
}
int
merge_weights(int w1, int w2)
{
int result = w1 + w2;
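/* Scores saturate at +/-INFINITY, and -INFINITY wins over +INFINITY;
 * the checks below also catch integer wrap-around from the addition.
 */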
if (w1 <= -INFINITY || w2 <= -INFINITY) {
if (w1 >= INFINITY || w2 >= INFINITY) {
crm_trace("-INFINITY + INFINITY == -INFINITY");
}
return -INFINITY;
} else if (w1 >= INFINITY || w2 >= INFINITY) {
return INFINITY;
}
/* detect wrap-around */
if (result > 0) {
if (w1 <= 0 && w2 < 0) {
result = -INFINITY;
}
} else if (w1 > 0 && w2 > 0) {
result = INFINITY;
}
/* detect +/- INFINITY */
if (result >= INFINITY) {
result = INFINITY;
} else if (result <= -INFINITY) {
result = -INFINITY;
}
crm_trace("%d + %d = %d", w1, w2, result);
return result;
}
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
CRM_CHECK(hash != NULL, return);
crm_trace("adding: name=%s value=%s", crm_str(name), crm_str(value));
if (name == NULL || value == NULL) {
return;
} else if (safe_str_eq(value, "#default")) {
return;
} else if (g_hash_table_lookup(hash, name) == NULL) {
g_hash_table_insert(hash, strdup(name), strdup(value));
}
}