
diff --git a/attrd/commands.c b/attrd/commands.c
index 9e656d862c..378a4f8088 100644
--- a/attrd/commands.c
+++ b/attrd/commands.c
@@ -1,1032 +1,1032 @@
/*
* Copyright (C) 2013 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/types.h>
#include <regex.h>
#include <glib.h>
#include <crm/msg_xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election.h>
#include <crm/cib/internal.h>
#include <internal.h>
#define ATTRD_PROTOCOL_VERSION "1"
int last_cib_op_done = 0;
char *peer_writer = NULL;
GHashTable *attributes = NULL;
typedef struct attribute_s {
char *uuid; /* TODO: Remove if at all possible */
char *id;
char *set;
GHashTable *values;
int update;
int timeout_ms;
/* TODO: refactor these three as a bitmask */
bool changed; /* whether attribute value has changed since last write */
bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */
gboolean is_private; /* whether to keep this attribute out of the CIB */
mainloop_timer_t *timer;
char *user;
} attribute_t;
typedef struct attribute_value_s {
uint32_t nodeid;
gboolean is_remote;
char *nodename;
char *current;
char *requested;
char *stored;
} attribute_value_t;
void write_attribute(attribute_t *a);
void write_or_elect_attribute(attribute_t *a);
void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source);
static gboolean
send_attrd_message(crm_node_t * node, xmlNode * data)
{
crm_xml_add(data, F_TYPE, T_ATTRD);
crm_xml_add(data, F_ATTRD_IGNORE_LOCALLY, "atomic-version"); /* Tell older versions to ignore our messages */
crm_xml_add(data, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(data, F_ATTRD_WRITER, election_state(writer));
return send_cluster_message(node, crm_msg_attrd, data, TRUE);
}
static gboolean
attribute_timer_cb(gpointer data)
{
attribute_t *a = data;
crm_trace("Dampen interval expired for %s in state %d", a->id, election_state(writer));
write_or_elect_attribute(a);
return FALSE;
}
static void
free_attribute_value(gpointer data)
{
attribute_value_t *v = data;
free(v->nodename);
free(v->current);
free(v->requested);
free(v->stored);
free(v);
}
void
free_attribute(gpointer data)
{
attribute_t *a = data;
if(a) {
free(a->id);
free(a->set);
free(a->uuid);
free(a->user);
mainloop_timer_del(a->timer);
g_hash_table_destroy(a->values);
free(a);
}
}
static xmlNode *
build_attribute_xml(
xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
gboolean is_private, const char *peer, uint32_t peerid, const char *value)
{
xmlNode *xml = create_xml_node(parent, __FUNCTION__);
crm_xml_add(xml, F_ATTRD_ATTRIBUTE, name);
crm_xml_add(xml, F_ATTRD_SET, set);
crm_xml_add(xml, F_ATTRD_KEY, uuid);
crm_xml_add(xml, F_ATTRD_USER, user);
crm_xml_add(xml, F_ATTRD_HOST, peer);
crm_xml_add_int(xml, F_ATTRD_HOST_ID, peerid);
crm_xml_add(xml, F_ATTRD_VALUE, value);
crm_xml_add_int(xml, F_ATTRD_DAMPEN, timeout_ms/1000);
crm_xml_add_int(xml, F_ATTRD_IS_PRIVATE, is_private);
return xml;
}
static attribute_t *
create_attribute(xmlNode *xml)
{
int dampen = 0;
const char *value = crm_element_value(xml, F_ATTRD_DAMPEN);
attribute_t *a = calloc(1, sizeof(attribute_t));
a->id = crm_element_value_copy(xml, F_ATTRD_ATTRIBUTE);
a->set = crm_element_value_copy(xml, F_ATTRD_SET);
a->uuid = crm_element_value_copy(xml, F_ATTRD_KEY);
a->values = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, free_attribute_value);
crm_element_value_int(xml, F_ATTRD_IS_PRIVATE, &a->is_private);
#if ENABLE_ACL
crm_trace("Performing all %s operations as user '%s'", a->id, a->user);
a->user = crm_element_value_copy(xml, F_ATTRD_USER);
#endif
if(value) {
dampen = crm_get_msec(value);
crm_trace("Created attribute %s with delay %dms (%s)", a->id, dampen, value);
} else {
crm_trace("Created attribute %s with no delay", a->id);
}
if(dampen > 0) {
a->timeout_ms = dampen;
a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a);
}
g_hash_table_replace(attributes, a->id, a);
return a;
}
/*!
* \internal
* \brief Respond to a client peer-remove request (i.e. propagate to all peers)
*
* \param[in] client_name Name of client that made request (for log messages)
* \param[in] xml Root of request XML
*
* \return void
*/
void
attrd_client_peer_remove(const char *client_name, xmlNode *xml)
{
const char *host = crm_element_value(xml, F_ATTRD_HOST);
if (host) {
crm_info("Client %s is requesting all values for %s be removed",
client_name, host);
send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */
} else {
crm_info("Ignoring request by client %s to remove all peer values without specifying peer",
client_name);
}
}
/*!
* \internal
* \brief Respond to a client update request
*
* \param[in] xml Root of request XML
*
* \return void
*/
void
attrd_client_update(xmlNode *xml)
{
attribute_t *a = NULL;
attribute_value_t *v = NULL;
char *key = crm_element_value_copy(xml, F_ATTRD_KEY);
char *set = crm_element_value_copy(xml, F_ATTRD_SET);
char *host = crm_element_value_copy(xml, F_ATTRD_HOST);
const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
const char *value = crm_element_value(xml, F_ATTRD_VALUE);
const char *regex = crm_element_value(xml, F_ATTRD_REGEX);
/* If a regex was specified, broadcast a message for each match */
if ((attr == NULL) && regex) {
GHashTableIter aIter;
regex_t *r_patt = calloc(1, sizeof(regex_t));
crm_debug("Setting %s to %s", regex, value);
if (regcomp(r_patt, regex, REG_EXTENDED)) {
crm_err("Bad regex '%s' for update", regex);
} else {
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) {
int status = regexec(r_patt, attr, 0, NULL, 0);
if (status == 0) {
crm_trace("Matched %s with %s", attr, regex);
crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr);
send_attrd_message(NULL, xml);
}
}
}
free(key);
free(set);
free(host);
regfree(r_patt);
free(r_patt);
return;
}
if (host == NULL) {
crm_trace("Inferring host");
host = strdup(attrd_cluster->uname);
crm_xml_add(xml, F_ATTRD_HOST, host);
crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid);
}
a = g_hash_table_lookup(attributes, attr);
/* If value was specified using ++ or += notation, expand to real value */
if (value) {
int offset = 1;
int int_value = 0;
static const int plus_plus_len = 5;
if ((strlen(value) >= (plus_plus_len + 2)) && (value[plus_plus_len] == '+')
&& ((value[plus_plus_len + 1] == '+') || (value[plus_plus_len + 1] == '='))) {
if (a) {
v = g_hash_table_lookup(a->values, host);
}
if (v) {
int_value = char2score(v->current);
}
if (value[plus_plus_len + 1] != '+') {
const char *offset_s = value + (plus_plus_len + 2);
offset = char2score(offset_s);
}
int_value += offset;
if (int_value > INFINITY) {
int_value = INFINITY;
}
crm_info("Expanded %s=%s to %d", attr, value, int_value);
crm_xml_add_int(xml, F_ATTRD_VALUE, int_value);
/* Replacing the value frees the previous memory, so re-query it */
value = crm_element_value(xml, F_ATTRD_VALUE);
}
}
if ((peer_writer == NULL) && (election_state(writer) != election_in_progress)) {
crm_info("Starting an election to determine the writer");
election_vote(writer);
}
crm_debug("Broadcasting %s[%s] = %s%s", attr, host, value,
((election_state(writer) == election_won)? " (writer)" : ""));
free(key);
free(set);
free(host);
send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */
}
/*!
* \internal
* \brief Respond to a client refresh request (i.e. write out all attributes)
*
* \return void
*/
void
attrd_client_refresh(void)
{
GHashTableIter iter;
attribute_t *a = NULL;
/* 'refresh' forces a write of the current value of all attributes
* Cancel any existing timers, we're writing it NOW
*/
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
mainloop_timer_stop(a->timer);
}
crm_info("Updating all attributes");
write_attributes(TRUE, FALSE);
}
/*!
* \internal
* \brief Build the XML reply to a client query
*
* \param[in] attr Name of requested attribute
* \param[in] host Name of requested host (or NULL for all hosts)
*
* \return New XML reply
* \note Caller is responsible for freeing the resulting XML
*/
static xmlNode *build_query_reply(const char *attr, const char *host)
{
xmlNode *reply = create_xml_node(NULL, __FUNCTION__);
attribute_t *a;
if (reply == NULL) {
return NULL;
}
crm_xml_add(reply, F_TYPE, T_ATTRD);
crm_xml_add(reply, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION);
/* If desired attribute exists, add its value(s) to the reply */
a = g_hash_table_lookup(attributes, attr);
if (a) {
attribute_value_t *v;
xmlNode *host_value;
crm_xml_add(reply, F_ATTRD_ATTRIBUTE, attr);
/* Allow caller to use "localhost" to refer to local node */
if (safe_str_eq(host, "localhost")) {
host = attrd_cluster->uname;
crm_trace("Mapped localhost to %s", host);
}
/* If a specific node was requested, add its value */
if (host) {
v = g_hash_table_lookup(a->values, host);
host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
if (host_value == NULL) {
free_xml(reply);
return NULL;
}
crm_xml_add(host_value, F_ATTRD_HOST, host);
crm_xml_add(host_value, F_ATTRD_VALUE, (v? v->current : NULL));
/* Otherwise, add all nodes' values */
} else {
GHashTableIter iter;
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
host_value = create_xml_node(reply, XML_CIB_TAG_NODE);
if (host_value == NULL) {
free_xml(reply);
return NULL;
}
crm_xml_add(host_value, F_ATTRD_HOST, v->nodename);
crm_xml_add(host_value, F_ATTRD_VALUE, v->current);
}
}
}
return reply;
}
/*!
* \internal
* \brief Respond to a client query
*
* \param[in] client Who queried us
* \param[in] query Root of query XML
*
* \return void
*/
void
attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *query)
{
const char *attr;
const char *origin = crm_element_value(query, F_ORIG);
ssize_t rc;
xmlNode *reply;
if (origin == NULL) {
origin = "unknown client";
}
crm_debug("Query arrived from %s", origin);
/* Request must specify attribute name to query */
attr = crm_element_value(query, F_ATTRD_ATTRIBUTE);
if (attr == NULL) {
crm_warn("Ignoring malformed query from %s (no attribute name given)",
origin);
return;
}
/* Build the XML reply */
reply = build_query_reply(attr, crm_element_value(query, F_ATTRD_HOST));
if (reply == NULL) {
crm_err("Could not respond to query from %s: could not create XML reply",
origin);
return;
}
crm_log_xml_trace(reply, "Reply");
/* Send the reply to the client */
client->request_id = 0;
if ((rc = crm_ipcs_send(client, id, reply, flags)) < 0) {
crm_err("Could not respond to query from %s: %s (%d)",
origin, pcmk_strerror(-rc), -rc);
}
free_xml(reply);
}
void
attrd_peer_message(crm_node_t *peer, xmlNode *xml)
{
int peer_state = 0;
const char *v = crm_element_value(xml, F_ATTRD_VERSION);
const char *op = crm_element_value(xml, F_ATTRD_TASK);
const char *election_op = crm_element_value(xml, F_CRM_TASK);
const char *host = crm_element_value(xml, F_ATTRD_HOST);
if(election_op) {
enum election_result rc = 0;
crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname);
rc = election_count_vote(writer, xml, TRUE);
switch(rc) {
case election_start:
free(peer_writer);
peer_writer = NULL;
election_vote(writer);
break;
case election_lost:
free(peer_writer);
peer_writer = strdup(peer->uname);
break;
default:
election_check(writer);
break;
}
return;
} else if(v == NULL) {
/* From the non-atomic version */
if (safe_str_eq(op, ATTRD_OP_UPDATE)) {
const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
crm_trace("Compatibility update of %s from %s", name, peer->uname);
attrd_peer_update(peer, xml, host, FALSE);
} else if (safe_str_eq(op, ATTRD_OP_FLUSH)) {
const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a) {
crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname);
write_or_elect_attribute(a);
}
} else if (safe_str_eq(op, ATTRD_OP_REFRESH)) {
GHashTableIter aIter;
attribute_t *a = NULL;
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname);
write_or_elect_attribute(a);
}
}
}
crm_element_value_int(xml, F_ATTRD_WRITER, &peer_state);
if(election_state(writer) == election_won
&& peer_state == election_won
&& safe_str_neq(peer->uname, attrd_cluster->uname)) {
crm_notice("Detected another attribute writer: %s", peer->uname);
election_vote(writer);
} else if(peer_state == election_won) {
if(peer_writer == NULL) {
peer_writer = strdup(peer->uname);
crm_notice("Recorded attribute writer: %s", peer->uname);
} else if(safe_str_neq(peer->uname, peer_writer)) {
crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, peer_writer);
free(peer_writer);
peer_writer = strdup(peer->uname);
}
}
if (safe_str_eq(op, ATTRD_OP_UPDATE)) {
attrd_peer_update(peer, xml, host, FALSE);
} else if (safe_str_eq(op, ATTRD_OP_SYNC)) {
attrd_peer_sync(peer, xml);
} else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) {
int host_id = 0;
char *endptr = NULL;
errno = 0;
host_id = strtol(host, &endptr, 10);
if (errno != 0 || endptr == host || *endptr != '\0') {
host_id = 0;
} else {
host = NULL;
}
attrd_peer_remove(host_id, host, TRUE, peer->uname);
} else if (safe_str_eq(op, ATTRD_OP_SYNC_RESPONSE)
&& safe_str_neq(peer->uname, attrd_cluster->uname)) {
xmlNode *child = NULL;
crm_notice("Processing %s from %s", op, peer->uname);
for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) {
host = crm_element_value(child, F_ATTRD_HOST);
attrd_peer_update(peer, child, host, TRUE);
}
}
}
void
attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
{
GHashTableIter aIter;
GHashTableIter vIter;
attribute_t *a = NULL;
attribute_value_t *v = NULL;
xmlNode *sync = create_xml_node(NULL, __FUNCTION__);
crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE);
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
v->nodename, v->nodeid, v->current);
}
}
crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
send_attrd_message(peer, sync);
free_xml(sync);
}
void
attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source)
{
attribute_t *a = NULL;
GHashTableIter aIter;
crm_notice("Removing all %s attributes for %s", host, source);
if(host == NULL) {
return;
}
g_hash_table_iter_init(&aIter, attributes);
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
if(g_hash_table_remove(a->values, host)) {
crm_debug("Removed %s[%s] for %s", a->id, host, source);
}
}
if (uncache) {
crm_remote_peer_cache_remove(host);
reap_crm_member(nodeid, host);
}
}
/*!
* \internal
* \brief Return host's hash table entry (creating one if needed)
*
* \param[in] values Hash table of values
* \param[in] host Name of peer to look up
* \param[in] xml XML describing the attribute
*
* \return Pointer to new or existing hash table entry
*/
static attribute_value_t *
attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
{
attribute_value_t *v = g_hash_table_lookup(values, host);
if (v == NULL) {
v = calloc(1, sizeof(attribute_value_t));
CRM_ASSERT(v != NULL);
v->nodename = strdup(host);
CRM_ASSERT(v->nodename != NULL);
crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &v->is_remote);
if (v->is_remote == TRUE) {
crm_remote_peer_cache_add(host);
}
g_hash_table_replace(values, v->nodename, v);
}
return(v);
}
void
attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
{
bool changed = FALSE;
attribute_value_t *v = NULL;
const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE);
const char *value = crm_element_value(xml, F_ATTRD_VALUE);
attribute_t *a = g_hash_table_lookup(attributes, attr);
if(a == NULL) {
a = create_attribute(xml);
}
if(host == NULL) {
GHashTableIter vIter;
g_hash_table_iter_init(&vIter, a->values);
crm_debug("Setting %s for all hosts to %s", attr, value);
xml_remove_prop(xml, F_ATTRD_HOST_ID);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
attrd_peer_update(peer, xml, host, filter);
}
return;
}
v = attrd_lookup_or_create_value(a->values, host, xml);
if(filter
&& safe_str_neq(v->current, value)
&& safe_str_eq(host, attrd_cluster->uname)) {
xmlNode *sync = create_xml_node(NULL, __FUNCTION__);
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
a->id, host, v->current, value, peer->uname);
crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE);
v = g_hash_table_lookup(a->values, host);
build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
v->nodename, v->nodeid, v->current);
crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer));
send_attrd_message(peer, sync);
free_xml(sync);
} else if(safe_str_neq(v->current, value)) {
crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname);
free(v->current);
if(value) {
v->current = strdup(value);
} else {
v->current = NULL;
}
changed = TRUE;
} else {
crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
}
a->changed |= changed;
if(changed) {
if(a->timer) {
crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id);
mainloop_timer_start(a->timer);
} else {
write_or_elect_attribute(a);
}
}
/* this only involves cluster nodes. */
if(v->nodeid == 0 && (v->is_remote == FALSE)) {
if(crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0) {
/* Create the name/id association */
crm_node_t *peer = crm_get_peer(v->nodeid, host);
crm_trace("We know %s's node id now: %s", peer->uname, peer->uuid);
if(election_state(writer) == election_won) {
write_attributes(FALSE, TRUE);
return;
}
}
}
}
void
write_or_elect_attribute(attribute_t *a)
{
enum election_result rc = election_state(writer);
if(rc == election_won) {
write_attribute(a);
} else if(rc == election_in_progress) {
crm_trace("Election in progress to determine who will write out %s", a->id);
} else if(peer_writer == NULL) {
crm_info("Starting an election to determine who will write out %s", a->id);
election_vote(writer);
} else {
crm_trace("%s will write out %s, we are in state %d", peer_writer, a->id, rc);
}
}
gboolean
attrd_election_cb(gpointer user_data)
{
crm_trace("Election complete");
free(peer_writer);
peer_writer = strdup(attrd_cluster->uname);
/* Update the peers after an election */
attrd_peer_sync(NULL, NULL);
/* Update the CIB after an election */
write_attributes(TRUE, FALSE);
return FALSE;
}
void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
if ((kind == crm_status_nstate) || (kind == crm_status_rstate)) {
if (safe_str_eq(peer->state, CRM_NODE_MEMBER)) {
/* If we're the writer, send new peers a list of all attributes
* (unless it's a remote node, which doesn't run its own attrd)
*/
if ((election_state(writer) == election_won)
&& !is_set(peer->flags, crm_remote_node)) {
attrd_peer_sync(peer, NULL);
}
} else {
/* Remove all attribute values associated with lost nodes */
attrd_peer_remove(peer->id, peer->uname, FALSE, __FUNCTION__);
if (peer_writer && safe_str_eq(peer->uname, peer_writer)) {
free(peer_writer);
peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->uname);
}
}
}
}
static void
attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
int level = LOG_ERR;
GHashTableIter iter;
const char *peer = NULL;
attribute_value_t *v = NULL;
char *name = user_data;
attribute_t *a = g_hash_table_lookup(attributes, name);
if(a == NULL) {
crm_info("Attribute %s no longer exists", name);
goto done;
}
a->update = 0;
if (rc == pcmk_ok && call_id < 0) {
rc = call_id;
}
switch (rc) {
case pcmk_ok:
level = LOG_INFO;
last_cib_op_done = call_id;
break;
case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */
case -ETIME: /* When an attr changes while there is a DC election */
case -ENXIO: /* When an attr changes while the CIB is syncing a
* newer config from a node that just came up
*/
level = LOG_WARNING;
break;
}
do_crm_log(level, "Update %d for %s: %s (%d)", call_id, name, pcmk_strerror(rc), rc);
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
do_crm_log(level, "Update %d for %s[%s]=%s: %s (%d)", call_id, a->id, peer, v->requested, pcmk_strerror(rc), rc);
if(rc == pcmk_ok) {
free(v->stored);
v->stored = v->requested;
v->requested = NULL;
} else {
free(v->requested);
v->requested = NULL;
a->changed = TRUE; /* Attempt write out again */
}
}
done:
if(a && a->changed && election_state(writer) == election_won) {
write_attribute(a);
}
}
void
write_attributes(bool all, bool peer_discovered)
{
GHashTableIter iter;
attribute_t *a = NULL;
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
if (peer_discovered && a->unknown_peer_uuids) {
/* a new peer uuid has been discovered, try writing this attribute again. */
a->changed = TRUE;
}
if(all || a->changed) {
write_attribute(a);
} else {
crm_debug("Skipping unchanged attribute %s", a->id);
}
}
}
static void
build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value)
{
char *set = NULL;
char *uuid = NULL;
xmlNode *xml_obj = NULL;
if(a->set) {
set = strdup(a->set);
} else {
set = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, nodeid);
}
if(a->uuid) {
uuid = strdup(a->uuid);
} else {
int lpc;
uuid = crm_strdup_printf("%s-%s", set, a->id);
/* Minimal attempt at sanitizing automatic IDs */
for (lpc = 0; uuid[lpc] != 0; lpc++) {
switch (uuid[lpc]) {
case ':':
uuid[lpc] = '.';
}
}
}
xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE);
crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS);
crm_xml_add(xml_obj, XML_ATTR_ID, nodeid);
xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS);
crm_xml_add(xml_obj, XML_ATTR_ID, set);
xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR);
crm_xml_add(xml_obj, XML_ATTR_ID, uuid);
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id);
if(value) {
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value);
} else {
crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, "");
crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE);
}
free(uuid);
free(set);
}
void
write_attribute(attribute_t *a)
{
int private_updates = 0, cib_updates = 0;
xmlNode *xml_top = NULL;
attribute_value_t *v = NULL;
GHashTableIter iter;
enum cib_call_options flags = cib_quorum_override;
if (a == NULL) {
return;
}
/* If this attribute will be written to the CIB ... */
if (!a->is_private) {
/* Defer the write if now's not a good time */
if (the_cib == NULL) {
crm_info("Write out of '%s' delayed: cib not connected", a->id);
return;
} else if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update);
} else if (a->update) {
crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update);
return;
} else if (mainloop_timer_running(a->timer)) {
crm_info("Write out of '%s' delayed: timer is running", a->id);
return;
}
/* Initialize the status update XML */
xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
}
/* Attribute will be written shortly, so clear changed flag */
a->changed = FALSE;
/* We will check all peers' uuids shortly, so initialize this to false */
a->unknown_peer_uuids = FALSE;
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) {
- crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_REMOTE|CRM_GET_PEER_CLUSTER);
+ crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY);
/* If the value's peer info does not correspond to a peer, ignore it */
if (peer == NULL) {
crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)",
v->nodename, a->id, v->current, peer);
continue;
}
/* If we're just learning the peer's node id, remember it */
if (peer->id && (v->nodeid == 0)) {
crm_trace("Updating value's nodeid");
v->nodeid = peer->id;
}
/* If this is a private attribute, no update needs to be sent */
if (a->is_private) {
private_updates++;
continue;
}
/* If the peer is found, but its uuid is unknown, defer write */
if (peer->uuid == NULL) {
a->unknown_peer_uuids = TRUE;
crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)",
v->nodename, a->id, v->current, peer);
continue;
}
/* Add this value to status update XML */
crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id,
v->current, peer->uuid, peer->id, v->nodeid, peer->uname);
build_update_element(xml_top, a, peer->uuid, v->current);
cib_updates++;
free(v->requested);
v->requested = NULL;
if (v->current) {
v->requested = strdup(v->current);
} else {
/* Older attrd versions don't know about the cib_mixed_update
* flag so make sure it goes to the local cib which does
*/
flags |= cib_mixed_update|cib_scope_local;
}
}
if (private_updates) {
crm_info("Processed %d private change%s for %s, id=%s, set=%s",
private_updates, ((private_updates == 1)? "" : "s"),
a->id, (a->uuid? a->uuid : "<n/a>"), a->set);
}
if (cib_updates) {
crm_log_xml_trace(xml_top, __FUNCTION__);
a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL,
flags, a->user);
crm_info("Sent update %d with %d changes for %s, id=%s, set=%s",
a->update, cib_updates, a->id, (a->uuid? a->uuid : "<n/a>"), a->set);
the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE,
strdup(a->id),
"attrd_cib_callback",
attrd_cib_callback, free);
}
free_xml(xml_top);
}
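
Note: the three hunks in this revision (above in write_attribute(), and below in send_stonith_update() and tengine_stonith_notify()) all replace the explicit CRM_GET_PEER_REMOTE|CRM_GET_PEER_CLUSTER mask with CRM_GET_PEER_ANY. For context, a minimal sketch of the assumed peer-lookup flag definitions; the exact values and the crm/cluster.h location are assumptions for illustration, not part of this diff:

/* Assumed shape of enum crm_get_peer_flags (illustrative values) */
enum crm_get_peer_flags {
    CRM_GET_PEER_CLUSTER = 0x0001, /* search the cluster-node peer cache */
    CRM_GET_PEER_REMOTE  = 0x0002, /* search the Pacemaker Remote peer cache */
    CRM_GET_PEER_ANY     = CRM_GET_PEER_CLUSTER | CRM_GET_PEER_REMOTE,
};

Under that assumption the substitution is a readability cleanup only: the same two peer caches are searched either way.
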
diff --git a/crmd/te_actions.c b/crmd/te_actions.c
index d2267b8833..1fc7b15411 100644
--- a/crmd/te_actions.c
+++ b/crmd/te_actions.c
@@ -1,738 +1,738 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <crm/cluster.h>
#include <throttle.h>
char *te_uuid = NULL;
GHashTable *te_targets = NULL;
void send_rsc_command(crm_action_t * action);
static void te_update_job_count(crm_action_t * action, int offset);
static void
te_start_action_timer(crm_graph_t * graph, crm_action_t * action)
{
action->timer = calloc(1, sizeof(crm_action_timer_t));
action->timer->timeout = action->timeout;
action->timer->reason = timeout_action;
action->timer->action = action;
action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay,
action_timer_callback, (void *)action->timer);
CRM_ASSERT(action->timer->source_id != 0);
}
static gboolean
te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo)
{
crm_debug("Pseudo action %d fired and confirmed", pseudo->id);
te_action_confirmed(pseudo);
update_graph(graph, pseudo);
trigger_graph();
return TRUE;
}
void
send_stonith_update(crm_action_t * action, const char *target, const char *uuid)
{
int rc = pcmk_ok;
crm_node_t *peer = NULL;
/* zero out the node-status & remove all LRM status info */
xmlNode *node_state = NULL;
CRM_CHECK(target != NULL, return);
CRM_CHECK(uuid != NULL, return);
/* Make sure the membership and join caches are accurate */
- peer = crm_get_peer_full(0, target, CRM_GET_PEER_CLUSTER | CRM_GET_PEER_REMOTE);
+ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY);
CRM_CHECK(peer != NULL, return);
if (peer->uuid == NULL) {
crm_info("Recording uuid '%s' for node '%s'", uuid, target);
peer->uuid = strdup(uuid);
}
crmd_peer_down(peer, TRUE);
node_state = do_update_node_cib(peer, node_update_all, NULL, __FUNCTION__);
/* we have to mark whether or not remote nodes have already been fenced */
if (peer->flags & crm_remote_node) {
time_t now = time(NULL);
char *now_s = crm_itoa(now);
crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s);
free(now_s);
}
/* Force our known ID */
crm_xml_add(node_state, XML_ATTR_UUID, uuid);
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state,
cib_quorum_override | cib_scope_local | cib_can_create);
/* Delay processing the trigger until the update completes */
crm_debug("Sending fencing update %d for %s", rc, target);
fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated);
/* Make sure it sticks */
/* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */
erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local);
erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local);
free_xml(node_state);
return;
}
static gboolean
te_fence_node(crm_graph_t * graph, crm_action_t * action)
{
int rc = 0;
const char *id = NULL;
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
gboolean invalid_action = FALSE;
enum stonith_call_options options = st_opt_none;
id = ID(action->xml);
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
type = crm_meta_value(action->params, "stonith_action");
CRM_CHECK(id != NULL, invalid_action = TRUE);
CRM_CHECK(uuid != NULL, invalid_action = TRUE);
CRM_CHECK(type != NULL, invalid_action = TRUE);
CRM_CHECK(target != NULL, invalid_action = TRUE);
if (invalid_action) {
crm_log_xml_warn(action->xml, "BadAction");
return FALSE;
}
crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)",
type, id, target, transition_graph->stonith_timeout);
/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
if (crmd_join_phase_count(crm_join_confirmed) == 1) {
options |= st_opt_allow_suicide;
}
rc = stonith_api->cmds->fence(stonith_api, options, target, type,
transition_graph->stonith_timeout / 1000, 0);
stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
st_opt_timeout_updates,
generate_transition_key(transition_graph->id, action->id,
0, te_uuid),
"tengine_stonith_callback", tengine_stonith_callback);
return TRUE;
}
static int
get_target_rc(crm_action_t * action)
{
const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
if (target_rc_s != NULL) {
return crm_parse_int(target_rc_s, "0");
}
return 0;
}
static gboolean
te_crm_command(crm_graph_t * graph, crm_action_t * action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task));
return FALSE);
crm_info("Executing crm-event (%s): %s on %s%s%s",
crm_str(id), crm_str(task), on_node,
is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
if (safe_str_eq(router_node, fsa_our_uname)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) {
/* defer until everything else completes */
crm_info("crm-event (%s) is a local shutdown", crm_str(id));
graph->completion_action = tg_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action);
update_graph(graph, action);
trigger_graph();
return TRUE;
} else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) {
crm_node_t *peer = crm_get_peer(0, router_node);
crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter =
generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
te_action_confirmed(action);
update_graph(graph, action);
trigger_graph();
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %dms instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = graph->network_delay;
}
te_start_action_timer(graph, action);
}
return TRUE;
}
gboolean
cib_action_update(crm_action_t * action, int status, int op_rc)
{
lrmd_event_data_t *op = NULL;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *xml_op = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *name = NULL;
const char *value = NULL;
const char *rsc_id = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
int call_options = cib_quorum_override | cib_scope_local;
int target_rc = get_target_rc(action);
if (status == PCMK_LRM_OP_PENDING) {
crm_debug("%s %d: Recording pending operation %s on %s",
crm_element_name(action->xml), action->id, task_uuid, target);
} else {
crm_warn("%s %d: %s on %s timed out",
crm_element_name(action->xml), action->id, task_uuid, target);
}
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return FALSE;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action");
return FALSE);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, XML_ATTR_UUID, target_uuid);
crm_xml_add(state, XML_ATTR_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
name = XML_ATTR_TYPE;
value = crm_element_value(action_rsc, name);
crm_xml_add(rsc, name, value);
name = XML_AGENT_ATTR_CLASS;
value = crm_element_value(action_rsc, name);
crm_xml_add(rsc, name, value);
name = XML_AGENT_ATTR_PROVIDER;
value = crm_element_value(action_rsc, name);
crm_xml_add(rsc, name, value);
op = convert_graph_action(NULL, action, status, op_rc);
op->call_id = -1;
op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid);
xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, target, __FUNCTION__, LOG_INFO);
lrmd_free_event(op);
crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s",
status < 0 ? "new action" : XML_ATTR_TIMEOUT,
crm_element_name(action->xml), crm_str(task), rsc_id, target);
crm_log_xml_trace(xml_op, "Op");
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)",
services_lrm_status_str(status), action->id, task_uuid, target, rc);
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
free_xml(state);
action->sent_update = TRUE;
if (rc < pcmk_ok) {
return FALSE;
}
return TRUE;
}
static gboolean
te_rsc_command(crm_graph_t * graph, crm_action_t * action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
action->executed = FALSE;
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
CRM_CHECK(on_node != NULL && strlen(on_node) != 0,
crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task));
return FALSE);
rsc_op = action->xml;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter =
generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid);
crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
if (safe_str_eq(router_node, fsa_our_uname)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating action %d: %s %s on %s%s%s",
action->id, task, task_uuid, on_node,
is_local ? " (local)" : "", no_wait ? " - no waiting" : "");
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __FUNCTION__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg);
} else {
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
action->executed = TRUE;
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return FALSE;
} else if (no_wait) {
crm_info("Action %d confirmed - no wait", action->id);
action->confirmed = TRUE; /* Just mark confirmed.
* Don't bump the job count only to immediately decrement it
*/
update_graph(transition_graph, action);
trigger_graph();
} else if (action->confirmed == TRUE) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %dms instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
value = crm_meta_value(action->params, XML_OP_ATTR_PENDING);
if (crm_is_true(value)
&& safe_str_neq(task, CRMD_ACTION_CANCEL)
&& safe_str_neq(task, CRMD_ACTION_DELETE)) {
/* write a "pending" entry to the CIB, inhibit notification */
crm_debug("Recording pending op %s in the CIB", task_uuid);
cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_OCF_UNKNOWN);
}
return TRUE;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(crm_action_t * action, int offset)
{
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (action->type != action_type_rsc || target == NULL) {
/* No limit on these */
return;
}
/* If we have a router node, the action is being performed on a
* remote node. For now, we count all actions occurring on a
* remote node against the job list of the cluster node hosting
* the connection resource. */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) &&
(safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
te_update_job_count_on(target, offset, FALSE);
}
static gboolean
te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if(target == NULL) {
/* No limit on these */
return TRUE;
} else if(te_targets == NULL) {
return FALSE;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return FALSE;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return FALSE;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return TRUE;
}
static gboolean
te_should_perform_action(crm_graph_t * graph, crm_action_t * action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (action->type != action_type_rsc) {
/* No limit on these */
return TRUE;
}
/* If we have a router node, the action is being performed on a
* remote node. For now, we count all actions occurring on a
* remote node against the job list of the cluster node hosting
* the connection resource. */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if ((target == NULL) &&
(safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) {
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
if(te_should_perform_action_on(graph, action, target) == FALSE) {
return FALSE;
}
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
return te_should_perform_action_on(graph, action, target);
}
void
te_action_confirmed(crm_action_t * action)
{
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if (action->confirmed == FALSE && action->type == action_type_rsc && target != NULL) {
te_update_job_count(action, -1);
}
action->confirmed = TRUE;
}
crm_graph_functions_t te_graph_fns = {
te_pseudo_action,
te_rsc_command,
te_crm_command,
te_fence_node,
te_should_perform_action,
};
void
notify_crmd(crm_graph_t * graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state));
CRM_CHECK(graph->complete, graph->complete = TRUE);
switch (graph->completion_action) {
case tg_stop:
type = "stop";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_done:
type = "done";
if (fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case tg_restart:
type = "restart";
if (fsa_state == S_TRANSITION_ENGINE) {
if (too_many_st_failures() == FALSE) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
event = I_PE_CALC;
}
} else {
event = I_TE_SUCCESS;
}
} else if (fsa_state == S_POLICY_ENGINE) {
register_fsa_action(A_PE_INVOKE);
}
break;
case tg_shutdown:
type = "shutdown";
if (is_set(fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet our" " PE is telling us too.");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason));
graph->abort_reason = NULL;
graph->completion_action = tg_done;
clear_bit(fsa_input_register, R_IN_TRANSITION);
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else if (fsa_source) {
mainloop_set_trigger(fsa_source);
}
}
diff --git a/crmd/te_utils.c b/crmd/te_utils.c
index 36eecdb306..58242724f2 100644
--- a/crmd/te_utils.c
+++ b/crmd/te_utils.c
@@ -1,638 +1,638 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <tengine.h>
#include <crmd_fsa.h>
#include <crmd_messages.h>
#include <throttle.h>
#include <crm/fencing/internal.h>
crm_trigger_t *stonith_reconnect = NULL;
/*
* stonith cleanup list
*
* If the DC is shot, proper notifications might not go out.
* The stonith cleanup list allows the cluster to (re-)send
* notifications once a new DC is elected.
*/
static GListPtr stonith_cleanup_list = NULL;
/*!
* \internal
* \brief Add a node to the stonith cleanup list
*
* \param[in] target Name of node to add
*/
void
add_stonith_cleanup(const char *target) {
stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target));
}
/*!
* \internal
* \brief Remove a node from the stonith cleanup list
*
* \param[in] target Name of node to remove
*/
void
remove_stonith_cleanup(const char *target)
{
GListPtr iter = stonith_cleanup_list;
while (iter != NULL) {
GListPtr tmp = iter;
char *iter_name = tmp->data;
iter = iter->next;
if (safe_str_eq(target, iter_name)) {
crm_trace("Removing %s from the cleanup list", iter_name);
stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp);
free(iter_name);
}
}
}
/*!
* \internal
* \brief Purge all entries from the stonith cleanup list
*/
void
purge_stonith_cleanup()
{
if (stonith_cleanup_list) {
GListPtr iter = NULL;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_info("Purging %s from stonith cleanup list", target);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
}
/*!
* \internal
* \brief Send stonith updates for all entries in cleanup list, then purge it
*/
void
execute_stonith_cleanup()
{
GListPtr iter;
for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) {
char *target = iter->data;
crm_node_t *target_node = crm_get_peer(0, target);
const char *uuid = crm_peer_uuid(target_node);
crm_notice("Marking %s, target of a previous stonith action, as clean", target);
send_stonith_update(NULL, target, uuid);
free(target);
}
g_list_free(stonith_cleanup_list);
stonith_cleanup_list = NULL;
}
/* end stonith cleanup list functions */
static gboolean
fail_incompletable_stonith(crm_graph_t * graph)
{
GListPtr lpc = NULL;
const char *task = NULL;
xmlNode *last_action = NULL;
if (graph == NULL) {
return FALSE;
}
for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) {
GListPtr lpc2 = NULL;
synapse_t *synapse = (synapse_t *) lpc->data;
if (synapse->confirmed) {
continue;
}
for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) {
crm_action_t *action = (crm_action_t *) lpc2->data;
if (action->type != action_type_crm || action->confirmed) {
continue;
}
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (task && safe_str_eq(task, CRM_OP_FENCE)) {
action->failed = TRUE;
last_action = action->xml;
update_graph(graph, action);
crm_notice("Failing action %d (%s): STONITHd terminated",
action->id, ID(action->xml));
}
}
}
if (last_action != NULL) {
crm_warn("STONITHd failure resulted in un-runnable actions");
abort_transition(INFINITY, tg_restart, "Stonith failure", last_action);
return TRUE;
}
return FALSE;
}
static void
tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e)
{
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_crit("Fencing daemon connection failed");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Fencing daemon disconnected");
}
/* cbchan will be garbage at this point, arrange for it to be reset */
if(stonith_api) {
stonith_api->state = stonith_disconnected;
}
if (AM_I_DC) {
fail_incompletable_stonith(transition_graph);
trigger_graph();
}
}
#if SUPPORT_CMAN
# include <libfenced.h>
#endif
char *te_client_id = NULL;
#ifdef HAVE_SYS_REBOOT_H
# include <unistd.h>
# include <sys/reboot.h>
#endif
static void
tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
{
if(te_client_id == NULL) {
te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid());
}
if (st_event == NULL) {
crm_err("Notify data not found");
return;
}
crmd_notify_fencing_op(st_event);
if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) {
crm_notice("%s was successfully unfenced by %s (at the request of %s)",
st_event->target, st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin);
/* TODO: Hook up st_event->device */
return;
} else if (safe_str_eq("on", st_event->action)) {
crm_err("Unfencing of %s by %s failed: %s (%d)",
st_event->target, st_event->executioner ? st_event->executioner : "<anyone>",
pcmk_strerror(st_event->result), st_event->result);
return;
} else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) {
crm_crit("We were allegedly just fenced by %s for %s!",
st_event->executioner ? st_event->executioner : "<anyone>", st_event->origin); /* Dumps blackbox if enabled */
qb_log_fini(); /* Try to get the above log message to disk - somehow */
/* Get out ASAP and do not come back up.
*
* Triggering a reboot is also not the worst idea either since
* the rest of the cluster thinks we're safely down
*/
#ifdef RB_HALT_SYSTEM
reboot(RB_HALT_SYSTEM);
#endif
/*
* If reboot() fails or is not supported, coming back up will
* probably lead to a situation where the other nodes set our
* status to 'lost' because of the fencing callback and will
* discard subsequent election votes with:
*
* Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster)
*
* So just stay dead, something is seriously messed up anyway.
*
*/
exit(100); /* None of our wrappers since we already called qb_log_fini() */
return;
}
if (st_event->result == pcmk_ok &&
safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
st_fail_count_reset(st_event->target);
}
crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->action,
st_event->executioner ? st_event->executioner : "<anyone>",
st_event->origin, pcmk_strerror(st_event->result), st_event->id,
st_event->client_origin ? st_event->client_origin : "<unknown>");
#if SUPPORT_CMAN
if (st_event->result == pcmk_ok && is_cman_cluster()) {
int local_rc = 0;
int confirm = 0;
char *target_copy = strdup(st_event->target);
/* In case fenced hasn't noticed yet
*
* Any fencing that has been initiated will be completed by way of the fence_pcmk redirect
*/
local_rc = fenced_external(target_copy);
if (local_rc != 0) {
crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target,
local_rc);
} else {
crm_notice("Notified CMAN that '%s' is now fenced", st_event->target);
}
/* In case fenced is already trying to shoot it */
confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY);
if (confirm > 0) {
int ignore = 0;
int len = strlen(target_copy);
errno = 0;
local_rc = write(confirm, target_copy, len);
ignore = write(confirm, "\n", 1);
if(ignore < 0 && errno == EBADF) {
crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target);
} else if (local_rc < len) {
crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc);
} else {
fsync(confirm);
crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target);
}
close(confirm);
}
free(target_copy);
}
#endif
if (st_event->result == pcmk_ok) {
- crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_REMOTE | CRM_GET_PEER_CLUSTER);
+ crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY);
const char *uuid = NULL;
gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname);
if (peer == NULL) {
return;
}
uuid = crm_peer_uuid(peer);
crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
if(AM_I_DC) {
/* The DC always sends updates */
send_stonith_update(NULL, st_event->target, uuid);
if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) {
/* Abort the current transition graph if it wasn't us
* that invoked stonith to fence someone
*/
crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
}
/* Assume it was our leader if we don't currently have one */
} else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target))
&& !is_set(peer->flags, crm_remote_node)) {
crm_notice("Target %s our leader %s (recorded: %s)",
fsa_our_dc ? "was" : "may have been", st_event->target,
fsa_our_dc ? fsa_our_dc : "<unset>");
/* Given the CIB resyncing that occurs around elections,
* have one node update the CIB now and, if the new DC is different,
* have them do so too after the election
*/
if (we_are_executioner) {
send_stonith_update(NULL, st_event->target, uuid);
}
add_stonith_cleanup(st_event->target);
}
crmd_peer_down(peer, TRUE);
}
}
gboolean
te_connect_stonith(gpointer user_data)
{
int lpc = 0;
int rc = pcmk_ok;
if (stonith_api == NULL) {
stonith_api = stonith_api_new();
}
if (stonith_api->state != stonith_disconnected) {
crm_trace("Still connected");
return TRUE;
}
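/* Descriptive note: make up to 30 connection attempts, sleeping one second
 * before each attempt and another second after a failure, i.e. roughly the
 * "2s" mentioned in the retry message below. */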
for (lpc = 0; lpc < 30; lpc++) {
crm_debug("Attempting connection to fencing daemon...");
sleep(1);
rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL);
if (rc == pcmk_ok) {
break;
}
if (user_data != NULL) {
if (is_set(fsa_input_register, R_ST_REQUIRED)) {
crm_err("Sign-in failed: triggered a retry");
mainloop_set_trigger(stonith_reconnect);
} else {
crm_info("Sign-in failed, but no longer required");
}
return TRUE;
}
crm_err("Sign-in failed: pausing and trying again in 2s...");
sleep(1);
}
CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT,
tengine_stonith_connection_destroy);
stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE,
tengine_stonith_notify);
crm_trace("Connected");
return TRUE;
}
gboolean
stop_te_timer(crm_action_timer_t * timer)
{
const char *timer_desc = "action timer";
if (timer == NULL) {
return FALSE;
}
if (timer->reason == timeout_abort) {
timer_desc = "global timer";
crm_trace("Stopping %s", timer_desc);
}
if (timer->source_id != 0) {
crm_trace("Stopping %s", timer_desc);
g_source_remove(timer->source_id);
timer->source_id = 0;
} else {
crm_trace("%s was already stopped", timer_desc);
return FALSE;
}
return TRUE;
}
gboolean
te_graph_trigger(gpointer user_data)
{
enum transition_status graph_rc = -1;
if (transition_graph == NULL) {
crm_debug("Nothing to do");
return TRUE;
}
crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state));
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
return TRUE;
break;
default:
break;
}
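/* Descriptive note on the block below: while the graph is incomplete, the
 * configured batch-limit is temporarily capped by the cluster-wide job limit
 * from the throttling code for this run, then restored afterwards. */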
if (transition_graph->complete == FALSE) {
int limit = transition_graph->batch_limit;
transition_graph->batch_limit = throttle_get_total_job_limit(limit);
graph_rc = run_graph(transition_graph);
transition_graph->batch_limit = limit; /* Restore the configured value */
/* significant overhead... */
/* print_graph(LOG_DEBUG_3, transition_graph); */
if (graph_rc == transition_active) {
crm_trace("Transition not yet complete");
return TRUE;
} else if (graph_rc == transition_pending) {
crm_trace("Transition not yet complete - no actions fired");
return TRUE;
}
if (graph_rc != transition_complete) {
crm_warn("Transition failed: %s", transition_status(graph_rc));
print_graph(LOG_NOTICE, transition_graph);
}
}
crm_debug("Transition %d is now complete", transition_graph->id);
transition_graph->complete = TRUE;
notify_crmd(transition_graph);
return TRUE;
}
void
trigger_graph_processing(const char *fn, int line)
{
crm_trace("%s:%d - Triggered graph processing", fn, line);
mainloop_set_trigger(transition_trigger);
}
void
abort_transition_graph(int abort_priority, enum transition_action abort_action,
const char *abort_text, xmlNode * reason, const char *fn, int line)
{
int add[] = { 0, 0, 0 };
int del[] = { 0, 0, 0 };
int level = LOG_INFO;
xmlNode *diff = NULL;
xmlNode *change = NULL;
CRM_CHECK(transition_graph != NULL, return);
switch (fsa_state) {
case S_STARTING:
case S_PENDING:
case S_NOT_DC:
case S_HALT:
case S_ILLEGAL:
case S_STOPPING:
case S_TERMINATE:
crm_info("Abort %s suppressed: state=%s (complete=%d)",
abort_text, fsa_state2string(fsa_state), transition_graph->complete);
return;
default:
break;
}
/* Make sure any queued calculations are discarded ASAP */
free(fsa_pe_ref);
fsa_pe_ref = NULL;
if (transition_graph->complete == FALSE) {
if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) {
level = LOG_NOTICE;
}
}
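/* Descriptive note: when a CIB change triggered the abort, walk up from the
 * reason element to find the enclosing diff (for its version numbers) and the
 * specific change entry, so the messages below can say exactly what changed. */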
if(reason) {
xmlNode *search = NULL;
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) {
diff = search;
break;
}
}
if(diff) {
xml_patch_versions(diff, add, del);
for(search = reason; search; search = search->parent) {
if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) {
change = search;
break;
}
}
}
}
if(reason == NULL) {
do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)",
abort_text, fn, line, transition_graph->complete);
} else if(change == NULL) {
char *local_path = xml_get_path(reason);
do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete);
free(local_path);
} else {
const char *kind = NULL;
const char *op = crm_element_value(change, XML_DIFF_OP);
const char *path = crm_element_value(change, XML_DIFF_PATH);
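/* Descriptive note: when the change element itself was passed in as the
 * reason, descend to what actually changed - the newly added child for a
 * "create", or the content of the diff-result wrapper for a "modify". */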
if(change == reason) {
if(strcmp(op, "create") == 0) {
reason = reason->children;
} else if(strcmp(op, "modify") == 0) {
reason = first_named_child(reason, XML_DIFF_RESULT);
if(reason) {
reason = reason->children;
}
}
}
kind = TYPE(reason);
if(strcmp(op, "delete") == 0) {
const char *shortpath = strrchr(path, '/');
do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
} else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) {
do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
crm_element_value(reason, XML_ATTR_ID),
crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
abort_text, op, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
} else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) {
const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)",
crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
magic, add[0], add[1], add[2], fn, line, transition_graph->complete);
} else if (safe_str_eq(XML_CIB_TAG_STATE, kind)
|| safe_str_eq(XML_CIB_TAG_NODE, kind)) {
const char *uname = crm_peer_uname(ID(reason));
do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)",
kind, op, uname ? uname : ID(reason), abort_text,
add[0], add[1], add[2], fn, line, transition_graph->complete);
} else {
do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)",
TYPE(reason), ID(reason), op?op:"change", abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete);
}
}
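/* Descriptive note on the block below: if the graph had already completed
 * there is nothing to abort; either restart the transition timer (when a
 * period is configured) or queue a new policy engine calculation right away. */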
if (transition_graph->complete) {
if (transition_timer->period_ms > 0) {
crm_timer_stop(transition_timer);
crm_timer_start(transition_timer);
} else {
register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
}
return;
}
mainloop_set_trigger(transition_trigger);
}
diff --git a/include/crm/cluster.h b/include/crm/cluster.h
index c60dbd1bb1..f9ff5d9ca7 100644
--- a/include/crm/cluster.h
+++ b/include/crm/cluster.h
@@ -1,233 +1,234 @@
/*
* Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef CRM_COMMON_CLUSTER__H
# define CRM_COMMON_CLUSTER__H
# include <crm/common/xml.h>
# include <crm/common/util.h>
# if SUPPORT_HEARTBEAT
# include <heartbeat/hb_api.h>
# include <ocf/oc_event.h>
# endif
# if SUPPORT_COROSYNC
# include <corosync/cpg.h>
# endif
extern gboolean crm_have_quorum;
extern GHashTable *crm_peer_cache;
extern GHashTable *crm_remote_peer_cache;
extern unsigned long long crm_peer_seq;
# ifndef CRM_SERVICE
# define CRM_SERVICE PCMK_SERVICE_ID
# endif
/* *INDENT-OFF* */
#define CRM_NODE_LOST "lost"
#define CRM_NODE_MEMBER "member"
#define CRM_NODE_ACTIVE CRM_NODE_MEMBER
#define CRM_NODE_EVICTED "evicted"
enum crm_join_phase
{
crm_join_nack = -1,
crm_join_none = 0,
crm_join_welcomed = 1,
crm_join_integrated = 2,
crm_join_finalized = 3,
crm_join_confirmed = 4,
};
enum crm_node_flags
{
/* node is not a cluster node and should not be considered for cluster membership */
crm_remote_node = 0x0001,
/* deprecated (not used by cluster) */
crm_remote_container = 0x0002,
crm_remote_baremetal = 0x0004,
};
/* *INDENT-ON* */
typedef struct crm_peer_node_s {
uint32_t id; /* Only used by corosync derivatives */
uint64_t born; /* Only used by heartbeat and the legacy plugin */
uint64_t last_seen;
uint64_t flags; /* Specified by crm_node_flags enum */
int32_t votes; /* Only used by the legacy plugin */
uint32_t processes;
enum crm_join_phase join;
char *uname;
char *uuid;
char *state;
char *expected;
char *addr; /* Only used by the legacy plugin */
char *version; /* Unused */
} crm_node_t;
void crm_peer_init(void);
void crm_peer_destroy(void);
typedef struct crm_cluster_s {
char *uuid;
char *uname;
uint32_t nodeid;
void (*destroy) (gpointer);
# if SUPPORT_HEARTBEAT
ll_cluster_t *hb_conn;
void (*hb_dispatch) (HA_Message * msg, void *private);
# endif
# if SUPPORT_COROSYNC
struct cpg_name group;
cpg_callbacks_t cpg;
cpg_handle_t cpg_handle;
# endif
} crm_cluster_t;
gboolean crm_cluster_connect(crm_cluster_t * cluster);
void crm_cluster_disconnect(crm_cluster_t * cluster);
/* *INDENT-OFF* */
enum crm_ais_msg_class {
crm_class_cluster = 0,
crm_class_members = 1,
crm_class_notify = 2,
crm_class_nodeid = 3,
crm_class_rmpeer = 4,
crm_class_quorum = 5,
};
/* order here matters - it's used to index into the crm_children array */
enum crm_ais_msg_types {
crm_msg_none = 0,
crm_msg_ais = 1,
crm_msg_lrmd = 2,
crm_msg_cib = 3,
crm_msg_crmd = 4,
crm_msg_attrd = 5,
crm_msg_stonithd = 6,
crm_msg_te = 7,
crm_msg_pe = 8,
crm_msg_stonith_ng = 9,
};
/* used with crm_get_peer_full */
enum crm_get_peer_flags {
CRM_GET_PEER_CLUSTER = 0x0001,
CRM_GET_PEER_REMOTE = 0x0002,
+ CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE,
};
/* *INDENT-ON* */
gboolean send_cluster_message(crm_node_t * node, enum crm_ais_msg_types service,
xmlNode * data, gboolean ordered);
int crm_remote_peer_cache_size(void);
/* Initialize and refresh the remote peer cache from a cib config */
void crm_remote_peer_cache_refresh(xmlNode *cib);
void crm_remote_peer_cache_add(const char *node_name);
void crm_remote_peer_cache_remove(const char *node_name);
/* allows filtering of remote and cluster nodes using crm_get_peer_flags */
crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags);
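/* Illustrative example (added for clarity, not part of this change): with the
 * new convenience flag, a node can be looked up in both caches in one call:
 *
 *   crm_node_t *peer = crm_get_peer_full(0, node_name, CRM_GET_PEER_ANY);
 *
 * which mirrors the crm_find_peer_full() change in the crmd callback above. */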
/* only searches cluster nodes */
crm_node_t *crm_get_peer(unsigned int id, const char *uname);
guint crm_active_peers(void);
gboolean crm_is_peer_active(const crm_node_t * node);
guint reap_crm_member(uint32_t id, const char *name);
int crm_terminate_member(int nodeid, const char *uname, void *unused);
int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection);
# if SUPPORT_HEARTBEAT
gboolean crm_is_heartbeat_peer_active(const crm_node_t * node);
# endif
# if SUPPORT_COROSYNC
extern int ais_fd_sync;
uint32_t get_local_nodeid(cpg_handle_t handle);
gboolean cluster_connect_cpg(crm_cluster_t *cluster);
void cluster_disconnect_cpg(crm_cluster_t * cluster);
void pcmk_cpg_membership(cpg_handle_t handle,
const struct cpg_name *groupName,
const struct cpg_address *member_list, size_t member_list_entries,
const struct cpg_address *left_list, size_t left_list_entries,
const struct cpg_address *joined_list, size_t joined_list_entries);
gboolean crm_is_corosync_peer_active(const crm_node_t * node);
gboolean send_cluster_text(int class, const char *data, gboolean local,
crm_node_t * node, enum crm_ais_msg_types dest);
# endif
const char *crm_peer_uuid(crm_node_t *node);
const char *crm_peer_uname(const char *uuid);
void set_uuid(xmlNode *xml, const char *attr, crm_node_t *node);
enum crm_status_type {
crm_status_uname,
crm_status_nstate,
crm_status_processes,
crm_status_rstate, /* remote node state */
};
enum crm_ais_msg_types text2msg_type(const char *text);
void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *));
void crm_set_autoreap(gboolean autoreap);
/* *INDENT-OFF* */
enum cluster_type_e
{
pcmk_cluster_unknown = 0x0001,
pcmk_cluster_invalid = 0x0002,
pcmk_cluster_heartbeat = 0x0004,
pcmk_cluster_classic_ais = 0x0010,
pcmk_cluster_corosync = 0x0020,
pcmk_cluster_cman = 0x0040,
};
/* *INDENT-ON* */
enum cluster_type_e get_cluster_type(void);
const char *name_for_cluster_type(enum cluster_type_e type);
gboolean is_corosync_cluster(void);
gboolean is_cman_cluster(void);
gboolean is_openais_cluster(void);
gboolean is_classic_ais_cluster(void);
gboolean is_heartbeat_cluster(void);
const char *get_local_node_name(void);
char *get_node_name(uint32_t nodeid);
# if SUPPORT_COROSYNC
char *pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *msg,
uint32_t *kind, const char **from);
# endif
#endif
