diff --git a/attrd/commands.c b/attrd/commands.c index c7c9ab5891..6526b2be69 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -1,1106 +1,1106 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #define ATTRD_PROTOCOL_VERSION "1" int last_cib_op_done = 0; char *peer_writer = NULL; GHashTable *attributes = NULL; typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; char *stored; } attribute_value_t; void write_attribute(attribute_t *a); void write_or_elect_attribute(attribute_t *a); void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source); static gboolean send_attrd_message(crm_node_t * node, xmlNode * data) { crm_xml_add(data, F_TYPE, T_ATTRD); crm_xml_add(data, F_ATTRD_IGNORE_LOCALLY, "atomic-version"); /* Tell older versions to ignore our messages */ crm_xml_add(data, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(data, F_ATTRD_WRITER, election_state(writer)); return send_cluster_message(node, crm_msg_attrd, data, TRUE); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s in state %d", a->id, election_state(writer)); write_or_elect_attribute(a); return FALSE; } static void free_attribute_value(gpointer data) { attribute_value_t *v = data; free(v->nodename); free(v->current); free(v->requested); free(v->stored); free(v); } void free_attribute(gpointer data) { attribute_t *a = data; if(a) { free(a->id); free(a->set); free(a->uuid); free(a->user); mainloop_timer_del(a->timer); g_hash_table_destroy(a->values); free(a); } } static xmlNode * build_attribute_xml( xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user, gboolean is_private, const char *peer, uint32_t peerid, const char *value) { xmlNode *xml = create_xml_node(parent, __FUNCTION__); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, name); crm_xml_add(xml, F_ATTRD_SET, set); crm_xml_add(xml, F_ATTRD_KEY, uuid); crm_xml_add(xml, F_ATTRD_USER, user); crm_xml_add(xml, F_ATTRD_HOST, peer); crm_xml_add_int(xml, F_ATTRD_HOST_ID, peerid); crm_xml_add(xml, F_ATTRD_VALUE, value); crm_xml_add_int(xml, F_ATTRD_DAMPEN, timeout_ms/1000); crm_xml_add_int(xml, F_ATTRD_IS_PRIVATE, is_private); return xml; } static attribute_t * create_attribute(xmlNode *xml) { int dampen = 0; const char *value = crm_element_value(xml, F_ATTRD_DAMPEN); attribute_t *a = calloc(1, sizeof(attribute_t)); a->id = crm_element_value_copy(xml, F_ATTRD_ATTRIBUTE); a->set = crm_element_value_copy(xml, F_ATTRD_SET); a->uuid = crm_element_value_copy(xml, F_ATTRD_KEY); a->values = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, free_attribute_value); crm_element_value_int(xml, F_ATTRD_IS_PRIVATE, &a->is_private); #if ENABLE_ACL crm_trace("Performing all %s operations as user '%s'", a->id, a->user); a->user = crm_element_value_copy(xml, F_ATTRD_USER); #endif if(value) { dampen = crm_get_msec(value); crm_trace("Created attribute %s with delay %dms (%s)", a->id, dampen, value); } else { crm_trace("Created attribute %s with no delay", a->id); } if(dampen > 0) { a->timeout_ms = dampen; a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); } else if (dampen < 0) { crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); } g_hash_table_replace(attributes, a->id, a); return a; } /*! * \internal * \brief Respond to a client peer-remove request (i.e. propagate to all peers) * * \param[in] client_name Name of client that made request (for log messages) * \param[in] xml Root of request XML * * \return void */ void attrd_client_peer_remove(const char *client_name, xmlNode *xml) { const char *host = crm_element_value(xml, F_ATTRD_HOST); if (host) { crm_info("Client %s is requesting all values for %s be removed", client_name, host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", client_name); } } /*! * \internal * \brief Respond to a client update request * * \param[in] xml Root of request XML * * \return void */ void attrd_client_update(xmlNode *xml) { attribute_t *a = NULL; attribute_value_t *v = NULL; char *key = crm_element_value_copy(xml, F_ATTRD_KEY); char *set = crm_element_value_copy(xml, F_ATTRD_SET); char *host = crm_element_value_copy(xml, F_ATTRD_HOST); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *regex = crm_element_value(xml, F_ATTRD_REGEX); /* If a regex was specified, broadcast a message for each match */ if ((attr == NULL) && regex) { GHashTableIter aIter; regex_t *r_patt = calloc(1, sizeof(regex_t)); crm_debug("Setting %s to %s", regex, value); if (regcomp(r_patt, regex, REG_EXTENDED)) { crm_err("Bad regex '%s' for update", regex); } else { g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(r_patt, attr, 0, NULL, 0); if (status == 0) { crm_trace("Matched %s with %s", attr, regex); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr); send_attrd_message(NULL, xml); } } } free(key); free(set); free(host); regfree(r_patt); free(r_patt); return; } if (host == NULL) { crm_trace("Inferring host"); host = strdup(attrd_cluster->uname); crm_xml_add(xml, F_ATTRD_HOST, host); crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid); } a = g_hash_table_lookup(attributes, attr); /* If value was specified using ++ or += notation, expand to real value */ if (value) { int offset = 1; int int_value = 0; static const int plus_plus_len = 5; if ((strlen(value) >= (plus_plus_len + 2)) && (value[plus_plus_len] == '+') && ((value[plus_plus_len + 1] == '+') || (value[plus_plus_len + 1] == '='))) { if (a) { v = g_hash_table_lookup(a->values, host); } if (v) { int_value = char2score(v->current); } if (value[plus_plus_len + 1] != '+') { const char *offset_s = value + (plus_plus_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = INFINITY; } crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(xml, F_ATTRD_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ value = crm_element_value(xml, F_ATTRD_VALUE); } } if ((peer_writer == NULL) && (election_state(writer) != election_in_progress)) { crm_info("Starting an election to determine the writer"); election_vote(writer); } crm_debug("Broadcasting %s[%s] = %s%s", attr, host, value, ((election_state(writer) == election_won)? " (writer)" : "")); free(key); free(set); free(host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } /*! * \internal * \brief Respond to a client refresh request (i.e. write out all attributes) * * \return void */ void attrd_client_refresh(void) { GHashTableIter iter; attribute_t *a = NULL; /* 'refresh' forces a write of the current value of all attributes * Cancel any existing timers, we're writing it NOW */ g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { mainloop_timer_stop(a->timer); } crm_info("Updating all attributes"); write_attributes(TRUE, FALSE); } /*! * \internal * \brief Build the XML reply to a client query * * param[in] attr Name of requested attribute * param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __FUNCTION__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, F_ATTRD_ATTRIBUTE, attr); /* Allow caller to use "localhost" to refer to local node */ if (safe_str_eq(host, "localhost")) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, host); crm_xml_add(host_value, F_ATTRD_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, v->nodename); crm_xml_add(host_value, F_ATTRD_VALUE, v->current); } } } return reply; } /*! * \internal * \brief Respond to a client query * * \param[in] client Who queried us * \param[in] query Root of query XML * * \return void */ void attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *query) { const char *attr; const char *origin = crm_element_value(query, F_ORIG); ssize_t rc; xmlNode *reply; if (origin == NULL) { origin = "unknown client"; } crm_debug("Query arrived from %s", origin); /* Request must specify attribute name to query */ attr = crm_element_value(query, F_ATTRD_ATTRIBUTE); if (attr == NULL) { crm_warn("Ignoring malformed query from %s (no attribute name given)", origin); return; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, F_ATTRD_HOST)); if (reply == NULL) { crm_err("Could not respond to query from %s: could not create XML reply", origin); return; } crm_log_xml_trace(reply, "Reply"); /* Send the reply to the client */ client->request_id = 0; if ((rc = crm_ipcs_send(client, id, reply, flags)) < 0) { crm_err("Could not respond to query from %s: %s (%d)", origin, pcmk_strerror(-rc), -rc); } free_xml(reply); } void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { int peer_state = 0; const char *v = crm_element_value(xml, F_ATTRD_VERSION); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *election_op = crm_element_value(xml, F_CRM_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); if(election_op) { enum election_result rc = 0; crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); rc = election_count_vote(writer, xml, TRUE); switch(rc) { case election_start: free(peer_writer); peer_writer = NULL; election_vote(writer); break; case election_lost: free(peer_writer); peer_writer = strdup(peer->uname); break; default: election_check(writer); break; } return; } else if(v == NULL) { /* From the non-atomic version */ if (safe_str_eq(op, ATTRD_OP_UPDATE)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); crm_trace("Compatibility update of %s from %s", name, peer->uname); attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_FLUSH)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); attribute_t *a = g_hash_table_lookup(attributes, name); if(a) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { GHashTableIter aIter; attribute_t *a = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } } crm_element_value_int(xml, F_ATTRD_WRITER, &peer_state); if(election_state(writer) == election_won && peer_state == election_won && safe_str_neq(peer->uname, attrd_cluster->uname)) { crm_notice("Detected another attribute writer: %s", peer->uname); election_vote(writer); } else if(peer_state == election_won) { if(peer_writer == NULL) { peer_writer = strdup(peer->uname); crm_notice("Recorded attribute writer: %s", peer->uname); } else if(safe_str_neq(peer->uname, peer_writer)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, peer_writer); free(peer_writer); peer_writer = strdup(peer->uname); } } if (safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_SYNC)) { attrd_peer_sync(peer, xml); } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { int host_id = 0; char *endptr = NULL; host_id = strtol(host, &endptr, 10); if (errno != 0 || endptr == host || *endptr != '\0') { host_id = 0; } else { host = NULL; } crm_notice("Processing %s from %s: %s %u", op, peer->uname, host, host_id); attrd_peer_remove(host_id, host, TRUE, peer->uname); } else if (safe_str_eq(op, ATTRD_OP_SYNC_RESPONSE) && safe_str_neq(peer->uname, attrd_cluster->uname)) { xmlNode *child = NULL; - crm_notice("Processing %s from %s", op, peer->uname); + crm_info("Processing %s from %s", op, peer->uname); for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { host = crm_element_value(child, F_ATTRD_HOST); attrd_peer_update(peer, child, host, TRUE); } } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); send_attrd_message(peer, sync); free_xml(sync); } void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; crm_notice("Removing all %s (%u) attributes for %s", host, nodeid, source); if(host == NULL) { return; } g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { attribute_value_t *v = g_hash_table_lookup(a->values, host); if(v && v->current) { free(v->current); v->current = NULL; a->changed = TRUE; crm_debug("Removed %s[%s]=%s for %s", a->id, host, v->current, source); if(a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id); mainloop_timer_start(a->timer); } else { write_or_elect_attribute(a); } } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(nodeid, host); } } /*! * \internal * \brief Return host's hash table entry (creating one if needed) * * \param[in] values Hash table of values * \param[in] host Name of peer to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &is_remote); if (is_remote) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = crm_find_peer(0, host); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, host); } /* Ensure this host is in the remote peer cache */ crm_remote_peer_cache_add(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); v->nodename = strdup(host); CRM_ASSERT(v->nodename != NULL); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool changed = FALSE; attribute_value_t *v = NULL; int dampen = 0; const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *dvalue = crm_element_value(xml, F_ATTRD_DAMPEN); attribute_t *a = g_hash_table_lookup(attributes, attr); if(a == NULL) { if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { a = create_attribute(xml); } else { crm_warn("Update error (attribute %s not found)", attr); return; } } if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { if (dvalue) { dampen = crm_get_msec(dvalue); if (dampen >= 0) { if (a->timeout_ms != dampen) { mainloop_timer_stop(a->timer); mainloop_timer_del(a->timer); a->timeout_ms = dampen; if (dampen > 0) { a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); crm_info("Update attribute %s with delay %dms (%s)", a->id, dampen, dvalue); } else { a->timer = NULL; crm_info("Update attribute %s with not delay", a->id); } //if dampen is changed, attrd writes in a current value immediately. write_or_elect_attribute(a); if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { return; } } else { if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { crm_trace("Unchanged attribute %s with delay %dms (%s).(ATTRD_OP_UPDATE_DELAY)", a->id, dampen, dvalue); return; } } } else { crm_warn("Update error (A positive number is necessary for delay parameter. attribute %s : %dms (%s))", a->id, dampen, dvalue); return; } } else { crm_warn("Update error (delay parameter is necessary for the update of the attribute %s)", a->id); return; } } if(host == NULL) { GHashTableIter vIter; g_hash_table_iter_init(&vIter, a->values); crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, F_ATTRD_HOST_ID); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { attrd_peer_update(peer, xml, host, filter); } return; } v = attrd_lookup_or_create_value(a->values, host, xml); if(filter && safe_str_neq(v->current, value) && safe_str_eq(host, attrd_cluster->uname)) { xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", a->id, host, v->current, value, peer->uname); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); v = g_hash_table_lookup(a->values, host); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer)); send_attrd_message(peer, sync); free_xml(sync); } else if(safe_str_neq(v->current, value)) { crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname); free(v->current); if(value) { v->current = strdup(value); } else { v->current = NULL; } changed = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } a->changed |= changed; if(changed) { if(a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id); mainloop_timer_start(a->timer); } else { write_or_elect_attribute(a); } } /* this only involves cluster nodes. */ if(v->nodeid == 0 && (v->is_remote == FALSE)) { if(crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0) { /* Create the name/id association */ crm_node_t *peer = crm_get_peer(v->nodeid, host); crm_trace("We know %s's node id now: %s", peer->uname, peer->uuid); if(election_state(writer) == election_won) { write_attributes(FALSE, TRUE); return; } } } } void write_or_elect_attribute(attribute_t *a) { enum election_result rc = election_state(writer); if(rc == election_won) { write_attribute(a); } else if(rc == election_in_progress) { crm_trace("Election in progress to determine who will write out %s", a->id); } else if(peer_writer == NULL) { crm_info("Starting an election to determine who will write out %s", a->id); election_vote(writer); } else { crm_trace("%s will write out %s, we are in state %d", peer_writer, a->id, rc); } } gboolean attrd_election_cb(gpointer user_data) { crm_trace("Election complete"); free(peer_writer); peer_writer = strdup(attrd_cluster->uname); /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); /* Update the CIB after an election */ write_attributes(TRUE, FALSE); return FALSE; } void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { if ((kind == crm_status_nstate) || (kind == crm_status_rstate)) { if (safe_str_eq(peer->state, CRM_NODE_MEMBER)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if ((election_state(writer) == election_won) && !is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { /* Remove all attribute values associated with lost nodes */ attrd_peer_remove(peer->id, peer->uname, FALSE, __FUNCTION__); if (peer_writer && safe_str_eq(peer->uname, peer_writer)) { free(peer_writer); peer_writer = NULL; crm_notice("Lost attribute writer %s", peer->uname); } } } } static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); goto done; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "Update %d for %s: %s (%d)", call_id, name, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "Update %d for %s[%s]=%s: %s (%d)", call_id, a->id, peer, v->requested, pcmk_strerror(rc), rc); if(rc == pcmk_ok) { free(v->stored); v->stored = v->requested; v->requested = NULL; } else { free(v->requested); v->requested = NULL; a->changed = TRUE; /* Attempt write out again */ } } done: if(a && a->changed && election_state(writer) == election_won) { write_attribute(a); } } void write_attributes(bool all, bool peer_discovered) { GHashTableIter iter; attribute_t *a = NULL; g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (peer_discovered && a->unknown_peer_uuids) { /* a new peer uuid has been discovered, try writing this attribute again. */ a->changed = TRUE; } if(all || a->changed) { write_attribute(a); } else { crm_debug("Skipping unchanged attribute %s", a->id); } } } static void build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) { char *set = NULL; char *uuid = NULL; xmlNode *xml_obj = NULL; if(a->set) { set = strdup(a->set); } else { set = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, nodeid); } if(a->uuid) { uuid = strdup(a->uuid); } else { int lpc; uuid = crm_strdup_printf("%s-%s", set, a->id); /* Minimal attempt at sanitizing automatic IDs */ for (lpc = 0; uuid[lpc] != 0; lpc++) { switch (uuid[lpc]) { case ':': uuid[lpc] = '.'; } } } xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, set); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, uuid); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); if(value) { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); } else { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); } free(uuid); free(set); } void write_attribute(attribute_t *a) { int private_updates = 0, cib_updates = 0; xmlNode *xml_top = NULL; attribute_value_t *v = NULL; GHashTableIter iter; enum cib_call_options flags = cib_quorum_override; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... */ if (!a->is_private) { /* Defer the write if now's not a good time */ if (the_cib == NULL) { crm_info("Write out of '%s' delayed: cib not connected", a->id); return; } else if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); return; } else if (mainloop_timer_running(a->timer)) { crm_info("Write out of '%s' delayed: timer is running", a->id); return; } /* Initialize the status update XML */ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); } /* Attribute will be written shortly, so clear changed flag */ a->changed = FALSE; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = FALSE; /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Updating value's nodeid"); v->nodeid = peer->id; } /* If this is a private attribute, no update needs to be sent */ if (a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = TRUE; crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* Add this value to status update XML */ crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id, v->current, peer->uuid, peer->id, v->nodeid, peer->uname); build_update_element(xml_top, a, peer->uuid, v->current); cib_updates++; free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } else { /* Older attrd versions don't know about the cib_mixed_update * flag so make sure it goes to the local cib which does */ flags |= cib_mixed_update|cib_scope_local; } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, ((private_updates == 1)? "" : "s"), a->id, (a->uuid? a->uuid : ""), a->set); } if (cib_updates) { crm_log_xml_trace(xml_top, __FUNCTION__); a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL, flags, a->user); crm_info("Sent update %d with %d changes for %s, id=%s, set=%s", a->update, cib_updates, a->id, (a->uuid? a->uuid : ""), a->set); the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback, free); } free_xml(xml_top); } diff --git a/crmd/fsa.c b/crmd/fsa.c index cf54d193e8..1b0962fae4 100644 --- a/crmd/fsa.c +++ b/crmd/fsa.c @@ -1,675 +1,677 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; char *fsa_our_uuid = NULL; char *fsa_our_uname = NULL; char *fsa_cluster_name = NULL; #if SUPPORT_HEARTBEAT ll_cluster_t *fsa_cluster_conn; #endif election_t *fsa_election = NULL; fsa_timer_t *wait_timer = NULL; /* How long to wait before retrying to connect to the cib/lrmd/ccm */ fsa_timer_t *recheck_timer = NULL; /* Periodically re-run the PE to account for time based rules/preferences */ fsa_timer_t *election_trigger = NULL; /* How long to wait at startup, or after an election, for the DC to make contact */ fsa_timer_t *transition_timer = NULL; /* How long to delay the start of a new transition with the expectation something else might happen too */ fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *shutdown_escalation_timer = NULL; /* How long to wait for the DC to stop all resources and give us the all-clear to shut down */ volatile gboolean do_fsa_stall = FALSE; volatile long long fsa_input_register = 0; volatile long long fsa_actions = A_NOTHING; volatile enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) crm_trace( fmt, ##args) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data); void dump_rsc_info(void); void dump_rsc_info_callback(const xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { fsa_actions &= ~an_action; crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); } static long long startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_CCM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_actions, "Initial"); do_fsa_stall = FALSE; if (is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while (is_message() && do_fsa_stall == FALSE) { crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); if(fsa_data == NULL) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; fsa_dump_actions(fsa_data->actions, "Restored actions"); /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; fsa_dump_actions(new_actions, "New actions"); if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* logging : *before* the state is changed */ if (is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Remove certain actions during shutdown */ if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) { clear_bit(fsa_actions, startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != fsa_state) { fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false"); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); } fsa_dump_actions(fsa_actions, "Remaining"); fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (fsa_actions & A_ERROR) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (fsa_actions & A_WARN) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (fsa_actions & A_LOG) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! before anything worse happens */ } else if (fsa_actions & A_EXIT_1) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (fsa_actions & A_STARTUP) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (fsa_actions & A_CIB_START) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (fsa_actions & A_HA_CONNECT) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (fsa_actions & A_READCONFIG) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (fsa_actions & A_LRM_CONNECT) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_CONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_CONNECT; } else if (fsa_actions & A_TE_START) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (fsa_actions & A_PE_START) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ /* else if(fsa_actions & O_DC_TIMER_RESTART) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ; } else if (fsa_actions & A_DC_TIMER_STOP) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_STOP) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_START) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_STOP) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_START) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (fsa_actions & A_MSG_ROUTE) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (fsa_actions & A_RECOVER) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (fsa_actions & A_CL_JOIN_RESULT) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (fsa_actions & A_CL_JOIN_REQUEST) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (fsa_actions & A_SHUTDOWN_REQ) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (fsa_actions & A_ELECTION_VOTE) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (fsa_actions & A_ELECTION_COUNT) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); } else if (fsa_actions & A_LRM_EVENT) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); /* * High priority actions */ } else if (fsa_actions & A_STARTED) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (fsa_actions & A_CL_JOIN_QUERY) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (fsa_actions & A_DC_TIMER_START) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions * - Membership */ } else if (fsa_actions & A_DC_TAKEOVER) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (fsa_actions & A_DC_RELEASE) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (fsa_actions & A_DC_JOIN_FINAL) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (fsa_actions & A_ELECTION_CHECK) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (fsa_actions & A_ELECTION_START) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); } else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); } else if (fsa_actions & A_DC_JOIN_FINALIZE) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ } else if (fsa_actions & A_TE_HALT) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (fsa_actions & A_TE_CANCEL) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (fsa_actions & A_LRM_INVOKE) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (fsa_actions & A_PE_INVOKE) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (fsa_actions & A_TE_INVOKE) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); /* Shutdown actions */ } else if (fsa_actions & A_DC_RELEASED) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (fsa_actions & A_PE_STOP) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (fsa_actions & A_TE_STOP) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (fsa_actions & A_SHUTDOWN) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (fsa_actions & A_LRM_DISCONNECT) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_DISCONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_DISCONNECT; } else if (fsa_actions & A_HA_DISCONNECT) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (fsa_actions & A_CIB_STOP) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (fsa_actions & A_STOP) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (fsa_actions & A_EXIT_0) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { crm_err("Action %s (0x%llx) not supported ", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t * stored_msg) { CRM_ASSERT(stored_msg); crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_trace("FSA processing CCM callback from %s", stored_msg->origin); } else if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg, __FUNCTION__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data) { int level = LOG_INFO; long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if (cur_state == S_ELECTION) { level = LOG_NOTICE; } else if (cur_state == S_STARTING) { level = LOG_NOTICE; } else if (next_state == S_RECOVERY) { level = LOG_WARNING; } - do_crm_log(level, "State transition %s -> %s [ input=%s cause=%s origin=%s ]", - state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); + do_crm_log(level, "State transition %s -> %s " + CRM_XS " input=%s cause=%s origin=%s", + state_from, state_to, input, fsa_cause2string(cause), + msg_data->origin); /* the last two clauses might cause trouble later */ if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { election_timeout_stop(fsa_election); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if ((fsa_input_register & R_SHUTDOWN)) { set_bit(tmp, A_DC_TIMER_STOP); } #endif if (next_state == S_INTEGRATION) { set_bit(tmp, A_INTEGRATE_TIMER_START); } else { set_bit(tmp, A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { set_bit(tmp, A_FINALIZE_TIMER_START); } else { set_bit(tmp, A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { set_bit(tmp, A_DC_TIMER_STOP); } if (next_state != S_ELECTION) { highest_born_on = 0; } if (next_state != S_IDLE) { crm_timer_stop(recheck_timer); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); } switch (next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); update_dc(NULL); break; case S_NOT_DC: election_trigger->counter = 0; purge_stonith_cleanup(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit(tmp, A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(fsa_our_dc != NULL); if (fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_welcomed) > 0) { crm_warn("%u cluster nodes failed to respond" " to the join offer.", crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_NOTICE); } else { crm_debug("All %d cluster nodes responded to the join offer.", crmd_join_phase_count(crm_join_integrated)); } break; case S_POLICY_ENGINE: election_trigger->counter = 0; CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_finalized) > 0) { crm_err("%u cluster nodes failed to confirm their join.", crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_NOTICE); } else if (crmd_join_phase_count(crm_join_confirmed) == crm_active_peers()) { crm_debug("All %u cluster nodes are" " eligible to run resources.", crm_active_peers()); } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) { crm_err("We have more confirmed nodes than our membership does: %d vs. %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers()); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (saved_ccm_membership_id != crm_peer_seq) { crm_info("Membership changed: %llu -> %llu - join restart", saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligible to run resources - continue %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers(), crmd_join_phase_count(crm_join_welcomed)); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ set_bit(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); dump_rsc_info(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); set_bit(tmp, A_SHUTDOWN_REQ); } if (recheck_timer->period_ms > 0) { crm_debug("Starting %s", get_timer_desc(recheck_timer)); crm_timer_start(recheck_timer); } break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { tmp &= ~A_RECOVER; } else if (clear_recovery_bit == FALSE) { tmp |= A_RECOVER; } if (tmp != actions) { /* fsa_dump_actions(actions ^ tmp, "New actions"); */ actions = tmp; } return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; const char *value_s = value; crm_info("%s: %s %s", text, uname, value_s); } diff --git a/crmd/main.c b/crmd/main.c index 75ed91ca44..9c1ce944ab 100644 --- a/crmd/main.c +++ b/crmd/main.c @@ -1,168 +1,168 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" void usage(const char *cmd, int exit_status); int crmd_init(void); void crmd_hamsg_callback(const xmlNode * msg, void *private_data); extern void init_dotfile(void); GMainLoop *crmd_mainloop = NULL; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int index = 0; int argerr = 0; crmd_mainloop = g_main_new(FALSE); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response"); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { crmd_metadata(); return 0; } else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) { fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); return 0; } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_notice("CRM Git Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); + crm_info("CRM Git Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if (crm_is_writable(PE_STATE_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on " PE_STATE_DIR ". Terminating"); fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR ". See logs for details\n"); fflush(stderr); return 100; } else if (crm_is_writable(CRM_CONFIG_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on " CRM_CONFIG_DIR ". Terminating"); fprintf(stderr, "ERROR: Bad permissions on " CRM_CONFIG_DIR ". See logs for details\n"); fflush(stderr); return 100; } return crmd_init(); } int crmd_init(void) { int exit_code = 0; enum crmd_fsa_state state; fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the regester */ init_dotfile(); crm_debug("Starting %s", crm_system_name); register_fsa_input(C_STARTUP, I_STARTUP, NULL); crm_peer_init(); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crm_trace("Starting %s's mainloop", crm_system_name); #ifdef REALTIME_SUPPORT static int crm_realtime = 1; if (crm_realtime == 1) { cl_enable_realtime(); } else if (crm_realtime == 0) { cl_disable_realtime(); } cl_make_realtime(SCHED_RR, 5, 64, 64); #endif g_main_run(crmd_mainloop); if (is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = 100; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = 1; } crm_info("%u stopped: %s (%d)", getpid(), pcmk_strerror(exit_code), exit_code); return crmd_fast_exit(exit_code); } diff --git a/cts/patterns.py b/cts/patterns.py index 784641288b..45e43aa5c1 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,545 +1,545 @@ import sys, os from cts.CTSvars import * patternvariants = {} class BasePatterns: def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", ] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "UUIDQueryCmd" : "crmadmin -N", "SetCheckInterval" : "cibadmin --modify -c --xml-text ''", "ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -VQ -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This wont work if we have multiple partitions "Pat:Local_started" : "%s\W.*The local CRM is operational", "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC", "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s\W.*logd:.*Exiting write process", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:Fencing_start" : "Initiating remote operation .* for %s", "Pat:Fencing_ok" : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK", "Pat:Fencing_recover" : r"pengine.*: Recover %s", "Pat:RscOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(.*confirmed=\S+\)", "Pat:RscRemoteOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(node=%s,.*,\s*confirmed=true\)", "Pat:NodeFenced" : r"crmd.*:\s*Peer\s+%s\s+was\s+terminated\s+\(.*\)\s+by\s+.*\s+for\s+.*:\s+OK", "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if key in self.components: return self.components[key] print("Unknown component '%s' for %s" % (key, self.name)) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif key in self.commands: return self.commands[key] elif key in self.search: return self.search[key] else: print("Unknown template '%s' for %s" % (key, self.name)) return None class crm_lha(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service heartbeat start > /dev/null 2>&1", "StopCmd" : "service heartbeat stop > /dev/null 2>&1", "EpochCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "PartitionCmd" : "crm_node -H -p", }) self.search.update({ # Patterns to look for in the log files for various occasions... "Pat:ChildKilled" : "%s\W.*heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "(ERROR|error): Client .* exited with return code", }) self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ] self.ignore = self.ignore + [ r"(ERROR|error):.*\s+assert\s+at\s+crm_glib_handler:" "(ERROR|error): Message hist queue is filling up", "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist", "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.", "pengine.*Preventing .* from re-starting", ] class crm_cs_v0(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "EpochCmd" : "crm_node -e --openais", "QuorumCmd" : "crm_node -q --openais", "PartitionCmd" : "crm_node -p --openais", "StartCmd" : "service corosync start", "StopCmd" : "service corosync stop", }) self.search.update({ # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s\W.*Service engine unloaded: corosync cluster quorum service", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildExit" : "Child process .* exited", "Pat:ChildKilled" : "%s\W.*corosync.*Child process %s terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*corosync.*CRM: Initialized", }) self.ignore = self.ignore + [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:", r"Child process .* terminated with signal 9", r"getinfo response error: 1$", "sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17", ] self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"The .* process .* terminated with signal", r"Child process .* terminated with signal", r"pengine:.*Recover .*\(.* -\> .*\)", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"crmd:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", #r"Executing .* fencing operation", #r"fence_pcmk.* Call to fence", #r"fence_pcmk", r"cman killed by node", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ "Pending action:", "error: crm_log_message_adv:", "resources were active at shutdown", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff:.*Diff application failed!", r"crmd.*:\s*Action A_RECOVER .* not supported", "unconfirmed_actions:.*Waiting on .* unconfirmed actions", "cib_native_msgready:.*Message pending on command channel", r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped:.*Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", "error: attrd_connection_destroy:.*Lost connection to attrd", r".*:\s*Executing .* fencing operation \(.*\) on ", r"(Blackbox dump requested|Problem detected)", # "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", # "error: process_pe_message: Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error:.*Connection to the CPG API failed: Library error", r"The .* process .* exited", r"pacemakerd.*error:.*Child process .* exited", r"cib.*error:.*Corosync connection lost", r"stonith-ng.*error:.*Corosync connection terminated", r"The cib process .* exited: Invalid argument", r"The attrd process .* exited: Transport endpoint is not connected", r"The crmd process .* exited: Link has been severed", r"error:.*Child process cib .* exited: Invalid argument", r"error:.*Child process attrd .* exited: Transport endpoint is not connected", r"error:.*Child process crmd .* exited: Link has been severed", r"lrmd.*error:.*Connection to stonith-ng failed", r"lrmd.*error:.*Connection to stonith-ng.* closed", r"lrmd.*error:.*LRMD lost STONITH connection", r"crmd.*State transition .* S_RECOVERY", r"crmd.*error:.*FSA: Input I_ERROR", r"crmd.*error:.*FSA: Input I_TERMINATE", r"crmd.*error:.*Connection to cman failed", r"crmd.*error:.*Could not recover from internal error", r"error:.*Connection to cib_shm failed", r"error:.*Connection to cib_shm.* closed", r"error:.*STONITH connection failed", r"error: Connection to stonith-ng failed", r"crit: Fencing daemon connection failed", r"error: Connection to stonith-ng.* closed", ] self.components["corosync"] = [ r"pacemakerd.*error:.*Connection destroyed", r"attrd.*:\s*crit:.*Lost connection to Corosync service", r"stonith.*:\s*Corosync connection terminated", r"cib.*:\s*Corosync connection lost!\s+Exiting.", r"crmd.*:\s*connection terminated", r"pengine.*Scheduling Node .* for STONITH", r"crmd.*:\s*Peer %s was terminated \(.*\) by .* for .*:\s*OK", ] self.components["cib-ignore"] = [ "lrmd.*Connection to stonith-ng failed", "lrmd.*Connection to stonith-ng.* closed", "lrmd.*LRMD lost STONITH connection", "lrmd.*STONITH connection failed, finalizing .* pending operations", ] self.components["cib"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Connection to cib_.* failed", "Connection to cib_.* closed", "Connection to the CIB terminated...", "(Child process|The) crmd .* exited: Generic Pacemaker error", "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)", "Lost connection to CIB service", "crmd.*Input I_TERMINATE from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*Could not recover from internal error", ] self.components["lrmd"] = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "Connection to lrmd failed", "Connection to lrmd.* closed", "crmd.*I_ERROR.*lrm_connection_destroy", "(Child process|The) crmd .* exited: Generic Pacemaker error", "crmd.*Input I_TERMINATE from do_recover", "crmd.*Could not recover from internal error", ] self.components["lrmd-ignore"] = [] self.components["crmd"] = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["crmd-ignore"] = [] self.components["attrd"] = [] self.components["attrd-ignore"] = [] self.components["pengine"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "(The|Child process) crmd .* exited: Generic Pacemaker error", "Connection to pengine failed", "Connection to pengine.* closed", "Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", "crmd.*Input I_TERMINATE from do_recover", "crmd.*Could not recover from internal error", ] self.components["pengine-ignore"] = [] self.components["stonith"] = [ "Connection to stonith-ng failed", "LRMD lost STONITH connection", "Connection to stonith-ng.* closed", "Fencing daemon connection failed", r"crmd.*:\s*warn.*:\s*Callback already present", ] self.components["stonith-ignore"] = [ r"pengine.*: Recover Fencing", r"Updating failcount for Fencing", r"error:.*Connection to stonith-ng failed", r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)", r"crit:.*Fencing daemon connection failed", r"error:.*Sign-in failed: triggered a retry", "STONITH connection failed, finalizing .* pending operations.", r"crmd.*:\s*Operation Fencing.* Error", ] self.components["stonith-ignore"].extend(self.components["common-ignore"]) class crm_mcp(crm_cs_v0): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "PartitionCmd" : "crm_node -p", }) self.search.update({ # Close enough... "Corosync Cluster Engine exiting normally" isn't printed # reliably and there's little interest in doing anything about it "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", - "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost", "Pat:ChildExit" : "The .* process exited", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) # if self.Env["have_systemd"]: # self.update({ # # When systemd is in use, we can look for this instead # "Pat:We_stopped" : "%s.*Stopped Corosync Cluster Engine", # }) class crm_mcp_docker(crm_mcp): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_mcp.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class crm_cman(crm_cs_v0): ''' The crm version 3 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop", "EpochCmd" : "crm_node -e --cman", "QuorumCmd" : "crm_node -q --cman", "PartitionCmd" : "crm_node -p --cman", }) self.search.update({ "Pat:We_stopped" : "%s.*Unloading all Corosync service engines", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", - "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) class PatternSelector: def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_cs_v0("crm-plugin-v0") crm_cman("crm-cman") crm_mcp("crm-mcp") crm_lha("crm-lha") elif name == "crm-lha": crm_lha(name) elif name == "crm-plugin-v0": crm_cs_v0(name) elif name == "crm-cman": crm_cman(name) elif name == "crm-mcp": crm_mcp(name) elif name == "crm-mcp-docker": crm_mcp_docker(name) def get_variant(self, variant): if variant in patternvariants: return patternvariants[variant] print("defaulting to crm-base for %s" % variant) return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-mcp -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print("Illegal argument " + args[i]) print(PatternSelector(kind)[template]) diff --git a/fencing/main.c b/fencing/main.c index 1d50355e9c..34f393a8ba 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,1546 +1,1546 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean no_cib_connect = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; xmlNode *local_cib = NULL; GHashTable *known_peer_names = NULL; static cib_t *cib_api = NULL; static void *cib_library = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void st_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection created for %p", c); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; crm_client_t *c = crm_client_get(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } op = crm_element_value(request, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); crm_client_destroy(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_err("Corosync connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); client_obj = crm_client_get_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = -1; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event); } if (local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return 0x01; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error); if (rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, crm_client_name(client), client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { crm_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = crm_client_get_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_connections, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (safe_str_neq(standard, "stonith")) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init() { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); if(topology) { g_hash_table_destroy(topology); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static node_t * our_node_allowed_for(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; node_t *parent = NULL; gboolean remove = TRUE; /* If this is a complex resource, check children rather than this resource itself. * TODO: Mark each installed device and remove if untouched when this process finishes. */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(rsc->variant == pe_clone || rsc->variant == pe_master) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(safe_str_neq(rclass, "stonith")) { return; } /* If this STONITH resource is disabled, just remove it. */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (safe_str_eq(value, RSC_STOPPED)) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { /* Our node is allowed, so update the device information */ xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides); stonith_device_register(data, NULL, TRUE); stonith_key_value_freeall(params, 1, 1); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GListPtr gIter = NULL; pe_working_set_t data_set; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); set_working_set_defaults(&data_set); data_set.input = local_cib; data_set.now = crm_time_new(NULL); data_set.flags |= pe_flag_quick_location; data_set.localhost = stonith_our_uname; cluster_status(&data_set); do_calculations(&data_set, NULL, NULL); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, &data_set); } data_set.input = NULL; /* Wasn't a copy */ cleanup_alloc_calculations(&data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if(op == NULL || strcmp(op, "move") == 0) { continue; } else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS)) { needs_update = TRUE; break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if(strstr(xpath, "/"XML_CIB_TAG_RESOURCES)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } else if(strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, "stonith")) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /* * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_TRACE); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { long timeout_ms = 0; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if(value) { timeout_ms = crm_get_msec(value); } if(timeout_ms != stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } else { stonith_watchdog_timeout_ms = 0; } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); fencing_topology_init(); cib_devices_update(); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); crm_exit(pcmk_ok); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB closed."); return; } else { crm_notice("Connection to the CIB terminated. Shutting down."); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } g_hash_table_destroy(known_peer_names); known_peer_names = NULL; crm_peer_destroy(); crm_client_cleanup(); free(stonith_our_uname); free_xml(local_cib); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, {"logfile", 1, 0, 'l'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { int rc, retries = 0; static cib_t *(*cib_new_fn) (void) = NULL; if (cib_new_fn == NULL) { cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); } if (cib_new_fn != NULL) { cib_api = (*cib_new_fn) (); } if (cib_api == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_notice("Watching for stonith topology changes"); + crm_info("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { xmlNode *query = NULL; if (node->id && node->uname) { g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname)); } /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "list", "monitor", "status" }; crm_log_preinit("stonith-ng", argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': crm_add_logfile(optarg); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf (" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf (" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_MAX); printf (" Enable random delay for stonith actions and specify the maximum of random delay\n"); printf (" This prevents double fencing when using slow devices such as sbd.\n" "Use this to enable random delay for stonith actions and specify the maximum of random delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Pengine property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free); if (stand_alone == FALSE) { #if SUPPORT_HEARTBEAT cluster.hb_conn = NULL; cluster.hb_dispatch = stonith_peer_hb_callback; cluster.destroy = stonith_peer_hb_destroy; #endif if (is_openais_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(DAEMON_RESPAWN_STOP); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { /* crm_cluster_connect() registered us for crm_system_name, which * usually is the only F_TYPE used by the respective sub system. * Stonith needs to register two additional F_TYPE callbacks, * because it can :-/ */ if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_NOTIFY, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_NOTIFY, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_TIMEOUT_VALUE, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_TIMEOUT_VALUE, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } } #endif if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); if(stonith_watchdog_timeout_ms > 0) { xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL); stonith_device_register(xml, NULL, FALSE); stonith_key_value_freeall(params, 1, 1); free_xml(xml); } stonith_ipc_server_init(&ipcs, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); stonith_cleanup(); #if SUPPORT_HEARTBEAT if (cluster.hb_conn) { cluster.hb_conn->llc_ops->delete(cluster.hb_conn); } #endif crm_info("Done"); return crm_exit(rc); } diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 93a10e2d23..5cd1efd6a4 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,634 +1,638 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include quorum_handle_t pcmk_quorum_handle = 0; gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; /* * CFG functionality stolen from node_name() in corosync-quorumtool.c * This resolves the first address assigned to a node and returns the name or IP address. */ char * corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { int lpc = 0; int rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; /* nodeid == 0 == CMAN_NODEID_US */ if (nodeid == 0) { nodeid = get_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = cmap_initialize(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d %s", nodeid, lpc, name); if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, name); if (node_name_is_valid(key, name) == FALSE) { free(name); name = NULL; } free(key); } if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s %d", key, name, rc); free(key); } break; } lpc++; } if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } void terminate_cs_connection(crm_cluster_t *cluster) { crm_notice("Disconnecting from Corosync"); cluster_disconnect_cpg(cluster); if (pcmk_quorum_handle) { crm_trace("Disconnecting quorum"); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } else { crm_info("No Quorum connection"); } } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static int pcmk_quorum_dispatch(gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); pcmk_quorum_handle = 0; return -1; } return 0; } static void pcmk_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) { int i; GHashTableIter iter; crm_node_t *node = NULL; static gboolean init_phase = TRUE; if (quorate != crm_have_quorum) { crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "acquired" : "lost", (long unsigned int)view_list_entries); crm_have_quorum = quorate; } else { crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset last_seen for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->last_seen = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = crm_get_peer(id, NULL); if (node->uname == NULL) { char *name = corosync_node_name(0, id); crm_info("Obtaining name for new node %u", id); node = crm_get_peer(id, name); free(name); } /* Update the node state (including updating last_seen to ring_id) */ crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ crm_reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = pcmk_quorum_notification, }; gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { int rc = -1; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured\n"); goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); goto bail; } - crm_notice("Quorum %s", quorate ? "acquired" : "lost"); + if (quorate) { + crm_notice("Quorum acquired"); + } else { + crm_warn("Quorum lost"); + } quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d\n", rc); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %d\n", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); corosync_initialize_nodelist(NULL, FALSE, NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); return FALSE; } return TRUE; } gboolean init_cs_connection(crm_cluster_t * cluster) { int retries = 0; while (retries < 5) { int rc = init_cs_connection_once(cluster); retries++; switch (rc) { case CS_OK: return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: sleep(retries); break; default: return FALSE; } } crm_err("Could not connect to corosync after %d retries", retries); return FALSE; } gboolean init_cs_connection_once(crm_cluster_t * cluster) { crm_node_t *peer = NULL; enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ if (stack != pcmk_cluster_corosync) { crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; } if (cluster_connect_cpg(cluster) == FALSE) { return FALSE; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); cluster->nodeid = get_local_nodeid(0); if(cluster->nodeid == 0) { crm_err("Could not establish local nodeid"); return FALSE; } cluster->uname = get_node_name(0); if(cluster->uname == NULL) { crm_err("Could not establish local node name"); return FALSE; } /* Ensure the local node always exists */ peer = crm_get_peer(cluster->nodeid, cluster->uname); cluster->uuid = get_corosync_uuid(peer); return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verified message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } enum cluster_type_e find_corosync_variant(void) { int rc = CS_OK; cmap_handle_t handle; rc = cmap_initialize(&handle); switch(rc) { case CS_OK: break; case CS_ERR_SECURITY: crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc); /* It's there, we just can't talk to it. * Good enough for us to identify as 'corosync' */ return pcmk_cluster_corosync; default: crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { if (node == NULL) { crm_trace("NULL"); return FALSE; } else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if ((node->processes & crm_proc_cpg) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } return TRUE; } gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) { int lpc = 0; int rc = CS_OK; int retries = 0; gboolean any = FALSE; cmap_handle_t cmap_handle; do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } crm_peer_init(); crm_trace("Initializing corosync nodelist"); for (lpc = 0;; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = corosync_node_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node && node->uname && strcasecmp(node->uname, name) == 0) { if (node->id && node->id != nodeid) { crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, nodeid, name); crm_exit(DAEMON_RESPAWN_STOP); } } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); crm_get_peer(nodeid, name); } if (nodeid > 0 && name != NULL) { any = TRUE; if (xml_parent) { char buffer[64]; xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); if(snprintf(buffer, 63, "%u", nodeid) > 0) { crm_xml_add(node, XML_ATTR_ID, buffer); } crm_xml_add(node, XML_ATTR_UNAME, name); if (force_member) { crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER); } } } free(name); } cmap_finalize(cmap_handle); return any; } char * corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; int rc = CS_OK; rc = cmap_initialize(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); return NULL; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } cmap_finalize(handle); return cluster_name; } int corosync_cmap_has_config(const char *prefix) { int rc = CS_OK; int retries = 0; static int found = -1; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; if(found != -1) { return found; } do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API: %s (rc=%d)", cs_strerror(rc), rc); return -1; } rc = cmap_iter_init(cmap_handle, prefix, &iter_handle); if (rc != CS_OK) { crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)", prefix, cs_strerror(rc), rc); goto bail; } found = 0; while ((rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL)) == CS_OK) { crm_trace("'%s' is configured in corosync cmap: %s", prefix, key_name); found++; break; } cmap_iter_finalize(cmap_handle, iter_handle); bail: cmap_finalize(cmap_handle); return found; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 9d17bfb259..56c76136e1 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1102 +1,1105 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * crm_remote_peer_get(), crm_remote_peer_cache_add() and * crm_remote_peer_cache_remove() directly manage it, * while crm_remote_peer_cache_refresh() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; int crm_remote_peer_cache_size(void) { if (crm_remote_peer_cache == NULL) { return 0; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \brief Get a remote node peer cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, NULL (and set errno) otherwise * * \note When creating a new entry, this will leave the node state undetermined, * so the caller should also call crm_update_peer_state() if the state is * known. */ crm_node_t * crm_remote_peer_get(const char *node_name) { crm_node_t *node; if (node_name == NULL) { errno = -EINVAL; return NULL; } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { return NULL; } /* Populate the essential information */ node->flags = crm_remote_node; node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = -ENOMEM; return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ crm_update_peer_uname(node, node_name); return node; } /*! * \brief Add a node to the remote peer cache * * \param[in] node_name Name of remote node * * \note This is a legacy convenience wrapper for crm_remote_peer_get() * for callers that don't need the cache entry returned. */ void crm_remote_peer_cache_add(const char *node_name) { CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } void crm_remote_peer_cache_remove(const char *node_name) { if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { crm_trace("removed %s from remote peer cache", node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state, * CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older crmd versions that don't set it. */ static const char * remote_state_from_cib(xmlNode *node_state) { const char *status; status = crm_element_value(node_state, XML_NODE_IN_CLUSTER); if (status && !crm_is_true(status)) { status = CRM_NODE_LOST; } else { status = CRM_NODE_MEMBER; } return status; } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = crm_remote_peer_get(remote); CRM_ASSERT(node); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } else if (is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_bit(node->flags, crm_node_dirty); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_bit(((crm_node_t*)value)->flags, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /* search string to find CIB resources entries for guest nodes */ #define XPATH_GUEST_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \ "[@name='" XML_RSC_ATTR_REMOTE_NODE "']" /* search string to find CIB resources entries for remote nodes */ #define XPATH_REMOTE_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "[@type='remote'][@provider='pacemaker']" /* search string to find CIB node status entries for pacemaker_remote nodes */ #define XPATH_REMOTE_NODE_STATUS \ "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \ "[@" XML_NODE_IS_REMOTE "='true']" /*! * \brief Repopulate the remote peer cache based on CIB XML * * \param[in] xmlNode CIB XML to parse */ void crm_remote_peer_cache_refresh(xmlNode *cib) { struct refresh_data data; /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = "id"; data.has_state = TRUE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = "value"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = "id"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } gboolean crm_is_peer_active(const crm_node_t * node) { if(node == NULL) { return FALSE; } if (is_set(node->flags, crm_remote_node)) { /* remote nodes are never considered active members. This * guarantees they will never be considered for DC membership.*/ return FALSE; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_notice("Removing %s/%u from the membership list", node->uname, node->id); return TRUE; } return FALSE; } /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search; if (crm_peer_cache == NULL) { crm_trace("Nothing to do, cache not initialized"); return 0; } search.id = id; search.uname = name ? strdup(name) : NULL; matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); if(matches) { crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", matches, search.id, search.uname); } else { crm_info("No peers with id=%u and/or uname=%s exist", id, name); } free(search.uname); return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void crm_dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { if(value == user_data) { return TRUE; } return FALSE; } crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (flags & CRM_GET_PEER_REMOTE) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_find_peer(id, uname); } return node; } crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (flags & CRM_GET_PEER_REMOTE) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_get_peer(id, uname); } return node; } crm_node_t * crm_find_peer(unsigned int id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(safe_str_eq(uname, by_id->uname)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); crm_dump_peer_hash(LOG_INFO, __FUNCTION__); crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync based clusters use nodeid's * * The functions that call crm_update_peer_state() only know nodeid * so 'by_id' is authorative when merging * * Same for crm_update_peer_proc() */ crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint crm_remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } # if !SUPPORT_PLUGIN if (corosync_cmap_has_config("nodelist") != 0) { return 0; } # endif g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (crm_is_peer_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); node = crm_find_peer(id, uname); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = crm_find_peer(id, uname); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { crm_update_peer_uname(node, uname); } if(node->uuid == NULL) { const char *uuid = crm_peer_uuid(node); if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update all of a node's information (process list, state, etc.) * * \param[in] source Caller's function name (for log messages) * * \return NULL if node was reaped from peer caches, pointer to node otherwise * * \note This function should not be called within a peer cache iteration, * otherwise reaping could invalidate the iterator. */ crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { #if SUPPORT_PLUGIN gboolean addr_changed = FALSE; gboolean votes_changed = FALSE; #endif crm_node_t *node = NULL; id = get_corosync_id(id, uuid); node = crm_get_peer(id, uname); CRM_ASSERT(node != NULL); if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ crm_peer_uuid(node); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0) { if (crm_update_peer_proc(source, node, children, state) == NULL) { return NULL; } } if (state != NULL) { if (crm_update_peer_state(source, node, state, seen) == NULL) { return NULL; } } #if SUPPORT_HEARTBEAT if (born != 0) { node->born = born; } #endif #if SUPPORT_PLUGIN /* These were only used by the plugin */ if (born != 0) { node->born = born; } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (addr_changed || votes_changed) { crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x", source, node->uname, node->id, node->state, node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes); } #endif return node; } /*! * \internal * \brief Update a node's uname * * \param[in] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ void crm_update_peer_uname(crm_node_t *node, const char *uname) { int i, len = strlen(uname); for (i = 0; i < len; i++) { if (uname[i] >= 'A' && uname[i] <= 'Z') { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } free(node->uname); node->uname = strdup(uname); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) { crm_remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (is_set(node->flags, crm_remote_node)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) != flag) { set_bit(node->processes, flag); changed = TRUE; } #if SUPPORT_PLUGIN } else if (safe_str_eq(status, CRM_NODE_MEMBER)) { if (flag > 0 && node->processes != flag) { node->processes = flag; changed = TRUE; } #endif } else if (node->processes & flag) { clear_bit(node->processes, flag); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, peer2text(flag), status); } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (crm_peer_cache == NULL) { return NULL; } if (crm_autoreap) { node = crm_update_peer_state(__FUNCTION__, node, is_set(node->processes, crm_get_cluster_proc())? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, peer2text(flag), status); } return node; } void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (is_set(node->flags, crm_remote_node)) { return; } last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, expected, last); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. */ static crm_node_t * crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter) { gboolean is_member; - CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state); - return NULL); + CRM_CHECK(node != NULL, + crm_err("Could not set state for unknown host to %s" + CRM_XS " source=%s", state, source); + return NULL); is_member = safe_str_eq(state, CRM_NODE_MEMBER); if (membership && is_member) { node->last_seen = membership; } if (state && safe_str_neq(node->state, state)) { char *last = node->state; enum crm_status_type status_type = is_set(node->flags, crm_remote_node)? crm_status_rstate : crm_status_nstate; node->state = strdup(state); - crm_notice("%s: Node %s[%u] - state is now %s (was %s)", - source, node->uname, node->id, state, last); + crm_notice("Node %s state is now %s " CRM_XS + " nodeid=%u previous=%s source=%s", node->uname, state, + node->id, (last? last : "unknown"), source); if (crm_status_callback) { crm_status_callback(status_type, node, last); } free(last); if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by * crm_remote_peer_cache_refresh(). */ if(iter) { crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); } else { reap_crm_member(node->id, node->uname); } node = NULL; } } else { - crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, - state); + crm_trace("Node %s state is unchanged (%s) " CRM_XS + " nodeid=%u source=%s", node->uname, state, node->id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) { return crm_update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void crm_reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling crm_update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } int crm_terminate_member(int nodeid, const char *uname, void *unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/mcp/corosync.c b/mcp/corosync.c index 24f3ba2753..975b93c410 100644 --- a/mcp/corosync.c +++ b/mcp/corosync.c @@ -1,480 +1,483 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include /* for calls to stat() */ #include /* For basename() and dirname() */ #include #include /* For getpwname() */ #include #include #include #if HAVE_CONFDB # include #endif #include #include #if SUPPORT_CMAN # include #endif #if HAVE_CMAP # include #endif enum cluster_type_e stack = pcmk_cluster_unknown; static corosync_cfg_handle_t cfg_handle; /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ static void cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) { crm_info("Corosync wants to shut down: %s", (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? "forced" : "optional"); /* Never allow corosync to shut down while we're running */ corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = cfg_shutdown_callback, }; static int pcmk_cfg_dispatch(gpointer user_data) { corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return -1; } return 0; } static void cfg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cfg_handle = 0; pcmk_shutdown(SIGTERM); } gboolean cluster_disconnect_cfg(void) { if (cfg_handle) { corosync_cfg_finalize(cfg_handle); cfg_handle = 0; } pcmk_shutdown(SIGTERM); return TRUE; } #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) gboolean cluster_connect_cfg(uint32_t * nodeid) { cs_error_t rc; int fd = 0, retries = 0; static struct mainloop_fd_callbacks cfg_fd_callbacks = { .dispatch = pcmk_cfg_dispatch, .destroy = cfg_connection_destroy, }; cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); if (rc != CS_OK) { crm_err("corosync cfg init error %d", rc); return FALSE; } rc = corosync_cfg_fd_get(cfg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cfg fd_get error %d", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid)); if (rc != CS_OK) { crm_err("corosync cfg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", *nodeid); mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); return TRUE; bail: corosync_cfg_finalize(cfg_handle); return FALSE; } /* =::=::=::= Configuration =::=::=::= */ #if HAVE_CONFDB static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } #else static int get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, const char *fallback) { int rc = 0, retries = 0; cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); if (rc != CS_OK) { crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); if (fallback) { *value = strdup(fallback); } else { *value = NULL; } } crm_trace("%s: %s", key, *value); return rc; } #endif #if HAVE_CONFDB # define KEY_PREFIX "" #elif HAVE_CMAP # define KEY_PREFIX "logging." #endif gboolean mcp_read_config(void) { int rc = CS_OK; int retries = 0; const char *const_value = NULL; #if HAVE_CONFDB char *value = NULL; confdb_handle_t config = 0; confdb_handle_t top_handle = 0; hdb_handle_t local_handle; static confdb_callbacks_t callbacks = { }; do { rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { retries++; printf("confdb connection setup failed: %s. Retrying in %ds\n", ais_error2text(rc), retries); crm_info("confdb connection setup failed: %s. Retrying in %ds", ais_error2text(rc), retries); sleep(retries); } else { break; } } while (retries < 5); #elif HAVE_CMAP cmap_handle_t local_handle; uint64_t config = 0; /* There can be only one (possibility if confdb isn't around) */ do { rc = cmap_initialize(&local_handle); if (rc != CS_OK) { retries++; printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); crm_info("cmap connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } else { break; } } while (retries < 5); #endif if (rc != CS_OK) { printf("Could not connect to Cluster Configuration Database API, error %d\n", rc); crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } stack = get_cluster_type(); crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); /* =::=::= Should we be here =::=::= */ if (stack == pcmk_cluster_corosync) { set_daemon_option("cluster_type", "corosync"); set_daemon_option("quorum_type", "corosync"); #if HAVE_CONFDB } else if (stack == pcmk_cluster_cman) { set_daemon_option("cluster_type", "cman"); set_daemon_option("quorum_type", "cman"); enable_crmd_as_root(TRUE); } else if (stack == pcmk_cluster_classic_ais) { set_daemon_option("cluster_type", "openais"); set_daemon_option("quorum_type", "pcmk"); /* Look for a service block to indicate our plugin is loaded */ top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { get_config_opt(config, local_handle, "ver", &value, "0"); if (safe_str_eq(value, "1")) { get_config_opt(config, local_handle, "use_logd", &value, "no"); set_daemon_option("use_logd", value); set_daemon_option("LOGD", value); get_config_opt(config, local_handle, "use_mgmtd", &value, "no"); enable_mgmtd(crm_is_true(value)); } else { crm_err("We can only start Pacemaker from init if using version 1" " of the Pacemaker plugin for Corosync. Terminating."); crm_exit(DAEMON_RESPAWN_STOP); } break; } local_handle = config_find_next(config, "service", top_handle); } free(value); #endif } else { crm_err("Unsupported stack type: %s", name_for_cluster_type(stack)); return FALSE; } #if HAVE_CONFDB top_handle = config_find_init(config); local_handle = config_find_next(config, "logging", top_handle); #endif /* =::=::= Logging =::=::= */ if (daemon_option("debug")) { /* Syslog logging is already setup by crm_log_init() */ } else { /* Check corosync */ char *debug_enabled = NULL; get_config_opt(config, local_handle, KEY_PREFIX "debug", &debug_enabled, "off"); if (crm_is_true(debug_enabled)) { set_daemon_option("debug", "1"); if (get_crm_log_level() < LOG_DEBUG) { set_crm_log_level(LOG_DEBUG); } } else { set_daemon_option("debug", "0"); } free(debug_enabled); } const_value = daemon_option("debugfile"); if (daemon_option("logfile")) { /* File logging is already setup by crm_log_init() */ } else if(const_value) { /* From when we cared what options heartbeat used */ set_daemon_option("logfile", const_value); crm_add_logfile(const_value); } else { /* Check corosync */ char *logfile = NULL; char *logfile_enabled = NULL; get_config_opt(config, local_handle, KEY_PREFIX "to_logfile", &logfile_enabled, "on"); get_config_opt(config, local_handle, KEY_PREFIX "logfile", &logfile, "/var/log/pacemaker.log"); if (crm_is_true(logfile_enabled) == FALSE) { crm_trace("File logging disabled in corosync"); } else if (crm_add_logfile(logfile)) { set_daemon_option("logfile", logfile); } else { crm_err("Couldn't create logfile: %s", logfile); set_daemon_option("logfile", "none"); } free(logfile); free(logfile_enabled); } if (daemon_option("logfacility")) { /* Syslog logging is already setup by crm_log_init() */ } else { /* Check corosync */ char *syslog_enabled = NULL; char *syslog_facility = NULL; get_config_opt(config, local_handle, KEY_PREFIX "to_syslog", &syslog_enabled, "on"); get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility", &syslog_facility, "daemon"); if (crm_is_true(syslog_enabled) == FALSE) { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); set_daemon_option("logfacility", "none"); } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(syslog_facility)); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); set_daemon_option("logfacility", syslog_facility); } free(syslog_enabled); free(syslog_facility); } const_value = daemon_option("logfacility"); if (const_value) { /* cluster-glue module needs HA_LOGFACILITY */ setenv("HA_LOGFACILITY", const_value, 1); } #if HAVE_CONFDB confdb_finalize(config); #elif HAVE_CMAP if(local_handle){ gid_t gid = 0; if (crm_user_lookup(CRM_DAEMON_USER, NULL, &gid) < 0) { - crm_warn("No group found for user %s", CRM_DAEMON_USER); + crm_warn("Could not authorize group with corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); } else { char key[PATH_MAX]; snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); rc = cmap_set_uint8(local_handle, key, 1); - crm_notice("Configured corosync to accept connections from group %u: %s (%d)", - gid, ais_error2text(rc), rc); + if (rc != CS_OK) { + crm_warn("Could not authorize group with corosync "CRM_XS + " group=%u rc=%d (%s)", gid, rc, ais_error2text(rc)); + } } } cmap_finalize(local_handle); #endif return TRUE; } diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index f7f1d462b3..9ef33bbf1b 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,1148 +1,1149 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean pcmk_quorate = FALSE; gboolean fatal_error = FALSE; GMainLoop *mainloop = NULL; #define PCMK_PROCESS_CHECK_INTERVAL 5 const char *local_name = NULL; uint32_t local_nodeid = 0; crm_trigger_t *shutdown_trigger = NULL; const char *pid_file = "/var/run/pacemaker.pid"; typedef struct pcmk_child_s { int pid; long flag; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; gboolean active_before_startup; } pcmk_child_t; /* Index into the array below */ #define pcmk_child_crmd 4 #define pcmk_child_mgmtd 8 /* *INDENT-OFF* */ static pcmk_child_t pcmk_children[] = { { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL }, { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" }, { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, }; /* *INDENT-ON* */ static gboolean start_child(pcmk_child_t * child); static gboolean check_active_before_startup_processes(gpointer user_data); void update_process_clients(crm_client_t *client); void update_process_peers(void); void enable_crmd_as_root(gboolean enable) { if (enable) { pcmk_children[pcmk_child_crmd].uid = NULL; } else { pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER; } } void enable_mgmtd(gboolean enable) { if (enable) { pcmk_children[pcmk_child_mgmtd].start_seq = 7; } else { pcmk_children[pcmk_child_mgmtd].start_seq = 0; } } static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_get_cluster_proc(); for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = FALSE; /* Broadcast the fact that one of our processes died ASAP * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ update_node_processes(local_nodeid, NULL, get_process_list()); child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if (shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); update_node_processes(local_nodeid, NULL, get_process_list()); } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { crm_err("Rebooting system because of %s", child->name); pcmk_panic(__FUNCTION__); } else if (child->respawn) { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo && signo == SIGKILL) { crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else if (signo) { crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else { switch(exitcode) { case pcmk_ok: crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; case DAEMON_RESPAWN_STOP: crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case pcmk_err_panic: do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_panic(__FUNCTION__); pcmk_shutdown(SIGTERM); break; default: crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; } } pcmk_process_exit(child); } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { crm_debug("Nothing to do for child \"%s\"", child->name); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid); } else { crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed", child->name, signal, child->pid); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; static gboolean start_child(pcmk_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); enum cluster_type_e stack = get_cluster_type(); child->active_before_startup = FALSE; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); return FALSE; } crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); crm_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); update_node_processes(local_nodeid, NULL, get_process_list()); return TRUE; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrarys */ opts_vgrind[0] = strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = strdup("--tool=callgrind"); opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = strdup(child->command);; if(gid) { if(stack == pcmk_cluster_corosync) { /* Drop root privileges completely * * We can do this because we set uidgid.gid.${gid}=1 * via CMAP which allows these processes to connect to * corosync */ if (setgid(gid) < 0) { crm_perror(LOG_ERR, "Could not set group to %d", gid); } /* Keep the root group (so we can access corosync), but add the haclient group (so we can access ipc) */ } else if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); } } if (uid && setuid(uid) < 0) { crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; /* never reached */ } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGSEGV); } return FALSE; } static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); int lpc = 0; if (phase == 0) { crm_notice("Shutting down Pacemaker"); phase = max; /* Add a second, more frequent, check to speed up shutdown */ g_timeout_add_seconds(5, check_active_before_startup_processes, NULL); } for (; phase > 0; phase--) { /* dont stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if (phase != child->start_seq) { continue; } if (child->pid) { time_t now = time(NULL); if (child->respawn) { next_log = now + 30; child->respawn = FALSE; stop_child(child, SIGTERM); if (phase < pcmk_children[pcmk_child_crmd].start_seq) { g_timeout_add(180000 /* 3m */ , escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); { const char *delay = daemon_option("shutdown_delay"); if(delay) { sync(); sleep(crm_get_msec(delay) / 1000); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Attempting to inhibit respawning after fatal error"); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; } static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk_panic(__FUNCTION__); } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pcmk_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; const char *task = NULL; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } task = crm_element_value(msg, F_CRM_TASK); if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { /* Time to quit */ crm_notice("Shutting down in response to ticket %s (%s)", crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); pcmk_shutdown(15); } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { /* Send to everyone */ struct iovec *iov; int id = 0; const char *name = NULL; crm_element_value_int(msg, XML_ATTR_ID, &id); name = crm_element_value(msg, XML_ATTR_UNAME); crm_notice("Instructing peers to remove references to node %s/%u", name, id); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = dump_xml_unformatted(msg); iov->iov_len = 1 + strlen(iov->iov_base); send_cpg_iov(iov); } else { update_process_clients(c); } free_xml(msg); return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = pcmk_ipc_created, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; /*! * \internal * \brief Send an XML message with process list of all known peers to client(s) * * \param[in] client Send message to this client, or all clients if NULL */ void update_process_clients(crm_client_t *client) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *update = create_xml_node(NULL, "nodes"); if (is_corosync_cluster()) { crm_xml_add_int(update, "quorate", pcmk_quorate); } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add(xml, "state", node->state); crm_xml_add_int(xml, "processes", node->processes); } if(client) { crm_trace("Sending process list to client %s", client->id); crm_ipcs_send(client, 0, update, crm_ipc_server_event); } else { crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections)); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) { crm_ipcs_send(client, 0, update, crm_ipc_server_event); } } free_xml(update); } /*! * \internal * \brief Send a CPG message with local node's process list to all peers */ void update_process_peers(void) { /* Do nothing for corosync-2 based clusters */ char buffer[1024]; struct iovec *iov; int rc = 0; memset(buffer, 0, SIZEOF(buffer)); if (local_name) { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", local_name, get_process_list()); } else { rc = snprintf(buffer, SIZEOF(buffer) - 1, "", get_process_list()); } crm_trace("Sending %s", buffer); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = strdup(buffer); iov->iov_len = rc + 1; send_cpg_iov(iov); } /*! * \internal * \brief Update a node's process list, notifying clients and peers if needed * * \param[in] id Node ID of affected node * \param[in] uname Uname of affected node * \param[in] procs Affected node's process list mask * * \return TRUE if the process list changed, FALSE otherwise */ gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; crm_node_t *node = crm_get_peer(id, uname); if (procs != 0) { if (procs != node->processes) { crm_debug("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; /* If local node's processes have changed, notify clients/peers */ if (id == local_nodeid) { update_process_clients(NULL); update_process_peers(); } } else { crm_trace("Node %s still has process list: %.32x", node->uname, procs); } } return changed; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"}, {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ static void mcp_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_strerror(errno)); } } static gboolean check_active_before_startup_processes(gpointer user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); gboolean keep_tracking = FALSE; for (start_seq = 1; start_seq < max; start_seq++) { for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].active_before_startup == FALSE) { /* we are already tracking it as a child process. */ continue; } else if (start_seq != pcmk_children[lpc].start_seq) { continue; } else { const char *name = pcmk_children[lpc].name; if (pcmk_children[lpc].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) { crm_notice("Process %s terminated (pid=%d)", name, pcmk_children[lpc].pid); pcmk_process_exit(&(pcmk_children[lpc])); continue; } } /* at least one of the processes found at startup * is still going, so keep this recurring timer around */ keep_tracking = TRUE; } } return keep_tracking; } static bool find_and_track_existing_processes(void) { DIR *dp; struct dirent *entry; int start_tracker = 0; char entry_name[64]; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ crm_notice("Can not read /proc directory to track existing components"); return FALSE; } while ((entry = readdir(dp)) != NULL) { int pid; int max = SIZEOF(pcmk_children); int i; if (crm_procfs_process_info(entry, entry_name, &pid) < 0) { continue; } for (i = 0; i < max; i++) { const char *name = pcmk_children[i].name; if (pcmk_children[i].start_seq == 0) { continue; } if (pcmk_children[i].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) { crm_notice("Tracking existing %s process (pid=%d)", name, pid); pcmk_children[i].pid = pid; pcmk_children[i].active_before_startup = TRUE; start_tracker = 1; break; } } } if (start_tracker) { g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } closedir(dp); return start_tracker; } static void init_children_processes(void) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); /* start any children that have not been detected */ for (start_seq = 1; start_seq < max; start_seq++) { /* dont start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].pid) { /* we are already tracking it */ continue; } if (start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); } static void mcp_cpg_destroy(gpointer user_data) { crm_err("Connection destroyed"); crm_exit(ENOTCONN); } /*! * \internal * \brief Process a CPG message (process list or manual peer cache removal) * * \param[in] handle CPG connection (ignored) * \param[in] groupName CPG group name (ignored) * \param[in] nodeid ID of affected node * \param[in] pid Process ID (ignored) * \param[in] msg CPG XML message * \param[in] msg_len Length of msg in bytes (ignored) */ static void mcp_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = string2xml(msg); const char *task = crm_element_value(xml, F_CRM_TASK); crm_trace("Received CPG message (%s): %.200s", (task? task : "process list"), (char*)msg); if (task == NULL) { if (nodeid == local_nodeid) { crm_info("Ignoring process list sent by peer for local node"); } else { uint32_t procs = 0; const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); if (update_node_processes(nodeid, uname, procs)) { update_process_clients(NULL); } } } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { int id = 0; const char *name = NULL; crm_element_value_int(xml, XML_ATTR_ID, &id); name = crm_element_value(xml, XML_ATTR_UNAME); reap_crm_member(id, name); } if (xml != NULL) { free_xml(xml); } } static void mcp_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Update peer cache if needed */ pcmk_cpg_membership(handle, groupName, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); /* Always broadcast our own presence after any membership change */ update_process_peers(); } static gboolean mcp_quorum_callback(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_quorum_destroy(gpointer user_data) { crm_info("connection lost"); } #if SUPPORT_CMAN static gboolean mcp_cman_dispatch(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_cman_destroy(gpointer user_data) { crm_info("connection closed"); } #endif int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean shutdown = FALSE; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_ipc_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; const char *facility = daemon_option("logfacility"); static crm_cluster_t cluster; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n"); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'f': /* Legacy */ break; case 'p': pid_file = optarg; break; case '$': case '?': crm_help(flag, EX_OK); break; case 'S': shutdown = TRUE; break; case 'F': printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); crm_exit(pcmk_ok); default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } setenv("LC_ALL", "C", 1); setenv("HA_LOGD", "no", 1); set_daemon_option("mcp", "true"); set_daemon_option("use_logd", "off"); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); /* Restore the original facility so that mcp_read_config() does the right thing */ set_daemon_option("logfacility", facility); crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP); old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); crm_ipc_connect(old_instance); if (shutdown) { crm_debug("Terminating previous instance"); while (crm_ipc_connected(old_instance)) { xmlNode *cmd = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL); crm_debug("."); crm_ipc_send(old_instance, cmd, 0, 0, NULL); free_xml(cmd); sleep(2); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_exit(pcmk_ok); } else if (crm_ipc_connected(old_instance)) { crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("Pacemaker is already active, aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); if (mcp_read_config() == FALSE) { crm_notice("Could not obtain corosync config data, exiting"); crm_exit(ENODATA); } - crm_notice("Starting Pacemaker %s (Build: %s): %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", + PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_new(FALSE); sysrq_init(); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." " Core files are an important diagnositic tool," " please consider enabling them by default."); } #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } rc = pcmk_ok; if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(ENOKEY); } mkdir(CRM_STATE_DIR, 0750); mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store core files in */ crm_build_path(CRM_CORE_DIR, 0775); mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); /* Used to store blackbox dumps in */ crm_build_path(CRM_BLACKBOX_DIR, 0755); mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); /* Used to store policy engine inputs in */ crm_build_path(PE_STATE_DIR, 0755); mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store the cluster configuration */ crm_build_path(CRM_CONFIG_DIR, 0755); mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); /* Resource agent paths are constructed by the lrmd */ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); crm_exit(EIO); } /* Allows us to block shutdown */ if (cluster_connect_cfg(&local_nodeid) == FALSE) { crm_err("Couldn't connect to Corosync's CFG service"); crm_exit(ENOPROTOOPT); } if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); } else { setenv("PCMK_watchdog", "false", 1); } find_and_track_existing_processes(); cluster.destroy = mcp_cpg_destroy; cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; crm_set_autoreap(FALSE); if(cluster_connect_cpg(&cluster) == FALSE) { crm_err("Couldn't connect to Corosync's CPG service"); rc = -ENOPROTOOPT; } if (rc == pcmk_ok && is_corosync_cluster()) { /* Keep the membership list up-to-date for crm_node to query */ if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) { rc = -ENOTCONN; } } #if SUPPORT_CMAN if (rc == pcmk_ok && is_cman_cluster()) { init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy); } #endif if(rc == pcmk_ok) { local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); init_children_processes(); crm_info("Starting mainloop"); g_main_run(mainloop); } if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_destroy(mainloop); cluster_disconnect_cpg(&cluster); cluster_disconnect_cfg(); crm_info("Exiting %s", crm_system_name); return crm_exit(rc); }