diff --git a/attrd/commands.c b/attrd/commands.c index c7c9ab5891..6526b2be69 100644 --- a/attrd/commands.c +++ b/attrd/commands.c @@ -1,1106 +1,1106 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #define ATTRD_PROTOCOL_VERSION "1" int last_cib_op_done = 0; char *peer_writer = NULL; GHashTable *attributes = NULL; typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; char *stored; } attribute_value_t; void write_attribute(attribute_t *a); void write_or_elect_attribute(attribute_t *a); void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source); static gboolean send_attrd_message(crm_node_t * node, xmlNode * data) { crm_xml_add(data, F_TYPE, T_ATTRD); crm_xml_add(data, F_ATTRD_IGNORE_LOCALLY, "atomic-version"); /* Tell older versions to ignore our messages */ crm_xml_add(data, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(data, F_ATTRD_WRITER, election_state(writer)); return send_cluster_message(node, crm_msg_attrd, data, TRUE); } static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s in state %d", a->id, election_state(writer)); write_or_elect_attribute(a); return FALSE; } static void free_attribute_value(gpointer data) { attribute_value_t *v = data; free(v->nodename); free(v->current); free(v->requested); free(v->stored); free(v); } void free_attribute(gpointer data) { attribute_t *a = data; if(a) { free(a->id); free(a->set); free(a->uuid); free(a->user); mainloop_timer_del(a->timer); g_hash_table_destroy(a->values); free(a); } } static xmlNode * build_attribute_xml( xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user, gboolean is_private, const char *peer, uint32_t peerid, const char *value) { xmlNode *xml = create_xml_node(parent, __FUNCTION__); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, name); crm_xml_add(xml, F_ATTRD_SET, set); crm_xml_add(xml, F_ATTRD_KEY, uuid); crm_xml_add(xml, F_ATTRD_USER, user); crm_xml_add(xml, F_ATTRD_HOST, peer); crm_xml_add_int(xml, F_ATTRD_HOST_ID, peerid); 
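/* [Illustrative aside -- not part of the patch] attribute_timer_cb() and the timer/timeout_ms
 * fields above implement write dampening: updates only mark the attribute changed, and the
 * actual write happens once the dampening interval expires. A minimal standalone sketch of
 * the same pattern, using plain GLib timeouts in place of Pacemaker's mainloop_timer_*
 * wrappers (all demo_* names are hypothetical):
 */
#if 0
#include <glib.h>

typedef struct {
    const char *id;
    guint timer;        /* pending GLib timeout source, or 0 */
    guint dampen_ms;    /* dampening interval */
    gboolean changed;
} demo_attr_t;

static gboolean demo_write_cb(gpointer data)
{
    demo_attr_t *a = data;

    a->timer = 0;
    a->changed = FALSE;
    g_print("writing %s\n", a->id);   /* stand-in for write_attribute() */
    return FALSE;                     /* one-shot: do not reschedule */
}

static void demo_update(demo_attr_t *a)
{
    a->changed = TRUE;
    if (a->dampen_ms == 0) {
        demo_write_cb(a);              /* no dampening: write immediately */
    } else {
        if (a->timer != 0) {
            g_source_remove(a->timer); /* re-arm the countdown */
        }
        a->timer = g_timeout_add(a->dampen_ms, demo_write_cb, a);
    }
}
#endif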
crm_xml_add(xml, F_ATTRD_VALUE, value); crm_xml_add_int(xml, F_ATTRD_DAMPEN, timeout_ms/1000); crm_xml_add_int(xml, F_ATTRD_IS_PRIVATE, is_private); return xml; } static attribute_t * create_attribute(xmlNode *xml) { int dampen = 0; const char *value = crm_element_value(xml, F_ATTRD_DAMPEN); attribute_t *a = calloc(1, sizeof(attribute_t)); a->id = crm_element_value_copy(xml, F_ATTRD_ATTRIBUTE); a->set = crm_element_value_copy(xml, F_ATTRD_SET); a->uuid = crm_element_value_copy(xml, F_ATTRD_KEY); a->values = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, free_attribute_value); crm_element_value_int(xml, F_ATTRD_IS_PRIVATE, &a->is_private); #if ENABLE_ACL a->user = crm_element_value_copy(xml, F_ATTRD_USER); crm_trace("Performing all %s operations as user '%s'", a->id, a->user); #endif if(value) { dampen = crm_get_msec(value); crm_trace("Created attribute %s with delay %dms (%s)", a->id, dampen, value); } else { crm_trace("Created attribute %s with no delay", a->id); } if(dampen > 0) { a->timeout_ms = dampen; a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); } else if (dampen < 0) { crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); } g_hash_table_replace(attributes, a->id, a); return a; } /*! * \internal * \brief Respond to a client peer-remove request (i.e. propagate to all peers) * * \param[in] client_name Name of client that made request (for log messages) * \param[in] xml Root of request XML * * \return void */ void attrd_client_peer_remove(const char *client_name, xmlNode *xml) { const char *host = crm_element_value(xml, F_ATTRD_HOST); if (host) { crm_info("Client %s is requesting all values for %s be removed", client_name, host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", client_name); } } /*!
* \internal * \brief Respond to a client update request * * \param[in] xml Root of request XML * * \return void */ void attrd_client_update(xmlNode *xml) { attribute_t *a = NULL; attribute_value_t *v = NULL; char *key = crm_element_value_copy(xml, F_ATTRD_KEY); char *set = crm_element_value_copy(xml, F_ATTRD_SET); char *host = crm_element_value_copy(xml, F_ATTRD_HOST); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *regex = crm_element_value(xml, F_ATTRD_REGEX); /* If a regex was specified, broadcast a message for each match */ if ((attr == NULL) && regex) { GHashTableIter aIter; regex_t *r_patt = calloc(1, sizeof(regex_t)); crm_debug("Setting %s to %s", regex, value); if (regcomp(r_patt, regex, REG_EXTENDED)) { crm_err("Bad regex '%s' for update", regex); } else { g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(r_patt, attr, 0, NULL, 0); if (status == 0) { crm_trace("Matched %s with %s", attr, regex); crm_xml_add(xml, F_ATTRD_ATTRIBUTE, attr); send_attrd_message(NULL, xml); } } } free(key); free(set); free(host); regfree(r_patt); free(r_patt); return; } if (host == NULL) { crm_trace("Inferring host"); host = strdup(attrd_cluster->uname); crm_xml_add(xml, F_ATTRD_HOST, host); crm_xml_add_int(xml, F_ATTRD_HOST_ID, attrd_cluster->nodeid); } a = g_hash_table_lookup(attributes, attr); /* If value was specified using ++ or += notation, expand to real value */ if (value) { int offset = 1; int int_value = 0; static const int plus_plus_len = 5; if ((strlen(value) >= (plus_plus_len + 2)) && (value[plus_plus_len] == '+') && ((value[plus_plus_len + 1] == '+') || (value[plus_plus_len + 1] == '='))) { if (a) { v = g_hash_table_lookup(a->values, host); } if (v) { int_value = char2score(v->current); } if (value[plus_plus_len + 1] != '+') { const char *offset_s = value + (plus_plus_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = INFINITY; } crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(xml, F_ATTRD_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ value = crm_element_value(xml, F_ATTRD_VALUE); } } if ((peer_writer == NULL) && (election_state(writer) != election_in_progress)) { crm_info("Starting an election to determine the writer"); election_vote(writer); } crm_debug("Broadcasting %s[%s] = %s%s", attr, host, value, ((election_state(writer) == election_won)? " (writer)" : "")); free(key); free(set); free(host); send_attrd_message(NULL, xml); /* ends up at attrd_peer_message() */ } /*! * \internal * \brief Respond to a client refresh request (i.e. write out all attributes) * * \return void */ void attrd_client_refresh(void) { GHashTableIter iter; attribute_t *a = NULL; /* 'refresh' forces a write of the current value of all attributes * Cancel any existing timers, we're writing it NOW */ g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { mainloop_timer_stop(a->timer); } crm_info("Updating all attributes"); write_attributes(TRUE, FALSE); } /*! 
* \internal * \brief Build the XML reply to a client query * * \param[in] attr Name of requested attribute * \param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __FUNCTION__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_ATTRD_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, F_ATTRD_ATTRIBUTE, attr); /* Allow caller to use "localhost" to refer to local node */ if (safe_str_eq(host, "localhost")) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, host); crm_xml_add(host_value, F_ATTRD_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } crm_xml_add(host_value, F_ATTRD_HOST, v->nodename); crm_xml_add(host_value, F_ATTRD_VALUE, v->current); } } } return reply; } /*! * \internal * \brief Respond to a client query * * \param[in] client Who queried us * \param[in] query Root of query XML * * \return void */ void attrd_client_query(crm_client_t *client, uint32_t id, uint32_t flags, xmlNode *query) { const char *attr; const char *origin = crm_element_value(query, F_ORIG); ssize_t rc; xmlNode *reply; if (origin == NULL) { origin = "unknown client"; } crm_debug("Query arrived from %s", origin); /* Request must specify attribute name to query */ attr = crm_element_value(query, F_ATTRD_ATTRIBUTE); if (attr == NULL) { crm_warn("Ignoring malformed query from %s (no attribute name given)", origin); return; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, F_ATTRD_HOST)); if (reply == NULL) { crm_err("Could not respond to query from %s: could not create XML reply", origin); return; } crm_log_xml_trace(reply, "Reply"); /* Send the reply to the client */ client->request_id = 0; if ((rc = crm_ipcs_send(client, id, reply, flags)) < 0) { crm_err("Could not respond to query from %s: %s (%d)", origin, pcmk_strerror(-rc), -rc); } free_xml(reply); } void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { int peer_state = 0; const char *v = crm_element_value(xml, F_ATTRD_VERSION); const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *election_op = crm_element_value(xml, F_CRM_TASK); const char *host = crm_element_value(xml, F_ATTRD_HOST); if(election_op) { enum election_result rc = 0; crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); rc = election_count_vote(writer, xml, TRUE); switch(rc) { case election_start: free(peer_writer); peer_writer = NULL; election_vote(writer); break; case election_lost: free(peer_writer); peer_writer = strdup(peer->uname); break; default: election_check(writer); break; } return; } else if(v == NULL) { /* From the non-atomic version */ if (safe_str_eq(op,
ATTRD_OP_UPDATE)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); crm_trace("Compatibility update of %s from %s", name, peer->uname); attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_FLUSH)) { const char *name = crm_element_value(xml, F_ATTRD_ATTRIBUTE); attribute_t *a = g_hash_table_lookup(attributes, name); if(a) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { GHashTableIter aIter; attribute_t *a = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { crm_trace("Compatibility write-out of %s for %s from %s", a->id, op, peer->uname); write_or_elect_attribute(a); } } } crm_element_value_int(xml, F_ATTRD_WRITER, &peer_state); if(election_state(writer) == election_won && peer_state == election_won && safe_str_neq(peer->uname, attrd_cluster->uname)) { crm_notice("Detected another attribute writer: %s", peer->uname); election_vote(writer); } else if(peer_state == election_won) { if(peer_writer == NULL) { peer_writer = strdup(peer->uname); crm_notice("Recorded attribute writer: %s", peer->uname); } else if(safe_str_neq(peer->uname, peer_writer)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, peer_writer); free(peer_writer); peer_writer = strdup(peer->uname); } } if (safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_peer_update(peer, xml, host, FALSE); } else if (safe_str_eq(op, ATTRD_OP_SYNC)) { attrd_peer_sync(peer, xml); } else if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { int host_id = 0; char *endptr = NULL; errno = 0; host_id = strtol(host, &endptr, 10); if (errno != 0 || endptr == host || *endptr != '\0') { host_id = 0; } else { host = NULL; } crm_notice("Processing %s from %s: %s %u", op, peer->uname, host, host_id); attrd_peer_remove(host_id, host, TRUE, peer->uname); } else if (safe_str_eq(op, ATTRD_OP_SYNC_RESPONSE) && safe_str_neq(peer->uname, attrd_cluster->uname)) { xmlNode *child = NULL; - crm_notice("Processing %s from %s", op, peer->uname); + crm_info("Processing %s from %s", op, peer->uname); for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { host = crm_element_value(child, F_ATTRD_HOST); attrd_peer_update(peer, child, host, TRUE); } } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); send_attrd_message(peer, sync); free_xml(sync); } void attrd_peer_remove(uint32_t nodeid, const char *host, gboolean uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; crm_notice("Removing all %s (%u) attributes for %s", host, nodeid, source); if(host == NULL) { return; } g_hash_table_iter_init(&aIter, attributes); while
(g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { attribute_value_t *v = g_hash_table_lookup(a->values, host); if(v && v->current) { crm_debug("Removing %s[%s]=%s for %s", a->id, host, v->current, source); free(v->current); v->current = NULL; a->changed = TRUE; if(a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id); mainloop_timer_start(a->timer); } else { write_or_elect_attribute(a); } } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(nodeid, host); } } /*! * \internal * \brief Return host's hash table entry (creating one if needed) * * \param[in] values Hash table of values * \param[in] host Name of peer to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, F_ATTRD_IS_REMOTE, &is_remote); if (is_remote) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = crm_find_peer(0, host); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, host); } /* Ensure this host is in the remote peer cache */ crm_remote_peer_cache_add(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); v->nodename = strdup(host); CRM_ASSERT(v->nodename != NULL); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool changed = FALSE; attribute_value_t *v = NULL; int dampen = 0; const char *op = crm_element_value(xml, F_ATTRD_TASK); const char *attr = crm_element_value(xml, F_ATTRD_ATTRIBUTE); const char *value = crm_element_value(xml, F_ATTRD_VALUE); const char *dvalue = crm_element_value(xml, F_ATTRD_DAMPEN); attribute_t *a = g_hash_table_lookup(attributes, attr); if(a == NULL) { if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE) || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { a = create_attribute(xml); } else { crm_warn("Update error (attribute %s not found)", attr); return; } } if (op == NULL /* The xml children from an ATTRD_OP_SYNC_RESPONSE have no F_ATTRD_TASK */ || safe_str_eq(op, ATTRD_OP_UPDATE_BOTH) || safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { if (dvalue) { dampen = crm_get_msec(dvalue); if (dampen >= 0) { if (a->timeout_ms != dampen) { mainloop_timer_stop(a->timer); mainloop_timer_del(a->timer); a->timeout_ms = dampen; if (dampen > 0) { a->timer = mainloop_timer_add(a->id, a->timeout_ms, FALSE, attribute_timer_cb, a); crm_info("Update attribute %s with delay %dms (%s)", a->id, dampen, dvalue); } else { a->timer = NULL; crm_info("Update attribute %s with no delay", a->id); } /* If the dampening interval changed, write out the current value immediately. */ write_or_elect_attribute(a); if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { return; } } else { if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { crm_trace("Unchanged attribute %s with delay %dms (%s) (ATTRD_OP_UPDATE_DELAY)", a->id, dampen, dvalue); return; } } } else { crm_warn("Update error (delay for attribute %s must be a non-negative interval: %dms (%s))", a->id, dampen, dvalue); return; } } else { crm_warn("Update error (a delay parameter is required to update attribute %s)", a->id); return; } } if(host == NULL) { GHashTableIter vIter; g_hash_table_iter_init(&vIter, a->values); crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, F_ATTRD_HOST_ID); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { attrd_peer_update(peer, xml, host, filter); } return; } v = attrd_lookup_or_create_value(a->values, host, xml); if(filter && safe_str_neq(v->current, value) && safe_str_eq(host, attrd_cluster->uname)) { xmlNode *sync = create_xml_node(NULL, __FUNCTION__); crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", a->id, host, v->current, value, peer->uname); crm_xml_add(sync, F_ATTRD_TASK, ATTRD_OP_SYNC_RESPONSE); v = g_hash_table_lookup(a->values, host); build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, v->nodename, v->nodeid, v->current); crm_xml_add_int(sync, F_ATTRD_WRITER, election_state(writer)); send_attrd_message(peer, sync); free_xml(sync); } else if(safe_str_neq(v->current, value)) { crm_info("Setting %s[%s]: %s -> %s from %s", attr, host, v->current, value, peer->uname); free(v->current); if(value) { v->current = strdup(value); } else { v->current = NULL; } changed = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } a->changed |= changed; if(changed) { if(a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, a->id); mainloop_timer_start(a->timer); } else { write_or_elect_attribute(a); } } /* this only involves cluster nodes. */ if(v->nodeid == 0 && (v->is_remote == FALSE)) { if(crm_element_value_int(xml, F_ATTRD_HOST_ID, (int*)&v->nodeid) == 0) { /* Create the name/id association */ crm_node_t *peer = crm_get_peer(v->nodeid, host); crm_trace("We know %s's node id now: %s", peer->uname, peer->uuid); if(election_state(writer) == election_won) { write_attributes(FALSE, TRUE); return; } } } } void write_or_elect_attribute(attribute_t *a) { enum election_result rc = election_state(writer); if(rc == election_won) { write_attribute(a); } else if(rc == election_in_progress) { crm_trace("Election in progress to determine who will write out %s", a->id); } else if(peer_writer == NULL) { crm_info("Starting an election to determine who will write out %s", a->id); election_vote(writer); } else { crm_trace("%s will write out %s, we are in state %d", peer_writer, a->id, rc); } } gboolean attrd_election_cb(gpointer user_data) { crm_trace("Election complete"); free(peer_writer); peer_writer = strdup(attrd_cluster->uname); /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); /* Update the CIB after an election */ write_attributes(TRUE, FALSE); return FALSE; } void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { if ((kind == crm_status_nstate) || (kind == crm_status_rstate)) { if (safe_str_eq(peer->state, CRM_NODE_MEMBER)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if ((election_state(writer) == election_won) && !is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { /* Remove all attribute values associated with lost nodes */ attrd_peer_remove(peer->id, peer->uname, FALSE, __FUNCTION__); if (peer_writer && safe_str_eq(peer->uname, peer_writer)) { free(peer_writer); peer_writer = NULL;
crm_notice("Lost attribute writer %s", peer->uname); } } } } static void attrd_cib_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); goto done; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "Update %d for %s: %s (%d)", call_id, name, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "Update %d for %s[%s]=%s: %s (%d)", call_id, a->id, peer, v->requested, pcmk_strerror(rc), rc); if(rc == pcmk_ok) { free(v->stored); v->stored = v->requested; v->requested = NULL; } else { free(v->requested); v->requested = NULL; a->changed = TRUE; /* Attempt write out again */ } } done: if(a && a->changed && election_state(writer) == election_won) { write_attribute(a); } } void write_attributes(bool all, bool peer_discovered) { GHashTableIter iter; attribute_t *a = NULL; g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { if (peer_discovered && a->unknown_peer_uuids) { /* a new peer uuid has been discovered, try writing this attribute again. */ a->changed = TRUE; } if(all || a->changed) { write_attribute(a); } else { crm_debug("Skipping unchanged attribute %s", a->id); } } } static void build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) { char *set = NULL; char *uuid = NULL; xmlNode *xml_obj = NULL; if(a->set) { set = strdup(a->set); } else { set = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, nodeid); } if(a->uuid) { uuid = strdup(a->uuid); } else { int lpc; uuid = crm_strdup_printf("%s-%s", set, a->id); /* Minimal attempt at sanitizing automatic IDs */ for (lpc = 0; uuid[lpc] != 0; lpc++) { switch (uuid[lpc]) { case ':': uuid[lpc] = '.'; } } } xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); crm_xml_add(xml_obj, XML_ATTR_ID, set); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); crm_xml_add(xml_obj, XML_ATTR_ID, uuid); crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); if(value) { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); } else { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); } free(uuid); free(set); } void write_attribute(attribute_t *a) { int private_updates = 0, cib_updates = 0; xmlNode *xml_top = NULL; attribute_value_t *v = NULL; GHashTableIter iter; enum cib_call_options flags = cib_quorum_override; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... 
*/ if (!a->is_private) { /* Defer the write if now's not a good time */ if (the_cib == NULL) { crm_info("Write out of '%s' delayed: cib not connected", a->id); return; } else if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); return; } else if (mainloop_timer_running(a->timer)) { crm_info("Write out of '%s' delayed: timer is running", a->id); return; } /* Initialize the status update XML */ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); } /* Attribute will be written shortly, so clear changed flag */ a->changed = FALSE; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = FALSE; /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Update error (peer not found): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Updating value's nodeid"); v->nodeid = peer->id; } /* If this is a private attribute, no update needs to be sent */ if (a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = TRUE; crm_notice("Update error (unknown peer uuid, retry will be attempted once uuid is discovered): %s[%s]=%s failed (host=%p)", v->nodename, a->id, v->current, peer); continue; } /* Add this value to status update XML */ crm_debug("Update: %s[%s]=%s (%s %u %u %s)", v->nodename, a->id, v->current, peer->uuid, peer->id, v->nodeid, peer->uname); build_update_element(xml_top, a, peer->uuid, v->current); cib_updates++; free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } else { /* Older attrd versions don't know about the cib_mixed_update * flag so make sure it goes to the local cib which does */ flags |= cib_mixed_update|cib_scope_local; } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, ((private_updates == 1)? "" : "s"), a->id, (a->uuid? a->uuid : ""), a->set); } if (cib_updates) { crm_log_xml_trace(xml_top, __FUNCTION__); a->update = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL, flags, a->user); crm_info("Sent update %d with %d changes for %s, id=%s, set=%s", a->update, cib_updates, a->id, (a->uuid? a->uuid : ""), a->set); the_cib->cmds->register_callback_full(the_cib, a->update, 120, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback, free); } free_xml(xml_top); } diff --git a/attrd/main.c b/attrd/main.c index ba327f3604..c4e4081286 100644 --- a/attrd/main.c +++ b/attrd/main.c @@ -1,399 +1,399 @@ /* * Copyright (C) 2013 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include cib_t *the_cib = NULL; GMainLoop *mloop = NULL; bool shutting_down = FALSE; crm_cluster_t *attrd_cluster = NULL; election_t *writer = NULL; int attrd_error = pcmk_ok; static void attrd_shutdown(int nsig) { shutting_down = TRUE; crm_info("Shutting down"); if (mloop != NULL && g_main_is_running(mloop)) { g_main_quit(mloop); } else { crm_exit(pcmk_ok); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (shutting_down) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to Corosync service!"); attrd_error = ECONNRESET; attrd_shutdown(0); } } static void attrd_cib_replaced_cb(const char *event, xmlNode * msg) { crm_notice("Updating all attributes after %s event", event); if(election_state(writer) == election_won) { write_attributes(TRUE, FALSE); } } static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (shutting_down) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_err("Lost connection to CIB service!"); attrd_error = ECONNRESET; attrd_shutdown(0); } return; } static cib_t * attrd_cib_connect(int max_retry) { int rc = -ENOTCONN; static int attempts = 0; cib_t *connection = cib_new(); if(connection == NULL) { return NULL; } do { if(attempts > 0) { sleep(attempts); } attempts++; crm_debug("CIB signon attempt %d", attempts); rc = connection->cmds->signon(connection, T_ATTRD, cib_command); } while(rc != pcmk_ok && attempts < max_retry); if (rc != pcmk_ok) { crm_err("Signon to CIB failed: %s (%d)", pcmk_strerror(rc), rc); goto cleanup; } crm_info("Connected to the CIB after %d attempts", attempts); rc = connection->cmds->set_connection_dnotify(connection, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = connection->cmds->add_notify_callback(connection, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); if(rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } return connection; cleanup: connection->cmds->signoff(connection); cib_delete(connection); return NULL; } static int32_t attrd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (shutting_down) { 
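/* [Illustrative aside -- not part of the patch] attrd_cib_connect() above retries the CIB
 * signon with a linearly growing pause between attempts. The retry shape in isolation, with
 * demo_try_connect() as a hypothetical stand-in for the cmds->signon() call:
 */
#if 0
#include <unistd.h>

static int demo_try_connect(void);   /* hypothetical; returns 0 on success */

static int demo_connect(int max_retry)
{
    int rc = -1;
    int attempts = 0;

    do {
        if (attempts > 0) {
            sleep(attempts);   /* wait 1s, 2s, 3s, ... between tries */
        }
        attempts++;
        rc = demo_try_connect();
    } while ((rc != 0) && (attempts < max_retry));

    return rc;
}
#endif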
crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void attrd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *xml = crm_ipcs_recv(client, data, size, &id, &flags); const char *op; if (xml == NULL) { crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(xml, F_ATTRD_USER, client->user); #endif crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c); crm_log_xml_trace(xml, __FUNCTION__); op = crm_element_value(xml, F_ATTRD_TASK); if (client->name == NULL) { const char *value = crm_element_value(xml, F_ORIG); client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid); } if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { attrd_send_ack(client, id, flags); attrd_client_peer_remove(client->name, xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { attrd_send_ack(client, id, flags); attrd_client_refresh(); } else if (safe_str_eq(op, ATTRD_OP_QUERY)) { /* queries will get reply, so no ack is necessary */ attrd_client_query(client, id, flags, xml); } else { crm_info("Ignoring request from client %s with unknown operation %s", client->name, op); } free_xml(xml); return 0; } /* Error code means? 
*/ static int32_t attrd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void attrd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); attrd_ipc_closed(c); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = attrd_ipc_created, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int rc = pcmk_ok; int flag = 0; int index = 0; int argerr = 0; qb_ipcs_service_t *ipcs = NULL; mloop = g_main_new(FALSE); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating and atomically storing node attribute updates into the CIB"); mainloop_add_signal(SIGTERM, attrd_shutdown); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_info("Starting up"); attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); attrd_cluster = malloc(sizeof(crm_cluster_t)); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); rc = DAEMON_RESPAWN_STOP; goto done; } crm_info("Cluster connection active"); writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, attrd_election_cb); attrd_ipc_server_init(&ipcs, &ipc_callbacks); crm_info("Accepting attribute updates"); the_cib = attrd_cib_connect(10); if (the_cib == NULL) { rc = DAEMON_RESPAWN_STOP; goto done; } crm_info("CIB connection active"); g_main_run(mloop); done: - crm_notice("Cleaning up before exit"); + crm_info("Shutting down attribute manager"); election_fini(writer); crm_client_disconnect_all(ipcs); if (ipcs) { qb_ipcs_destroy(ipcs); g_hash_table_destroy(attributes); } if (the_cib) { the_cib->cmds->signoff(the_cib); cib_delete(the_cib); } if(attrd_error) { return crm_exit(attrd_error); } return crm_exit(rc); } diff --git a/crmd/callbacks.c b/crmd/callbacks.c index 62064a1d84..4501dd5412 100644 --- a/crmd/callbacks.c +++ b/crmd/callbacks.c @@ -1,307 +1,309 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void crmd_ha_connection_destroy(gpointer user_data); /* From join_dc... */ extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); void crmd_ha_connection_destroy(gpointer user_data) { crm_trace("Invoked"); if (is_set(fsa_input_register, R_HA_DISCONNECTED)) { /* we signed out, so this is expected */ crm_info("Heartbeat disconnection complete"); return; } crm_crit("Lost connection to heartbeat service!"); register_fsa_input(C_HA_DISCONNECT, I_ERROR, NULL); trigger_fsa(fsa_source); } void crmd_ha_msg_filter(xmlNode * msg) { if (AM_I_DC) { const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); if (safe_str_eq(sys_from, CRM_SYSTEM_DC)) { const char *from = crm_element_value(msg, F_ORIG); if (safe_str_neq(from, fsa_our_uname)) { int level = LOG_INFO; const char *op = crm_element_value(msg, F_CRM_TASK); /* make sure the election happens NOW */ if (fsa_state != S_ELECTION) { ha_msg_input_t new_input; level = LOG_WARNING; new_input.msg = msg; register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input, __FUNCTION__); } do_crm_log(level, "Another DC detected: %s (op=%s)", from, op); goto done; } } } else { const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); if (safe_str_eq(sys_to, CRM_SYSTEM_DC)) { return; } } /* crm_log_xml_trace("HA[inbound]", msg); */ route_message(C_HA_MESSAGE, msg); done: trigger_fsa(fsa_source); } #define state_text(state) ((state)? (const char *)(state) : "in unknown state") void peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { uint32_t old = 0; uint32_t changed = 0; bool appeared = FALSE; bool is_remote = is_set(node->flags, crm_remote_node); const char *status = NULL; /* Crmd waits to receive some information from the membership layer before * declaring itself operational. If this is being called for a cluster node, * indicate that we have it. */ if (!is_remote) { set_bit(fsa_input_register, R_PEER_DATA); } if (node->uname == NULL) { return; } switch (type) { case crm_status_uname: /* If we've never seen the node, then it also won't be in the status section */ crm_info("%s is now %s", node->uname, state_text(node->state)); return; case crm_status_rstate: case crm_status_nstate: /* This callback should not be called unless the state actually * changed, but here's a failsafe just in case. */ CRM_CHECK(safe_str_neq(data, node->state), return); crm_info("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), node->uname, state_text(node->state), state_text(data)); if (safe_str_eq(CRM_NODE_MEMBER, node->state)) { appeared = TRUE; if (!is_remote) { remove_stonith_cleanup(node->uname); } } crmd_notify_node_event(node); break; case crm_status_processes: if (data) { old = *(const uint32_t *)data; changed = node->processes ^ old; } status = (node->processes & proc_flags) ? ONLINESTATUS : OFFLINESTATUS; crm_info("Client %s/%s now has status [%s] (DC=%s, changed=%6x)", node->uname, peer2text(proc_flags), status, AM_I_DC ?
"true" : crm_str(fsa_our_dc), changed); if ((changed & proc_flags) == 0) { /* Peer process did not change */ crm_trace("No change %6x %6x %6x", old, node->processes, proc_flags); return; } else if (is_not_set(fsa_input_register, R_CIB_CONNECTED)) { crm_trace("Not connected"); return; } else if (fsa_state == S_STOPPING) { crm_trace("Stopping"); return; } appeared = (node->processes & proc_flags) != 0; if (safe_str_eq(node->uname, fsa_our_uname) && (node->processes & proc_flags) == 0) { /* Did we get evicted? */ crm_notice("Our peer connection failed"); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL); } else if (safe_str_eq(node->uname, fsa_our_dc) && crm_is_peer_active(node) == FALSE) { /* Did the DC leave us? */ crm_notice("Our peer on the DC (%s) is dead", fsa_our_dc); register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL); if (compare_version(fsa_our_dc_version, "3.0.9") > 0) { erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); } } else if(AM_I_DC && appeared == FALSE) { crm_info("Peer %s left us", node->uname); erase_status_tag(node->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); /* crm_update_peer_join(__FUNCTION__, node, crm_join_none); */ } break; } if (AM_I_DC) { xmlNode *update = NULL; int flags = node_update_peer; gboolean alive = is_remote? appeared : crm_is_peer_active(node); crm_action_t *down = match_down_event(node->uuid, appeared); crm_trace("Alive=%d, appeared=%d, down=%d", alive, appeared, (down? down->id : -1)); if (alive && type == crm_status_processes) { register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } if (down) { const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRM_OP_FENCE)) { /* tengine_stonith_callback() confirms fence actions */ crm_trace("Updating CIB %s stonithd reported fencing of %s complete", (down->confirmed? "after" : "before"), node->uname); } else if ((alive == FALSE) && safe_str_eq(task, CRM_OP_SHUTDOWN)) { - crm_notice("%s of %s (op %d) is complete", task, node->uname, down->id); + crm_notice("%s of peer %s is complete "CRM_XS" op=%d", + task, node->uname, down->id); + /* down->confirmed = TRUE; */ stop_te_timer(down->timer); if (!is_remote) { flags |= node_update_join | node_update_expected; crmd_peer_down(node, FALSE); check_join_state(fsa_state, __FUNCTION__); } update_graph(transition_graph, down); trigger_graph(); } else { crm_trace("Node %s is %salive, was expected to %s (op %d)", node->uname, (alive? "" : "not "), task, down->id); } } else if (appeared == FALSE) { crm_notice("Stonith/shutdown of %s not matched", node->uname); if (!is_remote) { crm_update_peer_join(__FUNCTION__, node, crm_join_none); check_join_state(fsa_state, __FUNCTION__); } abort_transition(INFINITY, tg_restart, "Node failure", NULL); fail_incompletable_actions(transition_graph, node->uuid); } else { crm_trace("Node %s came up, was not expected to be down", node->uname); } if (is_remote) { /* A pacemaker_remote node won't have its cluster status updated * in the CIB by membership-layer callbacks, so do it here. 
*/ flags |= node_update_cluster; /* Trigger resource placement on newly integrated nodes */ if (appeared) { abort_transition(INFINITY, tg_restart, "pacemaker_remote node integrated", NULL); } } /* Update the CIB node state */ update = do_update_node_cib(node, flags, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } trigger_fsa(fsa_source); } void crmd_cib_connection_destroy(gpointer user_data) { CRM_CHECK(user_data == fsa_cib_conn,;); crm_trace("Invoked"); trigger_fsa(fsa_source); fsa_cib_conn->state = cib_disconnected; if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Connection to the CIB terminated..."); return; } /* eventually this will trigger a reconnect, not a shutdown */ crm_err("Connection to the CIB terminated..."); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_CIB_CONNECTED); return; } gboolean crm_fsa_trigger(gpointer user_data) { crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue)); s_crmd_fsa(C_FSA_INTERNAL); crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue)); return TRUE; } diff --git a/crmd/cib.c b/crmd/cib.c index 0c3f7478ce..40fed4a987 100644 --- a/crmd/cib.c +++ b/crmd/cib.c @@ -1,278 +1,277 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include struct crm_subsystem_s *cib_subsystem = NULL; int cib_retries = 0; static void do_cib_updated(const char *event, xmlNode * msg) { int rc = -1; int format= 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xmlNode *change = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } crm_element_value_int(patchset, "format", &format); if (format == 1) { if ((xpathObj = xpath_search( msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_CRMCONFIG " | " \ "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_ALERTS )) != NULL) { freeXpathObject(xpathObj); mainloop_set_trigger(config_read); } } else if (format == 2) { for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *xpath = crm_element_value(change, XML_DIFF_PATH); if (xpath == NULL) { continue; } /* modifying properties */ if (!strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_CRMCONFIG "/") && !strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS)) { xmlNode *section = NULL; const char *name = NULL; /* adding notifications section */ if ((strcmp(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION) != 0) || ((section = __xml_first_child(change)) == NULL) || ((name = crm_element_name(section)) == NULL) || (strcmp(name, XML_CIB_TAG_ALERTS) != 0)) { continue; } } mainloop_set_trigger(config_read); break; } } else { crm_warn("Unknown patch format: %d", format); } } static void revision_check_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int cmp = -1; xmlNode *generation = NULL; const char *revision = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } generation = output; CRM_CHECK(safe_str_eq(crm_element_name(generation), XML_TAG_CIB), crm_log_xml_err(output, __FUNCTION__); return); crm_trace("Checking our feature revision is allowed: %s", CIB_FEATURE_SET); revision = crm_element_value(generation, XML_ATTR_CRM_VERSION); cmp = compare_version(revision, CRM_FEATURE_SET); if (cmp > 0) { - crm_err("This build (%s) does not support the current resource configuration", PACEMAKER_VERSION); - crm_err("We can only support up to CRM feature set %s (current=%s)", - CRM_FEATURE_SET, revision); - crm_err("Shutting down the CRM"); + crm_err("Shutting down because the current configuration is not supported by this version " + CRM_XS " build=%s supported=%s current=%s", + PACEMAKER_VERSION, CRM_FEATURE_SET, revision); /* go into a stall state */ register_fsa_error_adv(C_FSA_INTERNAL, I_SHUTDOWN, NULL, NULL, __FUNCTION__); return; } } static void do_cib_replaced(const char *event, xmlNode * msg) { crm_debug("Updating the CIB after a replace: DC=%s", AM_I_DC ? 
"true" : "false"); if (AM_I_DC == FALSE) { return; } else if (fsa_state == S_FINALIZE_JOIN && is_set(fsa_input_register, R_CIB_ASKED)) { /* no need to restart the join - we asked for this replace op */ return; } /* start the join process again so we get everyone's LRM status */ populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } /* A_CIB_STOP, A_CIB_START, A_CIB_RESTART, */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = cib_subsystem; long long stop_actions = A_CIB_STOP; long long start_actions = A_CIB_START; if (action & stop_actions) { if (fsa_cib_conn->state != cib_disconnected && last_resource_update != 0) { crm_info("Waiting for resource update %d to complete", last_resource_update); crmd_fsa_stall(FALSE); return; } crm_info("Disconnecting CIB"); clear_bit(fsa_input_register, R_CIB_CONNECTED); CRM_ASSERT(fsa_cib_conn != NULL); fsa_cib_conn->cmds->del_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); if (fsa_cib_conn->state != cib_disconnected) { fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); fsa_cib_conn->cmds->signoff(fsa_cib_conn); } } if (action & start_actions) { int rc = pcmk_ok; CRM_ASSERT(fsa_cib_conn != NULL); if (cur_state == S_STOPPING) { crm_err("Ignoring request to start %s after shutdown", this_subsys->name); return; } rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { /* a short wait that usually avoids stalling the FSA */ sleep(1); rc = fsa_cib_conn->cmds->signon(fsa_cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB service: %s", pcmk_strerror(rc)); } else if (pcmk_ok != fsa_cib_conn->cmds->set_connection_dnotify(fsa_cib_conn, crmd_cib_connection_destroy)) { crm_err("Could not set dnotify callback"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced)) { crm_err("Could not set CIB notification callback (replace)"); } else if (pcmk_ok != fsa_cib_conn->cmds->add_notify_callback(fsa_cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated)) { crm_err("Could not set CIB notification callback (update)"); } else { set_bit(fsa_input_register, R_CIB_CONNECTED); } if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { cib_retries++; crm_warn("Couldn't complete CIB registration %d" " times... pause and retry", cib_retries); if (cib_retries < 30) { crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB" " registration %d times..." " hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } else { int call_id = 0; crm_info("CIB connection established"); call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, revision_check_callback); cib_retries = 0; } } } /*! 
* \internal * \brief Get CIB call options to use local scope if master unavailable * * \return CIB call options */ int crmd_cib_smart_opt() { int call_opt = cib_quorum_override; if (fsa_state == S_ELECTION || fsa_state == S_PENDING) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(fsa_state)); call_opt |= cib_scope_local; } return call_opt; } diff --git a/crmd/control.c b/crmd/control.c index fc866b0e2d..3e0370cec5 100644 --- a/crmd/control.c +++ b/crmd/control.c @@ -1,1143 +1,1146 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; extern gboolean crm_connect_corosync(crm_cluster_t * cluster); extern void crmd_ha_connection_destroy(gpointer user_data); void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; static gboolean election_timeout_popped(gpointer data) { /* Not everyone voted */ crm_info("Election failed: Declaring ourselves the winner"); register_fsa_input(C_TIMER_POPPED, I_ELECTION_DC, NULL); return FALSE; } /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_openais_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } else if (is_heartbeat_cluster()) { #if SUPPORT_HEARTBEAT cluster->destroy = crmd_ha_connection_destroy; cluster->hb_dispatch = crmd_ha_msg_callback; registered = crm_cluster_connect(cluster); fsa_cluster_conn = cluster->hb_conn; crm_trace("Be informed of Node Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_nstatus_callback(fsa_cluster_conn, crmd_ha_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set nstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } crm_trace("Be informed of CRM Client Status changes"); if (registered && fsa_cluster_conn->llc_ops->set_cstatus_callback(fsa_cluster_conn, crmd_client_status_callback, fsa_cluster_conn) != HA_OK) { crm_err("Cannot set cstatus callback: %s", fsa_cluster_conn->llc_ops->errmsg(fsa_cluster_conn)); registered = FALSE; } if (registered) { 
crm_trace("Requesting an initial dump of CRMD client_status"); fsa_cluster_conn->llc_ops->client_status(fsa_cluster_conn, NULL, CRM_SYSTEM_CRMD, -1); } #endif } fsa_election = election_init(NULL, cluster->uname, 60000/*60s*/, election_timeout_popped); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static bool need_spawn_pengine_from_crmd(void) { static int result = -1; if (result != -1) return result; if (!is_heartbeat_cluster()) { result = 0; return result; } /* NULL, or "strange" value: rather spawn from here. */ result = TRUE; crm_str_to_boolean(daemon_option("crmd_spawns_pengine"), &result); return result; } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (need_spawn_pengine_from_crmd()) { if (is_set(fsa_input_register, pe_subsystem->flag_connected)) { crm_info("Terminating the %s", pe_subsystem->name); if (stop_subsystem(pe_subsystem, TRUE) == FALSE) { /* its gone... */ crm_err("Faking %s exit", pe_subsystem->name); clear_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Waiting for subsystems to exit"); crmd_fsa_stall(FALSE); } } crm_info("All subsystems stopped, continuing"); } if (stonith_api) { /* Prevent it from coming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting STONITH..."); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; crm_info("Sending shutdown request to %s", crm_str(fsa_our_dc)); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern crm_ipc_t *attrd_ipc; extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern GHashTable *metadata_hash; extern char *te_client_id; void log_connected_client(gpointer key, gpointer value, gpointer user_data); void log_connected_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *client = value; crm_err("%s is still connected at exit", crm_client_name(client)); } int crmd_fast_exit(int rc) { if (is_set(fsa_input_register, R_STAYDOWN)) { - crm_warn("Inhibiting respawn: %d -> %d", rc, 100); - rc = 100; + crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", + rc, DAEMON_RESPAWN_STOP); + rc = DAEMON_RESPAWN_STOP; } if (rc == pcmk_ok && is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); rc = pcmk_err_generic; } return crm_exit(rc); } int crmd_exit(int rc) { GListPtr gIter = NULL; GMainLoop *mloop = crmd_mainloop; 
static bool in_progress = FALSE; if(in_progress && rc == 0) { crm_debug("Exit is already in progress"); return rc; } else if(in_progress) { - crm_notice("Error during shutdown process, terminating now: %s (%d)", pcmk_strerror(rc), rc); + crm_notice("Error during shutdown process, terminating now with status %d: %s", + rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(rc); } in_progress = TRUE; crm_trace("Preparing to exit: %d", rc); /* Suppress secondary errors resulting from us disconnecting everything */ set_bit(fsa_input_register, R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } if (attrd_ipc) { crm_trace("Closing attrd connection"); crm_ipc_close(attrd_ipc); crm_ipc_destroy(attrd_ipc); attrd_ipc = NULL; } if (pe_subsystem && pe_subsystem->client && pe_subsystem->client->ipcs) { crm_trace("Disconnecting Policy Engine"); qb_ipcs_disconnect(pe_subsystem->client->ipcs); } if(stonith_api) { crm_trace("Disconnecting fencing API"); clear_bit(fsa_input_register, R_ST_REQUIRED); stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (rc == pcmk_ok && crmd_mainloop == NULL) { crm_debug("No mainloop detected"); rc = EPROTO; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors to stand out */ if(rc != pcmk_ok) { - crm_notice("Forcing immediate exit: %s (%d)", pcmk_strerror(rc), rc); + crm_notice("Forcing immediate exit with status %d: %s", + rc, pcmk_strerror(rc)); crm_write_blackbox(SIGTRAP, NULL); return crmd_fast_exit(rc); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } clear_bit(fsa_input_register, R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; free(pe_subsystem); pe_subsystem = NULL; free(te_subsystem); te_subsystem = NULL; free(cib_subsystem); cib_subsystem = NULL; if (metadata_hash) { crm_trace("Destroying reload cache with %d members", g_hash_table_size(metadata_hash)); g_hash_table_destroy(metadata_hash); metadata_hash = NULL; } election_fini(fsa_election); fsa_election = NULL; cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; crm_client_cleanup(); crm_peer_destroy(); crm_timer_stop(transition_timer); crm_timer_stop(integration_timer); crm_timer_stop(finalization_timer); crm_timer_stop(election_trigger); election_timeout_stop(fsa_election); crm_timer_stop(shutdown_escalation_timer); crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer);
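/* Note the free-then-NULL pattern used for each timer and global in
 * this teardown, e.g.:
 *
 *     free(transition_timer); transition_timer = NULL;
 *
 * Resetting the pointer turns a potential use-after-free by any late
 * or re-entrant caller into a harmless NULL check.
 */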
finalization_timer = NULL; free(election_trigger); election_trigger = NULL; election_fini(fsa_election); free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(te_client_id); te_client_id = NULL; free(fsa_pe_ref); fsa_pe_ref = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); mainloop_destroy_signal(SIGCHLD); if (mloop) { int lpc = 0; GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); while(g_main_context_pending(ctx) && lpc < 10) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); #if SUPPORT_HEARTBEAT /* Do this only after g_main_loop_quit(). * * This interface was broken (incomplete) since it was introduced. * ->delete() does clean up and free most of it, but it does not * actually remove and destroy the corresponding GSource, so the next * prepare/check iteration would find a corrupt (because partially * freed) GSource, and segfault. * * Apparently one was supposed to store the GSource as returned by * G_main_add_ll_cluster(), and g_source_destroy() that "by hand". * * But no-one ever did this, not even in the old hb code when this was * introduced. * * Note that fsa_cluster_conn was set as an "alias" to cluster->hb_conn * in do_ha_control() right after crm_cluster_connect(), and only * happens to still point at that object, because do_ha_control() does * not reset it to NULL after crm_cluster_disconnect() above does * reset cluster->hb_conn to NULL. * Not sure if that's something to clean up, too. * * I'll try to fix this up in heartbeat proper, so ->delete * will actually remove, and destroy, and unref, and free this thing. * Doing so after g_main_loop_quit() is valid with both old, * and eventually fixed heartbeat.
* * If we introduce the "by hand" destroy/remove/unref, * this may break again once heartbeat is fixed :-( * * -- Lars Ellenberg */ if (fsa_cluster_conn) { crm_trace("Deleting heartbeat api object"); fsa_cluster_conn->llc_ops->delete(fsa_cluster_conn); fsa_cluster_conn = NULL; } #endif /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); crm_trace("Done %d", rc); } /* Graceful */ return rc; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int exit_code = pcmk_ok; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { /* exit_code = pcmk_err_generic; */ log_level = LOG_ERR; exit_type = "forcefully"; exit_code = pcmk_err_generic; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the CRMd", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB and LRM objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think it's in S_NOT_DC * * raising I_FINALIZED puts us into a transition loop which is * never resolved. * In this loop we continually send probes which the node * NACKs because it's in S_PENDING * * if we have nodes where heartbeat is active but the * CRM is not...
then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } /* set up the sub systems */ cib_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); te_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); pe_subsystem = calloc(1, sizeof(struct crm_subsystem_s)); if (cib_subsystem != NULL) { cib_subsystem->pid = -1; cib_subsystem->name = CRM_SYSTEM_CIB; cib_subsystem->flag_connected = R_CIB_CONNECTED; cib_subsystem->flag_required = R_CIB_REQUIRED; } else { was_error = TRUE; } if (te_subsystem != NULL) { te_subsystem->pid = -1; te_subsystem->name = CRM_SYSTEM_TENGINE; te_subsystem->flag_connected = R_TE_CONNECTED; te_subsystem->flag_required = R_TE_REQUIRED; } else { was_error = TRUE; } if (pe_subsystem != NULL) { pe_subsystem->pid = -1; pe_subsystem->path = CRM_DAEMON_DIR; pe_subsystem->name = CRM_SYSTEM_PENGINE; pe_subsystem->command = CRM_DAEMON_DIR "/" CRM_SYSTEM_PENGINE; pe_subsystem->args = NULL; pe_subsystem->flag_connected = R_PE_CONNECTED; pe_subsystem->flag_required = R_PE_REQUIRED; } else { was_error = TRUE; } if (was_error == FALSE && need_spawn_pengine_from_crmd()) { if (start_subsystem(pe_subsystem) == FALSE) { was_error = TRUE; } } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "CRMd[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); struct crm_subsystem_s *the_subsystem = NULL; if (client == NULL) { return 0; } crm_trace("Connection %p", c); if (client->userdata == NULL) { crm_trace("Client hadn't registered with us yet"); } else if (strcasecmp(CRM_SYSTEM_PENGINE, client->userdata) == 0) { the_subsystem = pe_subsystem; } else if (strcasecmp(CRM_SYSTEM_TENGINE, client->userdata) == 
0) { the_subsystem = te_subsystem; } else if (strcasecmp(CRM_SYSTEM_CIB, client->userdata) == 0) { the_subsystem = cib_subsystem; } if (the_subsystem != NULL) { the_subsystem->source = NULL; the_subsystem->client = NULL; crm_info("Received HUP from %s:[%d]", the_subsystem->name, the_subsystem->pid); } else { /* else that was a transient client */ crm_trace("Received HUP from transient client"); } crm_trace("Disconnecting client %s (%p)", crm_client_name(client), client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, LRM not connected (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { /* try reading from HA */ crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { HA_Message *msg = NULL; crm_trace("Looking for a HA message"); msg = fsa_cluster_conn->llc_ops->readmsg(fsa_cluster_conn, 0); if (msg != NULL) { crm_trace("There was a HA message"); ha_msg_del(msg); } } #endif crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); crm_notice("The local CRM is operational"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ 
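/* Anatomy of one crmd_opts[] entry, mirroring the rows below
 * ("crmd-example-option" is a made-up name for illustration only):
 *
 *     { "crmd-example-option",  // option name as it appears in the CIB
 *       NULL,                   // no legacy (old) name
 *       "time", NULL, "30s",    // type, allowed values, default
 *       &check_timer,           // validation callback
 *       "Short description",
 *       "Long description" },
 */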
pe_cluster_option crmd_opts[] = { /* name, old-name, validate, values, default, short description, long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "heartbeat", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, "dc_deadtime", "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, "cluster_recheck_interval", "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified, e.g. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event-driven; however, the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, #ifdef RHEL7_COMPAT /* These options were superseded by the alerts feature and now are just an * alternate interface to it. It was never released upstream, but was * released in RHEL 7, so we allow it to be enabled at compile-time by * defining RHEL7_COMPAT. */ { "notification-agent", NULL, "string", NULL, "/dev/null", &check_script, "Notification script or tool to be called after significant cluster events", "Full path to a script or binary that will be invoked when resources start/stop/fail, fencing occurs or nodes join/leave the cluster.\n" "Must exist on all nodes in the cluster." }, { "notification-recipient", NULL, "string", NULL, "", NULL, "Destination for notifications (Optional)", "Where the supplied script should send notifications. Useful to avoid hard-coding this in the script." }, #endif { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization, "The maximum amount of system resources that should be used by nodes in the cluster", "The cluster will slow down its recovery process when the amount of system resources used" " (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", &check_number, "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"}, { XML_CONFIG_ATTR_ELECTION_FAIL, "election_timeout", "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, "shutdown_escalation", "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." }, { "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug."
}, { "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\n" "Enabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n" "Useful if your configuration is sensitive to the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout, "How long to wait before we can assume nodes are safely down", NULL }, { "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, #if SUPPORT_PLUGIN { XML_ATTR_EXPECTED_VOTES, NULL, "integer", NULL, "2", &check_number, "The number of nodes expected to be in the cluster", "Used to calculate quorum in openais based clusters." }, #endif }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("CRM Daemon", "1.0", "CRM Daemon Options", "This is a fake resource that details the options that can be configured for the CRM Daemon.", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { #ifdef RHEL7_COMPAT const char *script = NULL; #endif const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); #ifdef RHEL7_COMPAT script = crmd_pref(config_hash, "notification-agent"); value = crmd_pref(config_hash, "notification-recipient"); crmd_enable_notifications(script, value); #endif value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_load_target = strtof(value, NULL) / 100; } value = crmd_pref(config_hash, "no-quorum-policy"); if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) { no_quorum_suicide_escalation = TRUE; } value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); /* How long to declare an election over - even 
if not everyone voted */ crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); election_timeout_set_period(fsa_election, crm_get_msec(value)); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "crmd-transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "crmd-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { value = crmd_pref(config_hash, XML_ATTR_EXPECTED_VOTES); crm_debug("Sending expected-votes=%s to corosync", value); send_cluster_text(crm_class_quorum, value, TRUE, NULL, crm_msg_ais); } #endif free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = output?first_named_child(output, XML_CIB_TAG_ALERTS):NULL; parse_notifications(alerts); set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* can't rely on this... */ - crm_notice("Requesting shutdown, upper limit is %dms", - shutdown_escalation_timer->period_ms); + crm_notice("Shutting down cluster resource manager " CRM_XS + " limit=%dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(pcmk_ok); } } diff --git a/crmd/election.c b/crmd/election.c index a0b2c450db..1ff49653cf 100644 --- a/crmd/election.c +++ b/crmd/election.c @@ -1,264 +1,265 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #define STORM_INTERVAL 2 /* in seconds */ #define STORM_MULTIPLIER 5 /* multiplied by the number of nodes */ /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean not_voting = FALSE; /* don't vote if we're in one of these states or wanting to shut down */ switch (cur_state) { case S_STARTING: case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; case S_ELECTION: case S_INTEGRATION: case S_RELEASE_DC: break; default: crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); break; } if (not_voting == FALSE) { if (is_set(fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if (not_voting) { if (AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return; } election_vote(fsa_election); return; } void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (fsa_state != S_ELECTION) { crm_debug("Ignore election check: we're not in an election"); } else if(election_check(fsa_election)) { register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); } return; } #define loss_dampen 2 /* in seconds */ /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { enum election_result rc = 0; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); if(crm_peer_cache == NULL) { if(is_not_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Internal error, no peer cache"); } return; } rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING); switch(rc) { case election_start: election_reset(fsa_election); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); break; case election_lost: update_dc(NULL); if (fsa_input_register & R_THE_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); } else if (cur_state != S_STARTING) { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } break; case election_in_progress: break; default: crm_err("Unhandled election result: %d", rc); } } /* A_ELECT_TIMER_START, A_ELECTION_TIMEOUT */ /* we won */ void do_election_timer_ctrl(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { } static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; - crm_notice("Update failed: %s (%d)", pcmk_strerror(rc), rc); + crm_notice("Feature update failed: %s "CRM_XS" rc=%d", + pcmk_strerror(rc), rc); register_fsa_error(C_FSA_INTERNAL,
I_ERROR, NULL); } } /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int rc = pcmk_ok; xmlNode *cib = NULL; const char *cluster_type = name_for_cluster_type(get_cluster_type()); pid_t watchdog = pcmk_locate_sbd(); crm_info("Taking over DC status for this partition"); set_bit(fsa_input_register, R_THE_DC); execute_stonith_cleanup(); #if SUPPORT_COROSYNC if (is_classic_ais_cluster()) { send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); } #endif election_reset(fsa_election); set_bit(fsa_input_register, R_JOIN_OK); set_bit(fsa_input_register, R_INVOKE_PE); fsa_cib_conn->cmds->set_master(fsa_cib_conn, cib_scope_local); cib = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); fsa_cib_update(XML_TAG_CIB, cib, cib_quorum_override, rc, NULL); fsa_register_cib_callback(rc, FALSE, NULL, feature_update_callback); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false", FALSE, NULL, NULL); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "dc-version", PACEMAKER_VERSION "-" BUILD_VERSION, FALSE, NULL, NULL); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "cluster-infrastructure", cluster_type, FALSE, NULL, NULL); #if SUPPORT_COROSYNC # if !SUPPORT_PLUGIN if (fsa_cluster_name == NULL && is_corosync_cluster()) { char *cluster_name = corosync_cluster_name(); if (cluster_name) { update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "cluster-name", cluster_name, FALSE, NULL, NULL); } free(cluster_name); } # endif #endif mainloop_set_trigger(config_read); free_xml(cib); } /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_DC_RELEASE) { crm_debug("Releasing the role of DC"); clear_bit(fsa_input_register, R_THE_DC); } else if (action & A_DC_RELEASED) { crm_info("DC role released"); #if 0 if (are there errors) { /* we can't stay up if not healthy */ /* or perhaps I_ERROR and go to S_RECOVER? */ result = I_SHUTDOWN; } #endif if (is_set(fsa_input_register, R_SHUTDOWN)) { xmlNode *update = NULL; crm_node_t *node = crm_get_peer(0, fsa_our_uname); crm_update_peer_expected(__FUNCTION__, node, CRMD_JOINSTATE_DOWN); update = do_update_node_cib(node, node_update_expected, NULL, __FUNCTION__); fsa_cib_anon_update(XML_CIB_TAG_STATUS, update, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(update); } register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { - crm_err("Unknown action %s", fsa_action2string(action)); + crm_err("Unknown DC action %s", fsa_action2string(action)); } crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); } diff --git a/crmd/fsa.c b/crmd/fsa.c index cf54d193e8..c9319bcfc2 100644 --- a/crmd/fsa.c +++ b/crmd/fsa.c @@ -1,675 +1,677 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. 
* * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *fsa_our_dc = NULL; cib_t *fsa_cib_conn = NULL; char *fsa_our_dc_version = NULL; char *fsa_our_uuid = NULL; char *fsa_our_uname = NULL; char *fsa_cluster_name = NULL; #if SUPPORT_HEARTBEAT ll_cluster_t *fsa_cluster_conn; #endif election_t *fsa_election = NULL; fsa_timer_t *wait_timer = NULL; /* How long to wait before retrying to connect to the cib/lrmd/ccm */ fsa_timer_t *recheck_timer = NULL; /* Periodically re-run the PE to account for time based rules/preferences */ fsa_timer_t *election_trigger = NULL; /* How long to wait at startup, or after an election, for the DC to make contact */ fsa_timer_t *transition_timer = NULL; /* How long to delay the start of a new transition with the expectation something else might happen too */ fsa_timer_t *integration_timer = NULL; fsa_timer_t *finalization_timer = NULL; fsa_timer_t *shutdown_escalation_timer = NULL; /* How long to wait for the DC to stop all resources and give us the all-clear to shut down */ volatile gboolean do_fsa_stall = FALSE; volatile long long fsa_input_register = 0; volatile long long fsa_actions = A_NOTHING; volatile enum crmd_fsa_state fsa_state = S_STARTING; extern uint highest_born_on; extern uint num_join_invites; extern void initialize_join(gboolean before); #define DOT_PREFIX "actions:trace: " #define do_dot_log(fmt, args...) 
crm_trace( fmt, ##args) long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data); void dump_rsc_info(void); void dump_rsc_info_callback(const xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); void ghash_print_node(gpointer key, gpointer value, gpointer user_data); void s_crmd_fsa_actions(fsa_data_t * fsa_data); void log_fsa_input(fsa_data_t * stored_msg); void init_dotfile(void); void init_dotfile(void) { do_dot_log(DOT_PREFIX "digraph \"g\" {"); do_dot_log(DOT_PREFIX " size = \"30,30\""); do_dot_log(DOT_PREFIX " graph ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " node ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " shape = \"ellipse\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " edge ["); do_dot_log(DOT_PREFIX " fontsize = \"12\""); do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\""); do_dot_log(DOT_PREFIX " fontcolor = \"black\""); do_dot_log(DOT_PREFIX " color = \"black\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// special nodes"); do_dot_log(DOT_PREFIX " \"S_PENDING\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"blue\""); do_dot_log(DOT_PREFIX " fontcolor = \"blue\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX " \"S_TERMINATE\" "); do_dot_log(DOT_PREFIX " ["); do_dot_log(DOT_PREFIX " color = \"red\""); do_dot_log(DOT_PREFIX " fontcolor = \"red\""); do_dot_log(DOT_PREFIX " ]"); do_dot_log(DOT_PREFIX "// DC only nodes"); do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]"); do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]"); } static void do_fsa_action(fsa_data_t * fsa_data, long long an_action, void (*function) (long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)) { fsa_actions &= ~an_action; crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action)); function(an_action, fsa_data->fsa_cause, fsa_state, fsa_data->fsa_input, fsa_data); } static long long startup_actions = A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_CCM_CONNECT | A_HA_CONNECT | A_READCONFIG | A_STARTED | A_CL_JOIN_QUERY; enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause) { fsa_data_t *fsa_data = NULL; long long register_copy = fsa_input_register; long long new_actions = A_NOTHING; enum crmd_fsa_state last_state; crm_trace("FSA invoked with Cause: %s\tState: %s", fsa_cause2string(cause), fsa_state2string(fsa_state)); fsa_dump_actions(fsa_actions, "Initial"); do_fsa_stall = FALSE; if (is_message() == FALSE && fsa_actions != A_NOTHING) { /* fake the first message so we can get into the loop */ fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->fsa_input = I_NULL; fsa_data->fsa_cause = C_FSA_INTERNAL; fsa_data->origin = __FUNCTION__; fsa_data->data_type = fsa_dt_none; fsa_message_queue = 
g_list_append(fsa_message_queue, fsa_data); fsa_data = NULL; } while (is_message() && do_fsa_stall == FALSE) { crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue)); fsa_data = get_message(); if(fsa_data == NULL) { continue; } log_fsa_input(fsa_data); /* add any actions back to the queue */ fsa_actions |= fsa_data->actions; fsa_dump_actions(fsa_data->actions, "Restored actions"); /* get the next batch of actions */ new_actions = crmd_fsa_actions[fsa_data->fsa_input][fsa_state]; fsa_actions |= new_actions; fsa_dump_actions(new_actions, "New actions"); if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) { crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); } /* logging : *before* the state is changed */ if (is_set(fsa_actions, A_ERROR)) { do_fsa_action(fsa_data, A_ERROR, do_log); } if (is_set(fsa_actions, A_WARN)) { do_fsa_action(fsa_data, A_WARN, do_log); } if (is_set(fsa_actions, A_LOG)) { do_fsa_action(fsa_data, A_LOG, do_log); } /* update state variables */ last_state = fsa_state; fsa_state = crmd_fsa_state[fsa_data->fsa_input][fsa_state]; /* * Remove certain actions during shutdown */ if (fsa_state == S_STOPPING || ((fsa_input_register & R_SHUTDOWN) == R_SHUTDOWN)) { clear_bit(fsa_actions, startup_actions); } /* * Hook for change of state. * Allows actions to be added or removed when entering a state */ if (last_state != fsa_state) { fsa_actions = do_state_transition(fsa_actions, last_state, fsa_state, fsa_data); } else { do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s" " \tInput=%s \tOrigin=%s() \tid=%d", fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id); } /* start doing things... */ s_crmd_fsa_actions(fsa_data); delete_fsa_input(fsa_data); fsa_data = NULL; } if (g_list_length(fsa_message_queue) > 0 || fsa_actions != A_NOTHING || do_fsa_stall) { crm_debug("Exiting the FSA: queue=%d, fsa_actions=0x%llx, stalled=%s", g_list_length(fsa_message_queue), fsa_actions, do_fsa_stall ? "true" : "false"); } else { crm_trace("Exiting the FSA"); } /* cleanup inputs? */ if (register_copy != fsa_input_register) { long long same = register_copy & fsa_input_register; fsa_dump_inputs(LOG_DEBUG, "Added", fsa_input_register ^ same); fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same); } fsa_dump_actions(fsa_actions, "Remaining"); fsa_dump_queue(LOG_DEBUG); return fsa_state; } void s_crmd_fsa_actions(fsa_data_t * fsa_data) { /* * Process actions in order of priority but do only one * action at a time to avoid complicating the ordering. */ CRM_CHECK(fsa_data != NULL, return); while (fsa_actions != A_NOTHING && do_fsa_stall == FALSE) { /* regular action processing in order of action priority * * Make sure all actions that connect to required systems * are performed first */ if (fsa_actions & A_ERROR) { do_fsa_action(fsa_data, A_ERROR, do_log); } else if (fsa_actions & A_WARN) { do_fsa_action(fsa_data, A_WARN, do_log); } else if (fsa_actions & A_LOG) { do_fsa_action(fsa_data, A_LOG, do_log); /* get out of here NOW! 
before anything worse happens */ } else if (fsa_actions & A_EXIT_1) { do_fsa_action(fsa_data, A_EXIT_1, do_exit); /* sub-system restart */ } else if ((fsa_actions & O_LRM_RECONNECT) == O_LRM_RECONNECT) { do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control); } else if ((fsa_actions & O_CIB_RESTART) == O_CIB_RESTART) { do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control); } else if ((fsa_actions & O_PE_RESTART) == O_PE_RESTART) { do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control); } else if ((fsa_actions & O_TE_RESTART) == O_TE_RESTART) { do_fsa_action(fsa_data, O_TE_RESTART, do_te_control); /* essential start tasks */ } else if (fsa_actions & A_STARTUP) { do_fsa_action(fsa_data, A_STARTUP, do_startup); } else if (fsa_actions & A_CIB_START) { do_fsa_action(fsa_data, A_CIB_START, do_cib_control); } else if (fsa_actions & A_HA_CONNECT) { do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control); } else if (fsa_actions & A_READCONFIG) { do_fsa_action(fsa_data, A_READCONFIG, do_read_config); /* sub-system start/connect */ } else if (fsa_actions & A_LRM_CONNECT) { do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_CONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_CONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_CONNECT; } else if (fsa_actions & A_TE_START) { do_fsa_action(fsa_data, A_TE_START, do_te_control); } else if (fsa_actions & A_PE_START) { do_fsa_action(fsa_data, A_PE_START, do_pe_control); /* Timers */ /* else if(fsa_actions & O_DC_TIMER_RESTART) { do_fsa_action(fsa_data, O_DC_TIMER_RESTART, do_timer_control) */ ; } else if (fsa_actions & A_DC_TIMER_STOP) { do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_STOP) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_INTEGRATE_TIMER_START) { do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_STOP) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control); } else if (fsa_actions & A_FINALIZE_TIMER_START) { do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control); /* * Highest priority actions */ } else if (fsa_actions & A_MSG_ROUTE) { do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route); } else if (fsa_actions & A_RECOVER) { do_fsa_action(fsa_data, A_RECOVER, do_recover); } else if (fsa_actions & A_CL_JOIN_RESULT) { do_fsa_action(fsa_data, A_CL_JOIN_RESULT, do_cl_join_finalize_respond); } else if (fsa_actions & A_CL_JOIN_REQUEST) { do_fsa_action(fsa_data, A_CL_JOIN_REQUEST, do_cl_join_offer_respond); } else if (fsa_actions & A_SHUTDOWN_REQ) { do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req); } else if (fsa_actions & A_ELECTION_VOTE) { do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote); } else if (fsa_actions & A_ELECTION_COUNT) { do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote); } else if (fsa_actions & A_LRM_EVENT) { do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event); /* * High priority actions */ } else if (fsa_actions & A_STARTED) { do_fsa_action(fsa_data, A_STARTED, do_started); } else if (fsa_actions & A_CL_JOIN_QUERY) { do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query); } else if (fsa_actions & A_DC_TIMER_START) { do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control); /* * Medium priority actions * - Membership */ } else if (fsa_actions & A_DC_TAKEOVER) { do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover); } else if (fsa_actions 
& A_DC_RELEASE) { do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release); } else if (fsa_actions & A_DC_JOIN_FINAL) { do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final); } else if (fsa_actions & A_ELECTION_CHECK) { do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check); } else if (fsa_actions & A_ELECTION_START) { do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote); } else if (fsa_actions & A_DC_JOIN_OFFER_ALL) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all); } else if (fsa_actions & A_DC_JOIN_OFFER_ONE) { do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one); } else if (fsa_actions & A_DC_JOIN_PROCESS_REQ) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ, do_dc_join_filter_offer); } else if (fsa_actions & A_DC_JOIN_PROCESS_ACK) { do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack); } else if (fsa_actions & A_DC_JOIN_FINALIZE) { do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize); } else if (fsa_actions & A_CL_JOIN_ANNOUNCE) { do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce); /* * Low(er) priority actions * Make sure the CIB is always updated before invoking the * PE, and the PE before the TE */ } else if (fsa_actions & A_TE_HALT) { do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke); } else if (fsa_actions & A_TE_CANCEL) { do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke); } else if (fsa_actions & A_LRM_INVOKE) { do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke); } else if (fsa_actions & A_PE_INVOKE) { do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke); } else if (fsa_actions & A_TE_INVOKE) { do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke); /* Shutdown actions */ } else if (fsa_actions & A_DC_RELEASED) { do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release); } else if (fsa_actions & A_PE_STOP) { do_fsa_action(fsa_data, A_PE_STOP, do_pe_control); } else if (fsa_actions & A_TE_STOP) { do_fsa_action(fsa_data, A_TE_STOP, do_te_control); } else if (fsa_actions & A_SHUTDOWN) { do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown); } else if (fsa_actions & A_LRM_DISCONNECT) { do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control); } else if (fsa_actions & A_CCM_DISCONNECT) { #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { do_fsa_action(fsa_data, A_CCM_DISCONNECT, do_ccm_control); } #endif fsa_actions &= ~A_CCM_DISCONNECT; } else if (fsa_actions & A_HA_DISCONNECT) { do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control); } else if (fsa_actions & A_CIB_STOP) { do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control); } else if (fsa_actions & A_STOP) { do_fsa_action(fsa_data, A_STOP, do_stop); /* exit gracefully */ } else if (fsa_actions & A_EXIT_0) { do_fsa_action(fsa_data, A_EXIT_0, do_exit); /* Error checking and reporting */ } else { - crm_err("Action %s (0x%llx) not supported ", + crm_err("Action %s not supported "CRM_XS" 0x%llx", fsa_action2string(fsa_actions), fsa_actions); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL, __FUNCTION__); } } } void log_fsa_input(fsa_data_t * stored_msg) { CRM_ASSERT(stored_msg); crm_trace("Processing queued input %d", stored_msg->id); if (stored_msg->fsa_cause == C_CCM_CALLBACK) { crm_trace("FSA processing CCM callback from %s", stored_msg->origin); } else if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) { crm_trace("FSA processing LRM callback from %s", stored_msg->origin); } else if (stored_msg->data == NULL) { crm_trace("FSA processing input from %s", stored_msg->origin); } else { ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, 
fsa_dt_ha_msg, __FUNCTION__); crm_trace("FSA processing XML message from %s", stored_msg->origin); crm_log_xml_trace(ha_input->xml, "FSA message data"); } } long long do_state_transition(long long actions, enum crmd_fsa_state cur_state, enum crmd_fsa_state next_state, fsa_data_t * msg_data) { int level = LOG_INFO; long long tmp = actions; gboolean clear_recovery_bit = TRUE; enum crmd_fsa_cause cause = msg_data->fsa_cause; enum crmd_fsa_input current_input = msg_data->fsa_input; const char *state_from = fsa_state2string(cur_state); const char *state_to = fsa_state2string(next_state); const char *input = fsa_input2string(current_input); CRM_LOG_ASSERT(cur_state != next_state); do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]", state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); if (cur_state == S_IDLE || next_state == S_IDLE) { level = LOG_NOTICE; } else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) { level = LOG_NOTICE; } else if (cur_state == S_ELECTION) { level = LOG_NOTICE; } else if (cur_state == S_STARTING) { level = LOG_NOTICE; } else if (next_state == S_RECOVERY) { level = LOG_WARNING; } - do_crm_log(level, "State transition %s -> %s [ input=%s cause=%s origin=%s ]", - state_from, state_to, input, fsa_cause2string(cause), msg_data->origin); + do_crm_log(level, "State transition %s -> %s " + CRM_XS " input=%s cause=%s origin=%s", + state_from, state_to, input, fsa_cause2string(cause), + msg_data->origin); /* the last two clauses might cause trouble later */ if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) { election_timeout_stop(fsa_election); /* } else { */ /* crm_timer_start(election_timeout); */ } #if 0 if ((fsa_input_register & R_SHUTDOWN)) { set_bit(tmp, A_DC_TIMER_STOP); } #endif if (next_state == S_INTEGRATION) { set_bit(tmp, A_INTEGRATE_TIMER_START); } else { set_bit(tmp, A_INTEGRATE_TIMER_STOP); } if (next_state == S_FINALIZE_JOIN) { set_bit(tmp, A_FINALIZE_TIMER_START); } else { set_bit(tmp, A_FINALIZE_TIMER_STOP); } if (next_state != S_PENDING) { set_bit(tmp, A_DC_TIMER_STOP); } if (next_state != S_ELECTION) { highest_born_on = 0; } if (next_state != S_IDLE) { crm_timer_stop(recheck_timer); } if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) { populate_cib_nodes(node_update_quick|node_update_all, __FUNCTION__); } switch (next_state) { case S_PENDING: fsa_cib_conn->cmds->set_slave(fsa_cib_conn, cib_scope_local); /* fall through */ case S_ELECTION: crm_trace("Resetting our DC to NULL on transition to %s", fsa_state2string(next_state)); update_dc(NULL); break; case S_NOT_DC: election_trigger->counter = 0; purge_stonith_cleanup(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we have a new DC"); set_bit(tmp, A_SHUTDOWN_REQ); } CRM_LOG_ASSERT(fsa_our_dc != NULL); if (fsa_our_dc == NULL) { crm_err("Reached S_NOT_DC without a DC" " being recorded"); } break; case S_RECOVERY: clear_recovery_bit = FALSE; break; case S_FINALIZE_JOIN: CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_warn("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_welcomed) > 0) { crm_warn("%u cluster nodes failed to respond" " to the join offer.", crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_NOTICE); } else { crm_debug("All %d cluster nodes responded to the join offer.", crmd_join_phase_count(crm_join_integrated)); } break; case S_POLICY_ENGINE: election_trigger->counter = 0; 
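/* The join-phase audit below compares per-phase counts against live
 * membership: finalized > 0 means some nodes never confirmed their
 * join; confirmed == crm_active_peers() means every active node is
 * eligible to run resources; confirmed > active peers means stale join
 * entries, so a new election is registered; and a changed crm_peer_seq
 * means membership moved underneath us, so the join sequence restarts.
 */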
CRM_LOG_ASSERT(AM_I_DC); if (cause == C_TIMER_POPPED) { crm_info("Progressed to state %s after %s", fsa_state2string(next_state), fsa_cause2string(cause)); } if (crmd_join_phase_count(crm_join_finalized) > 0) { crm_err("%u cluster nodes failed to confirm their join.", crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_NOTICE); } else if (crmd_join_phase_count(crm_join_confirmed) == crm_active_peers()) { crm_debug("All %u cluster nodes are" " eligible to run resources.", crm_active_peers()); } else if (crmd_join_phase_count(crm_join_confirmed) > crm_active_peers()) { crm_err("We have more confirmed nodes than our membership does: %d vs. %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers()); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } else if (saved_ccm_membership_id != crm_peer_seq) { crm_info("Membership changed: %llu -> %llu - join restart", saved_ccm_membership_id, crm_peer_seq); register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } else { crm_warn("Only %u of %u cluster " "nodes are eligible to run resources - continue %d", crmd_join_phase_count(crm_join_confirmed), crm_active_peers(), crmd_join_phase_count(crm_join_welcomed)); } /* initialize_join(FALSE); */ break; case S_STOPPING: case S_TERMINATE: /* possibly redundant */ set_bit(fsa_input_register, R_SHUTDOWN); break; case S_IDLE: CRM_LOG_ASSERT(AM_I_DC); dump_rsc_info(); if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("(Re)Issuing shutdown request now" " that we are the DC"); set_bit(tmp, A_SHUTDOWN_REQ); } if (recheck_timer->period_ms > 0) { crm_debug("Starting %s", get_timer_desc(recheck_timer)); crm_timer_start(recheck_timer); } break; default: break; } if (clear_recovery_bit && next_state != S_PENDING) { tmp &= ~A_RECOVER; } else if (clear_recovery_bit == FALSE) { tmp |= A_RECOVER; } if (tmp != actions) { /* fsa_dump_actions(actions ^ tmp, "New actions"); */ actions = tmp; } return actions; } void dump_rsc_info(void) { } void ghash_print_node(gpointer key, gpointer value, gpointer user_data) { const char *text = user_data; const char *uname = key; const char *value_s = value; crm_info("%s: %s %s", text, uname, value_s); } diff --git a/crmd/join_client.c b/crmd/join_client.c index 65e3beda39..ba19a8a048 100644 --- a/crmd/join_client.c +++ b/crmd/join_client.c @@ -1,280 +1,283 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include int reannounce_count = 0; void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); /* A_CL_JOIN_QUERY */ /* is there a DC out there? 
*/ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); sleep(1); /* give the CCM time to propagate to the DC */ update_dc(NULL); /* Unset any existing value so that the result is not discarded */ crm_debug("Querying for a DC"); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around * or are otherwise unable to reply when the DC sends out A_WELCOME_ALL */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* Once we hear from the DC, we can stop the timer * * This timer was started either on startup or when a node * left the CCM list */ /* don't announce if we're in one of these states */ if (cur_state != S_PENDING) { - crm_warn("Do not announce ourselves in state %s", fsa_state2string(cur_state)); + crm_warn("Not announcing cluster join because in state %s", + fsa_state2string(cur_state)); return; } if (AM_I_OPERATIONAL) { /* send as a broadcast */ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_debug("Announcing availability"); update_dc(NULL); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } else { /* Delay announce until we have finished local startup */ - crm_warn("Delaying announce until local startup is complete"); + crm_warn("Delaying announce of cluster join until local startup is complete"); return; } } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); const char *join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); #if 0 if (we are sick) { log error; /* save the request for later?
*/ return; } #endif - crm_trace("Accepting join offer from %s: join-%s", + crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); /* we only ever want the last one */ if (query_call_id > 0) { crm_trace("Cancelling previous join query: %d", query_call_id); remove_cib_op_callback(query_call_id, FALSE); query_call_id = 0; } if (update_dc(input->msg) == FALSE) { - crm_warn("Discarding offer from %s (expected %s)", welcome_from, fsa_our_dc); + crm_warn("Discarding cluster join offer from node %s (expected %s)", + welcome_from, fsa_our_dc); return; } CRM_LOG_ASSERT(input != NULL); query_call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children); fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback); crm_trace("Registered join query callback: %d", query_call_id); register_fsa_action(A_DC_TIMER_STOP); } void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *join_id = user_data; xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); CRM_LOG_ASSERT(join_id != NULL); if (query_call_id != call_id) { crm_trace("Query %d superseded", call_id); goto done; } query_call_id = 0; if(rc != pcmk_ok || output == NULL) { crm_err("Could not retrieve version details for join-%s: %s (%d)", join_id, pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } else if (fsa_our_dc == NULL) { crm_debug("Membership is in flux, not continuing join-%s", join_id); } else { xmlNode *reply = NULL; crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc); copy_in_properties(generation, output); reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add(reply, F_CRM_JOIN_ID, join_id); send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); } done: free_xml(generation); } /* A_CL_JOIN_RESULT */ /* aka. 
this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); int join_id = -1; const char *op = crm_element_value(input->msg, F_CRM_TASK); const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); if (safe_str_neq(op, CRM_OP_JOIN_ACKNAK)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ if (crm_is_true(ack_nack)) { was_nack = FALSE; } crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if (was_nack) { - crm_err("Join (join-%d) with leader %s failed (NACK'd): Shutting down", - join_id, welcome_from); + crm_err("Shutting down because cluster join with leader %s failed " + CRM_XS" join-%d NACK'd", welcome_from, join_id); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (AM_I_DC == FALSE && safe_str_eq(welcome_from, fsa_our_uname)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { - crm_warn("Discarding %s from %s (expected %s)", op, welcome_from, fsa_our_dc); + crm_warn("Discarding %s from node %s (expected from %s)", + op, welcome_from, fsa_our_dc); return; } /* send our status section to the DC */ crm_debug("Confirming join join-%d: %s", join_id, crm_element_value(input->msg, F_CRM_TASK)); tmp1 = do_lrm_query(TRUE, fsa_our_uname); if (tmp1 != NULL) { xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("join-%d: Join complete." " Sending local LRM status to %s", join_id, fsa_our_dc); if (first_join) { first_join = FALSE; /* * Clear any previous transient node attribute and lrm operations * * Corosync has a nasty habit of not being able to tell if a * node is returning or didn't leave in the first place. * This confuses Pacemaker because it never gets a "node up" * event which is normally used to clean up the status section. * * Do not remove the resources though, they'll be cleaned up in * do_dc_join_ack(). Removing them here creates a race * condition if the crmd is being recovered. * Instead of a list of active resources from the lrmd * we may end up with a blank status section. * If we are _NOT_ lucky, we will probe for the "wrong" instance * of anonymous clones and end up with multiple active * instances on the machine. 
*/ erase_status_tag(fsa_our_uname, XML_TAG_TRANSIENT_NODEATTRS, 0); /* Just in case attrd was still around too */ if (is_not_set(fsa_input_register, R_SHUTDOWN)) { update_attrd(fsa_our_uname, "terminate", NULL, NULL, FALSE); update_attrd(fsa_our_uname, XML_CIB_ATTR_SHUTDOWN, "0", NULL, FALSE); } } send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __FUNCTION__); update_attrd(NULL, NULL, NULL, NULL, FALSE); } free_xml(tmp1); } else { crm_err("Could not send our LRM state to the DC"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } diff --git a/crmd/join_dc.c b/crmd/join_dc.c index 89172319cc..94bb399abd 100644 --- a/crmd/join_dc.c +++ b/crmd/join_dc.c @@ -1,696 +1,699 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include "tengine.h" char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void initialize_join(gboolean before); void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; if(node == NULL) { - crm_err("%s: Could not set join-%u to %d for NULL", source, current_join_id, phase); + crm_err("Could not update join because node not specified" CRM_XS + " join-%u source=%s phase=%d", current_join_id, source, phase); return; } /* Remote nodes do not participate in joins */ if (is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("%s: Node %s[%u] - join-%u phase still %u", source, node->uname, node->id, current_join_id, last); } else if (phase <= crm_join_none) { node->join = phase; crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", source, node->uname, node->id, current_join_id, last, phase); } else if(phase == last + 1) { node->join = phase; crm_info("%s: Node %s[%u] - join-%u phase %u -> %u", source, node->uname, node->id, current_join_id, last, phase); } else { - crm_err("%s: Node %s[%u] - join-%u phase cannot transition from %u to %u", - source, node->uname, node->id, current_join_id, last, phase); + crm_err("Could not update join for node %s because phase transition invalid " + CRM_XS " join-%u source=%s node_id=%u last=%u new=%u", + node->uname, current_join_id, source, node->id, last, phase); } } void initialize_join(gboolean before) { GHashTableIter iter; crm_node_t *peer = NULL; /* clear out/reset a bunch of stuff */
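/* Resets every cached peer's join phase to crm_join_none; when 'before' is TRUE (i.e. ahead of a new join round), also discards the stored best-generation CIB and clears R_HAVE_CIB and R_CIB_ASKED so the best CIB is chosen afresh during integration */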
crm_debug("join-%d: Initializing join data (flag=%s)", current_join_id, before ? "true" : "false"); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__FUNCTION__, peer, crm_join_none); } if (before) { if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); } } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_info("Not making an offer to %s: not active (%s)", member->uname, member->state); if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_err("No recipient for welcome message"); return; } if (saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join offers based on membership %llu", crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Skipping %s: already known %d", member->uname, member->join); return; } crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none); offer = create_request(CRM_OP_JOIN_OFFER, NULL, member->uname, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); crm_xml_add_int(offer, F_CRM_JOIN_ID, current_join_id); /* send the welcome */ crm_info("join-%d: Sending offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed); /* crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_PENDING); */ } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* reset everyones status back to down or in_ccm in the CIB * * any nodes that are active in the CIB but not in the CCM list * will be seen as offline by the PE anyway */ current_join_id++; initialize_join(TRUE); /* do_update_cib_nodes(TRUE, __FUNCTION__); */ update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); /* dont waste time by invoking the PE yet; */ crm_info("join-%d: Waiting on %d outstanding join acks", current_join_id, crmd_join_phase_count(crm_join_welcomed)); } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; const char *op = NULL; const char *join_to = NULL; if (msg_data->data) { welcome = fsa_typed_data(fsa_dt_ha_msg); } else { crm_info("An unknown node joined - (re-)offer to any unconfirmed nodes"); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, 
__FUNCTION__); return; } if (welcome == NULL) { crm_err("Attempt to send welcome message without a message to reply to!"); return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Attempt to send welcome message without a host to reply to!"); return; } member = crm_get_peer(0, join_to); op = crm_element_value(welcome->msg, F_CRM_TASK); if (join_to != NULL && (cur_state == S_INTEGRATION || cur_state == S_FINALIZE_JOIN)) { /* note: it _is_ possible that a node will have been * sick or starting up when the original offer was made. * however, it will either re-announce itself in due course * _or_ we can re-store the original offer on the client. */ crm_trace("(Re-)offering membership to %s...", join_to); } crm_info("join-%d: Processing %s request from %s in state %s", current_join_id, op, join_to, fsa_state2string(cur_state)); crm_update_peer_join(__FUNCTION__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* always offer to the DC (ourselves) * this ensures the correct value for max_generation_from */ member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); /* this was a genuine join request, cancel any existing * transition and invoke the PE */ abort_transition(INFINITY, tg_restart, "Node join", NULL); /* dont waste time by invoking the pe yet; */ crm_debug("Waiting on %d outstanding join acks for join-%d", crmd_join_phase_count(crm_join_welcomed), current_join_id); } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); int int_elem_l = crm_int_helper(elem_l, NULL); int int_elem_r = crm_int_helper(elem_r, NULL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; gboolean ack_nack_bool = TRUE; const char *ack_nack = CRMD_JOINSTATE_MEMBER; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); crm_node_t *join_node = crm_get_peer(0, join_from); crm_debug("Processing req from %s", join_from); generation = join_ack->xml; crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (join_id != current_join_id) { crm_debug("Invalid response from %s: join-%d vs. 
join-%d", join_from, join_id, current_join_id); check_join_state(cur_state, __FUNCTION__); return; } else if (join_node == NULL || crm_is_peer_active(join_node) == FALSE) { crm_err("Node %s is not a member", join_from); ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Generation was NULL"); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { max_generation_xml = copy_xml(generation); max_generation_from = strdup(join_from); } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { crm_debug("%s has a better generation number than" " the current max %s", join_from, max_generation_from); if (max_generation_xml) { crm_log_xml_debug(max_generation_xml, "Max generation"); } crm_log_xml_debug(generation, "Their generation"); free(max_generation_from); free_xml(max_generation_xml); max_generation_from = strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } if (ack_nack_bool == FALSE) { /* NACK this client */ ack_nack = CRMD_JOINSTATE_NACK; crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack); - crm_err("join-%d: NACK'ing node %s (ref %s)", join_id, join_from, ref); + crm_err("Rejecting cluster join request from %s " CRM_XS + " NACK join-%d ref=%s", join_from, join_id, ref); } else { crm_debug("join-%d: Welcoming node %s (ref %s)", join_id, join_from, ref); crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated); } crm_update_peer_expected(__FUNCTION__, join_node, ack_nack); crm_debug("%u nodes have been integrated into join-%d", crmd_join_phase_count(crm_join_integrated), join_id); if (check_join_state(cur_state, __FUNCTION__) == FALSE) { /* dont waste time by invoking the PE yet; */ crm_debug("join-%d: Still waiting on %d outstanding offers", join_id, crmd_join_phase_count(crm_join_welcomed)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ crm_debug("Finializing join-%d for %d clients", current_join_id, crmd_join_phase_count(crm_join_integrated)); crmd_join_phase_log(LOG_INFO); if (crmd_join_phase_count(crm_join_welcomed) != 0) { crm_info("Waiting for %d more nodes", crmd_join_phase_count(crm_join_welcomed)); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (crmd_join_phase_count(crm_join_integrated) == 0) { /* Nothing to do */ check_join_state(fsa_state, __FUNCTION__); return; } clear_bit(fsa_input_register, R_HAVE_CIB); if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) { set_bit(fsa_input_register, R_HAVE_CIB); } if (is_set(fsa_input_register, R_IN_TRANSITION)) { - crm_warn("join-%d: We are still in a transition." 
- " Delaying until the TE completes.", current_join_id); + crm_warn("Delaying response to cluster join offer while transition in progress " + CRM_XS " join-%d", current_join_id); crmd_fsa_stall(FALSE); return; } if (max_generation_from && is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ sync_from = strdup(max_generation_from); set_bit(fsa_input_register, R_CIB_ASKED); - crm_notice("join-%d: Syncing the CIB from %s to the rest of the cluster", - current_join_id, sync_from); + crm_notice("Syncing the Cluster Information Base from %s to rest of cluster " + CRM_XS " join-%d", sync_from, current_join_id); crm_log_xml_notice(max_generation_xml, "Requested version"); } else { /* Send _our_ CIB out to everyone */ sync_from = strdup(fsa_our_uname); crm_info("join-%d: Syncing our CIB to the rest of the cluster", current_join_id); crm_log_xml_debug(max_generation_xml, "Requested version"); } rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback); } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); clear_bit(fsa_input_register, R_CIB_ASKED); if (rc != pcmk_ok) { do_crm_log((rc == -pcmk_err_old_data ? LOG_WARNING : LOG_ERR), "Sync from %s failed: %s", (char *)user_data, pcmk_strerror(rc)); /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); } else if (AM_I_DC && fsa_state == S_FINALIZE_JOIN) { set_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { crm_debug("Notifying %d clients of join-%d results", crmd_join_phase_count(crm_join_integrated), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } else { crm_debug("No longer the DC in S_FINALIZE_JOIN: %s/%s", AM_I_DC ? "DC" : "CRMd", fsa_state2string(fsa_state)); } } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("Join update %d complete", call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("Join update %d failed", call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = crm_get_peer(0, join_from); if (safe_str_neq(op, CRM_OP_JOIN_CONFIRM) || peer == NULL) { crm_debug("Ignoring op=%s message from %s", op, join_from); return; } crm_trace("Processing ack from %s", join_from); crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (peer->join != crm_join_finalized) { crm_info("Join not in progress: ignoring join-%d from %s (phase = %d)", join_id, join_from, peer->join); return; } else if (join_id != current_join_id) { crm_err("Invalid response from %s: join-%d vs. 
join-%d", join_from, join_id, current_join_id); crm_update_peer_join(__FUNCTION__, peer, crm_join_nack); return; } crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed); crm_info("join-%d: Updating node state to %s for %s", join_id, CRMD_JOINSTATE_MEMBER, join_from); /* update CIB with the current LRM status from the node * We dont need to notify the TE of these updates, a transition will * be started in due time */ erase_status_tag(join_from, XML_CIB_TAG_LRM, cib_scope_local); fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); crm_debug("join-%d: Registered callback for LRM update %d", join_id, call_id); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; if(join_node->join != crm_join_integrated) { crm_trace("Skipping %s in state %d", join_to, join_node->join); return; } /* make sure a node entry exists for the new node */ crm_trace("Creating node entry for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); set_uuid(tmp1, XML_ATTR_UUID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1, cib_scope_local | cib_quorum_override | cib_can_create); free_xml(tmp1); join_node = crm_get_peer(0, join_to); if (crm_is_peer_active(join_node) == FALSE) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING); return; } /* send the ack/nack to the node */ acknak = create_request(CRM_OP_JOIN_ACKNAK, NULL, join_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); crm_xml_add_int(acknak, F_CRM_JOIN_ID, current_join_id); crm_debug("join-%d: ACK'ing join request from %s", current_join_id, join_to); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized); crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } void ghash_print_node(gpointer key, gpointer value, gpointer user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; crm_debug("Invoked by %s in state: %s", source, fsa_state2string(cur_state)); if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("%s: Membership changed since join started: %llu -> %llu (%llu)", source, saved_ccm_membership_id, crm_peer_seq, highest_seq); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { crm_debug("join-%d: Integration of %d peers complete: %s", current_join_id, crmd_join_phase_count(crm_join_integrated), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying I_FINALIZED until we have the CIB", current_join_id); 
return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { crm_debug("join-%d: Still waiting on %d welcomed nodes", current_join_id, crmd_join_phase_count(crm_join_welcomed)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { crm_debug("join-%d: Still waiting on %d integrated nodes", current_join_id, crmd_join_phase_count(crm_join_integrated)); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { crm_debug("join-%d: Still waiting on %d finalized nodes", current_join_id, crmd_join_phase_count(crm_join_finalized)); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d complete: %s", current_join_id, source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); update_attrd(NULL, NULL, NULL, NULL, FALSE); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { const char *state = "unknown"; switch(peer->join) { case crm_join_nack: state = "nack"; break; case crm_join_none: state = "none"; break; case crm_join_welcomed: state = "welcomed"; break; case crm_join_integrated: state = "integrated"; break; case crm_join_finalized: state = "finalized"; break; case crm_join_confirmed: state = "confirmed"; break; } do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, state); } } diff --git a/crmd/lrm.c b/crmd/lrm.c index 71f689c53c..633ac36728 100644 --- a/crmd/lrm.c +++ b/crmd/lrm.c @@ -1,2386 +1,2401 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("LRM Connection failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("LRM Connection disconnected"); } } static char * make_stop_id(const char *rsc, int call_id) { char *op_id = NULL; op_id = calloc(1, strlen(rsc) + 34); if (op_id != NULL) { snprintf(op_id, strlen(rsc) + 34, "%s:%d", rsc, call_id); } return op_id; } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /* * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval == existing->interval) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /* * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /* * \internal * \brief Free resource 
history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.class); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.class = strdup(rsc->class); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval > 0) { crm_trace("Removing cancelled recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping %s_%s_%d rc=%d, status=%d", op->rsc_id, op->op_type, op->interval, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* We must store failed monitors here * - otherwise the block below will cause them to be forgotten when a stop happens */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: %s_%s_%d", op->rsc_id, op->op_type, op->interval); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: %s_%s_%d", g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval); history_free_recurring_ops(entry); } } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ?
op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* if this is the local lrmd ipc connection, set the right bits in the * crmd when the connection goes down */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local lrmd connections. Remote connections are handled as * resources within the pengine. Connecting to and disconnecting from remote lrmd instances * are handled differently than the local one. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the LRM"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state); crm_notice("Disconnected from the LRM"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the LRM"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to sign on to the LRM %d" " (%d max) times", lrm_state->num_lrm_register_fails, MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to sign on to the LRM %d" " (max) times", lrm_state->num_lrm_register_fails); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("LRM connection established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown...
waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); + guint nremaining = g_hash_table_size(lrm_state->pending_ops); - crm_notice("Stopped %u recurring operations at %s (%u ops remaining)", - removed, when, g_hash_table_size(lrm_state->pending_ops)); + if (removed || nremaining) { + crm_notice("Stopped %u recurring operations at %s (%u operations remaining)", + removed, when, nremaining); + } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending LRM operations at %s", counter, when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (cur_state == S_TERMINATE || is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; crm_trace("Found %s active", entry->id); if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval == 0 ? 
"A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resources were active at %s.", counter, when); } return rc; } GHashTable *metadata_hash = NULL; static char * get_rsc_metadata(const char *type, const char *rclass, const char *provider, bool force) { int rc = pcmk_ok; int len = 0; char *key = NULL; char *metadata = NULL; /* Always use a local connection for this operation */ lrm_state_t *lrm_state = lrm_state_find(fsa_our_uname); CRM_CHECK(type != NULL, return NULL); CRM_CHECK(rclass != NULL, return NULL); CRM_CHECK(lrm_state != NULL, return NULL); if (provider == NULL) { provider = "heartbeat"; } if (metadata_hash == NULL) { metadata_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); } len = strlen(type) + strlen(rclass) + strlen(provider) + 4; key = malloc(len); if(key == NULL) { return NULL; } snprintf(key, len, "%s::%s:%s", type, rclass, provider); if(force == FALSE) { crm_trace("Retreiving cached metadata for %s", key); metadata = g_hash_table_lookup(metadata_hash, key); } if(metadata == NULL) { rc = lrm_state_get_metadata(lrm_state, rclass, provider, type, &metadata, 0); crm_trace("Retrieved live metadata for %s: %s (%d)", key, pcmk_strerror(rc), rc); if(rc == pcmk_ok) { CRM_LOG_ASSERT(metadata != NULL); g_hash_table_insert(metadata_hash, key, metadata); key = NULL; } else { CRM_LOG_ASSERT(metadata == NULL); metadata = NULL; } } if (metadata == NULL) { crm_warn("No metadata found for %s: %s (%d)", key, pcmk_strerror(rc), rc); } free(key); return metadata; } static char * build_parameter_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode *result, const char *criteria, bool target, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; xmlNode *param = NULL; xmlNode *params = NULL; const char *secure_terms[] = { "password", "passwd", "user", }; if(safe_str_eq("private", criteria)) { /* It will take time for the agents to be updated * Check for some common terms */ max = DIMOF(secure_terms); } params = find_xml_node(metadata, "parameters", TRUE); for (param = __xml_first_child(params); param != NULL; param = __xml_next(param)) { if (crm_str_eq((const char *)param->name, "parameter", TRUE)) { bool accept = FALSE; const char *name = crm_element_value(param, "name"); const char *value = crm_element_value(param, criteria); if(max && value) { /* Turn off the compatibility logic once an agent has been updated to know about 'private' */ max = 0; } if (name == NULL) { crm_err("Invalid parameter in %s metadata", op->rsc_id); } else if(target == crm_is_true(value)) { accept = TRUE; } else if(max) { int lpc = 0; bool found = FALSE; for(lpc = 0; found == FALSE && lpc < max; lpc++) { if(safe_str_eq(secure_terms[lpc], name)) { found = TRUE; } } if(found == target) { accept = TRUE; } } if(accept) { int start = len; crm_trace("Attr %s is %s%s", name, target?"":"not ", criteria); len += strlen(name) + 2; list = realloc_safe(list, len + 1); sprintf(list + start, " %s ", name); } else { crm_trace("Rejecting %s for %s", name, criteria); } if(invert_for_xml) { crm_trace("Inverting %s match for %s xml", name, criteria); accept = !accept; } if(result && accept) { value = g_hash_table_lookup(op->params, name); if(value != NULL) { crm_trace("Adding attr %s=%s to the xml result", name, value); crm_xml_add(result, name, value); } } } } return list; } static bool resource_supports_action(xmlNode *metadata, const char *name) { const char *value = NULL; xmlNode *action = NULL; xmlNode *actions = NULL; actions = 
find_xml_node(metadata, "actions", TRUE); for (action = __xml_first_child(actions); action != NULL; action = __xml_next(action)) { if (crm_str_eq((const char *)action->name, "action", TRUE)) { value = crm_element_value(action, "name"); if (safe_str_eq(name, value)) { return TRUE; } } } return FALSE; } static void append_restart_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval > 0) { /* monitors are not reloadable */ return; } if(resource_supports_action(metadata, "reload")) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Any parameters with unique="1" should be added into the "op-force-restart" list. */ list = build_parameter_list(op, metadata, restart, "unique", TRUE, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, xmlNode *metadata, xmlNode * update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, "private", TRUE, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; xmlNode *metadata = NULL; const char *m_string = NULL; const char *caller_version = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. 
* * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if (rsc == NULL || op->params == NULL || crm_str_eq(CRMD_ACTION_STOP, op->op_type, TRUE)) { /* Stopped resources don't need the digest logic */ crm_trace("No digests needed for %s %p %p %s", op->rsc_id, op->params, rsc, op->op_type); return TRUE; } m_string = get_rsc_metadata(rsc->type, rsc->class, rsc->provider, safe_str_eq(op->op_type, RSC_START)); if(m_string == NULL) { crm_err("No metadata for %s::%s:%s", rsc->provider, rsc->class, rsc->type); return TRUE; } metadata = string2xml(m_string); if(metadata == NULL) { crm_err("Metadata for %s::%s:%s is not valid XML", rsc->provider, rsc->class, rsc->type); return TRUE; } crm_trace("Including additional digests for %s::%s:%s", rsc->provider, rsc->class, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); free_xml(metadata); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex...
* leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if (entry->last->interval == 0 && entry->last->rc == PCMK_OCF_NOT_CONFIGURED) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.class); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = do_update_node_cib(peer, update_flags, NULL, __FUNCTION__); xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current state of the LRM"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); xmlNode *xml_state; if (!lrm_state) { crm_err("Could not query lrm state for lrmd node %s", node_name); return NULL; } xml_state = do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); /* In case this function is called to generate a join confirmation to * send to the DC, force the current and expected join state to member. * This isn't necessary for newer DCs but is backward compatible. */ crm_xml_add(xml_state, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER); crm_xml_add(xml_state, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER); return xml_state; } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, from_host, rsc_id, rc == pcmk_ok ? 
"" : " not"); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); CRM_ASSERT(op != NULL); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the LRM", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int max = 0; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); max = strlen(rsc_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + 1; rsc_xpath = calloc(1, max); snprintf(rsc_xpath, max, rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /* * Remove the op from the CIB * * Avoids refreshing the entire LRM section of this host */ #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']" #define op_call_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" static void delete_op_entry(lrm_state_t * lrm_state, lrmd_event_data_t * op, const char *rsc_id, const char *key, int call_id) { xmlNode *xml_top = NULL; if (op != NULL) { xml_top = create_xml_node(NULL, 
XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("async: Sending delete op for %s_%s_%d (call=%d)", op->rsc_id, op->op_type, op->interval, op->call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); } else if (rsc_id != NULL && key != NULL) { int max = 0; char *op_xpath = NULL; if (call_id > 0) { max = strlen(op_call_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 10; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_call_template, lrm_state->node_name, rsc_id, key, call_id); } else { max = strlen(op_template) + strlen(rsc_id) + strlen(lrm_state->node_name) + strlen(key) + 1; op_xpath = calloc(1, max); snprintf(op_xpath, max, op_template, lrm_state->node_name, rsc_id, key); } crm_debug("sync: Sending delete op for %s (call=%d)", rsc_id, call_id); fsa_cib_conn->cmds->delete(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } else { crm_err("Not enough information to delete op entry: rsc=%p key=%p", rsc_id, key); return; } crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } void lrm_clear_last_failure(const char *rsc_id, const char *node_name) { char *attr = NULL; GHashTableIter iter; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; rsc_history_t *entry = NULL; attr = generate_op_key(rsc_id, "last_failure", 0); /* This clears last failure for every lrm state that has this rsc.*/ for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (node_name != NULL) { if (strcmp(node_name, lrm_state->node_name) != 0) { /* filter by node_name if node_name is present */ continue; } } delete_op_entry(lrm_state, NULL, rsc_id, attr, 0); if (!lrm_state->resource_history) { continue; } g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { if (crm_str_eq(rsc_id, entry->id, TRUE)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } free(attr); g_list_free(lrm_state_list); } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, 
key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } static lrmd_rsc_info_t * get_lrm_resource(lrm_state_t * lrm_state, xmlNode * resource, xmlNode * op_msg, gboolean do_create) { lrmd_rsc_info_t *rsc = NULL; const char *id = ID(resource); const char *type = crm_element_value(resource, XML_ATTR_TYPE); const char *class = crm_element_value(resource, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(resource, XML_AGENT_ATTR_PROVIDER); const char *long_id = crm_element_value(resource, XML_ATTR_ID_LONG); crm_trace("Retrieving %s from the LRM.", id); CRM_CHECK(id != NULL, return NULL); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc && long_id) { rsc = lrm_state_get_rsc_info(lrm_state, long_id, 0); } if (!rsc && do_create) { CRM_CHECK(class != NULL, return NULL); CRM_CHECK(type != NULL, return NULL); crm_trace("Adding rsc %s before operation", id); lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); rsc = lrm_state_get_rsc_info(lrm_state, id, 0); if (!rsc) { fsa_data_t *msg_data = NULL; crm_err("Could not add resource %s to LRM %s", id, lrm_state->node_name); /* only register this as an internal error if this involves the local * lrmd. Otherwise we're likely dealing with an unresponsive remote-node * which is not a FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } return rsc; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ?
user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. Otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* And finally, _delete_ the value in attrd * Setting it to FALSE results in the PE sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if(xml_rsc == NULL) { /* Do something else? direct_ack?
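* Answering with send_direct_ack() would need an rsc_id to ack against, and without a <resource> element there is none, so logging and skipping is all we can do here.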
*/ crm_info("Skipping %s=%d on %s (%p): no resource", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node, lrm_state); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Skipping %s=%d on %s (%p): no operation", crm_element_value(action, XML_ATTR_TRANSITION_KEY), rc, target_node, lrm_state); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); op->call_id = get_fake_call_id(lrm_state, op->rsc_id); if(safe_str_eq(operation, RSC_NOTIFY)) { /* Notifications can't fail yet */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = rc; } op->t_run = time(NULL); op->t_rcchange = op->t_run; crm_info("Faking result %d for %s_%s_%d on %s (%p)", op->rc, op->rsc_id, op->op_type, op->interval, target_node, lrm_state); if(lrm_state) { process_lrm_event(lrm_state, op, NULL); } else { lrmd_rsc_info_t rsc; rsc.id = strdup(op->rsc_id); rsc.type = crm_element_value_copy(xml_rsc, XML_ATTR_TYPE); rsc.class = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_CLASS); rsc.provider = crm_element_value_copy(xml_rsc, XML_AGENT_ATTR_PROVIDER); do_update_resource(target_node, &rsc, op); free(rsc.id); free(rsc.type); free(rsc.class); free(rsc.provider); } lrmd_free_event(op); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean create_rsc = TRUE; lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; gboolean crm_rsc_delete = FALSE; if (input->xml != NULL) { /* Remote node operations are routed here to their remote connections */ target_node = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } if (target_node == NULL) { target_node = fsa_our_uname; } else if (safe_str_neq(target_node, fsa_our_uname)) { is_remote_node = TRUE; } lrm_state = lrm_state_find(target_node); if (lrm_state == NULL && is_remote_node) { crm_err("no lrmd connection for remote node %s found on cluster node %s. 
Cannot process request.", target_node, fsa_our_uname); /* The action must be recorded here and in the CIB as failed */ synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("LRM command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("LRM command from: %s", from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { /* remember this delete op came from crm_resource */ crm_rsc_delete = TRUE; operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { operation = CRM_OP_LRM_REFRESH; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The lrmd cannot fail a resource, it does not understand the * concept of success or failure in relation to a resource, it simply * executes operations and reports the results. We determine what a failure is. * Because of this, if we want to fail a resource we have to fake what we * understand a failure to look like. * * To do this we create a fake lrmd operation event for the resource * we want to fail. We then pass that event to the lrmd client callback * so it will be processed as if it actually came from the lrmd. */ op = construct_op(lrm_state, input->xml, ID(xml_rsc), "asyncmon"); CRM_ASSERT(op != NULL); free((char *)op->user_data); op->user_data = NULL; op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->interval = 0; op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc) { crm_info("Failing resource %s...", rsc->id); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(input->msg, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local LRM refresh: call=%d", rc); if(strcmp(CRM_SYSTEM_CRMD, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(input->msg, data); if
(relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if(strcmp(CRM_SYSTEM_TENGINE, from_sys) != 0) { xmlNode *reply = create_request( CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *params = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* only the first 16 chars are used by the LRM */ params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { create_rsc = FALSE; } if(lrm_state_is_connected(lrm_state) == FALSE) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } rsc = get_lrm_resource(lrm_state, xml_rsc, input->xml, create_rsc); if (rsc == NULL && create_rsc) { crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "bad input"); /* if the operation couldn't complete because we can't register * the resource, return a generic error */ synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); } else if (rsc == NULL) { lrmd_event_data_t *op = NULL; crm_notice("Not creating resource for a %s event: %s", operation, ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); op = construct_op(lrm_state, input->xml, ID(xml_rsc), operation); CRM_ASSERT(op != NULL); /* Deleting something that does not exist is a success */ op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } else if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *op_interval = NULL; gboolean in_progress = FALSE; CRM_CHECK(params != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL); op_interval = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); CRM_CHECK(op_interval != NULL, crm_log_xml_warn(input->xml, "Bad command"); lrmd_free_rsc_info(rsc); return); op_key = generate_op_key(rsc->id, op_task, crm_parse_int(op_interval, "0")); crm_debug("PE requested op %s (call=%s) be cancelled", op_key, call_id ?
call_id : "NA"); call = crm_parse_int(call_id, "0"); if (call == 0) { /* the normal case when the PE cancels a recurring op */ in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { /* the normal case when the PE cancels an orphan op */ in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } if (in_progress == FALSE) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc->id, op_task); crm_info("Nothing known about operation %d for %s", call, op_key); delete_op_entry(lrm_state, NULL, rsc->id, op_key, call); CRM_ASSERT(op != NULL); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(from_host, from_sys, rsc, op, rsc->id); lrmd_free_event(op); /* Needed? Surely not, otherwise cancel_op(_key) wouldn't * have failed in the first place */ g_hash_table_remove(lrm_state->pending_ops, op_key); } free(op_key); } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun | cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err ("Attempted deletion of resource status '%s' from CIB for %s (user=%s) on %s failed: (rc=%d) %s", rsc->id, from_sys, user_name ? user_name : "unknown", from_host, cib_rc, pcmk_strerror(cib_rc)); op = construct_op(lrm_state, input->xml, rsc->id, operation); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); lrmd_free_rsc_info(rsc); return; } #endif if (crm_rsc_delete == TRUE && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Operation was neither an lrm_query nor a rsc op. %s", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *op_interval = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id != NULL); op = calloc(1, sizeof(lrmd_event_data_t)); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here.
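* The only parameter such a stop needs is the CRM feature set; normally the DC's copy arrives with the operation, but with no DC we insert our own below.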
*/ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); op_interval = crm_meta_value(params, XML_LRM_ATTR_INTERVAL); op->interval = crm_parse_int(op_interval, "0"); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->interval < 0) { op->interval = 0; } if (op->timeout <= 0) { op->timeout = op->interval; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %d", op->interval); op->interval = 0; } } crm_trace("Constructed %s op for %s: interval=%d", operation, rsc_id, op->interval); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = do_update_node_cib(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op %s_%s_%d from %s: %s", op->rsc_id, op->op_type, op->interval, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating 
through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0 && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && op->interval == 0 && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. 
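* The recurring monitor is our only way of noticing a broken connection while the migration is in flight.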
If the connection is ever unexpectedly * severed, even during a migration, this is an event we must detect. */ stop_recurring = FALSE; } else if (op->interval == 0 && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); - crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d", - removed, rsc->id, operation, op->interval); + if (removed) { + crm_debug("Stopped %u recurring operations in preparation for %s_%s_%d", + removed, rsc->id, operation, op->interval); + } } /* now do the op */ crm_info("Performing key=%s op=%s_%s_%d", transition, rsc->id, operation, op->interval); if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE && fsa_state != S_TRANSITION_ENGINE) { if (safe_str_neq(operation, "fail") && safe_str_neq(operation, CRMD_ACTION_STOP)) { crm_info("Discarding attempt to perform action %s on %s in state %s", operation, rsc->id, fsa_state2string(fsa_state)); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } } op_id = generate_op_key(rsc->id, op->op_type, op->interval); if (op->interval > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); op->call_id = get_fake_call_id(lrm_state, rsc->id); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_UNKNOWN_ERROR; op->t_run = time(NULL); op->t_rcchange = op->t_run; process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval = op->interval; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = strdup(op->user_data); g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if (op->interval > 0 && op->start_delay > START_DELAY_THRESHOLD) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); }
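/* Illustrative sketch of the fake confirmation above (not upstream code; assumes the key
 * layout produced by generate_transition_key(), roughly
 * "<action>:<transition>:<target-rc>:<node-uuid>"): decode the key to recover the rc the
 * transition engine expects, then ack as if the action had already run:
 *
 *   int dummy = 0, target_rc = 0;
 *   char *uuid = NULL;
 *
 *   decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc);
 *   free(uuid);
 *   op->rc = target_rc;
 *   op->op_status = PCMK_LRM_OP_DONE;
 *   send_direct_ack(NULL, NULL, rsc, op, rsc->id);
 *
 * The direct ack satisfies the tengine immediately; the real result still arrives via
 * process_lrm_event() once the delayed operation actually executes.
 */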
pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* Builds a CIB status fragment of the form: status -> node_state -> lrm -> lrm_resources -> lrm_resource -> (operation update) */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* a remote node's uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->class); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the lrmd", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down anyway?
* (probably because of an internal error) * * Worst case: * we get shot for having resources "running" when they really weren't * * the alternative, however, means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%d on %s", rc, op->op_type, op->interval, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Leave it up to the TE/PE to decide if this is an error */ op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (safe_str_eq(op->op_type, RSC_NOTIFY)) { /* Keep notify ops out of the CIB */ send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { update_id = do_update_resource(lrm_state->node_name, rsc, op); } } else if (op->interval == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. Report this. */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ delete_op_entry(lrm_state, op, op->rsc_id, op_key, op->call_id); } else if (pending && op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack.
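* send_direct_ack() hands the synthesized result straight back to the tengine without updating the CIB, which is exactly what is needed here.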
*/ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if(op->interval != 0 && op->op_status == PCMK_LRM_OP_CANCELLED) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: - crm_info("Operation %s: %s (node=%s, call=%d, confirmed=%s)", - op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, - op->call_id, removed ? "true" : "false"); + crm_info("Result of %s operation for %s on %s: %s " + CRM_XS " call=%d key=%s confirmed=%s", + op->op_type, op->rsc_id, lrm_state->node_name, + services_lrm_status_str(op->op_status), + op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log(op->interval?LOG_INFO:LOG_NOTICE, - "Operation %s: %s (node=%s, call=%d, rc=%d, cib-update=%d, confirmed=%s)", - op_key, services_ocf_exitcode_str(op->rc), lrm_state->node_name, - op->call_id, op->rc, update_id, removed ? "true" : "false"); + "Result of %s operation for %s on %s: %s " + CRM_XS " call=%d key=%s confirmed=%s rc=%d cib-update=%d", + op->op_type, op->rsc_id, lrm_state->node_name, + services_ocf_exitcode_str(op->rc), + op->call_id, op_key, (removed? "true" : "false"), + op->rc, update_id); break; case PCMK_LRM_OP_TIMEOUT: - crm_err("Operation %s: %s (node=%s, call=%d, timeout=%dms)", - op_key, services_lrm_status_str(op->op_status), lrm_state->node_name, op->call_id, op->timeout); + crm_err("Result of %s operation for %s on %s: %s " + CRM_XS " call=%d key=%s timeout=%dms", + op->op_type, op->rsc_id, lrm_state->node_name, + services_lrm_status_str(op->op_status), + op->call_id, op_key, op->timeout); break; default: - crm_err("Operation %s (node=%s, call=%d, status=%d, cib-update=%d, confirmed=%s) %s", - op_key, lrm_state->node_name, op->call_id, op->op_status, update_id, removed ? "true" : "false", - services_lrm_status_str(op->op_status)); + crm_err("Result of %s operation for %s on %s: %s " + CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", + op->op_type, op->rsc_id, lrm_state->node_name, + services_lrm_status_str(op->op_status), op->call_id, op_key, + (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-%s_%s_%d:%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } crmd_notify_resource_op(lrm_state->node_name, op); if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... 
allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/crmd/main.c b/crmd/main.c index 75ed91ca44..9c1ce944ab 100644 --- a/crmd/main.c +++ b/crmd/main.c @@ -1,168 +1,168 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" void usage(const char *cmd, int exit_status); int crmd_init(void); void crmd_hamsg_callback(const xmlNode * msg, void *private_data); extern void init_dotfile(void); GMainLoop *crmd_mainloop = NULL; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int index = 0; int argerr = 0; crmd_mainloop = g_main_new(FALSE); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response"); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { crmd_metadata(); return 0; } else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) { fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); return 0; } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_notice("CRM Git Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); + crm_info("CRM Git Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } if (crm_is_writable(PE_STATE_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on " PE_STATE_DIR ". Terminating"); fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR ". See logs for details\n"); fflush(stderr); return 100; } else if (crm_is_writable(CRM_CONFIG_DIR, NULL, CRM_DAEMON_USER, CRM_DAEMON_GROUP, FALSE) == FALSE) { crm_err("Bad permissions on " CRM_CONFIG_DIR ". Terminating"); fprintf(stderr, "ERROR: Bad permissions on " CRM_CONFIG_DIR ". 
See logs for details\n"); fflush(stderr); return 100; } return crmd_init(); } int crmd_init(void) { int exit_code = 0; enum crmd_fsa_state state; fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the register */ init_dotfile(); crm_debug("Starting %s", crm_system_name); register_fsa_input(C_STARTUP, I_STARTUP, NULL); crm_peer_init(); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crm_trace("Starting %s's mainloop", crm_system_name); #ifdef REALTIME_SUPPORT static int crm_realtime = 1; if (crm_realtime == 1) { cl_enable_realtime(); } else if (crm_realtime == 0) { cl_disable_realtime(); } cl_make_realtime(SCHED_RR, 5, 64, 64); #endif g_main_run(crmd_mainloop); if (is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = 100; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = 1; } crm_info("%u stopped: %s (%d)", getpid(), pcmk_strerror(exit_code), exit_code); return crmd_fast_exit(exit_code); } diff --git a/crmd/misc.c b/crmd/misc.c index 1ad73ec3a0..eaa4398b9d 100644 --- a/crmd/misc.c +++ b/crmd/misc.c @@ -1,68 +1,67 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* A_LOG, A_WARN, A_ERROR */ void do_log(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { unsigned log_type = LOG_TRACE; if (action & A_LOG) { log_type = LOG_INFO; } else if (action & A_WARN) { log_type = LOG_WARNING; } else if (action & A_ERROR) { log_type = LOG_ERR; } - do_crm_log(log_type, - "FSA: Input %s from %s() received in state %s", + do_crm_log(log_type, "Input %s received in state %s from %s", fsa_input2string(msg_data->fsa_input), - msg_data->origin, fsa_state2string(cur_state)); + fsa_state2string(cur_state), msg_data->origin); if (msg_data->data_type == fsa_dt_ha_msg) { ha_msg_input_t *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input->msg, __FUNCTION__); } else if (msg_data->data_type == fsa_dt_xml) { xmlNode *input = fsa_typed_data(msg_data->data_type); crm_log_xml_debug(input, __FUNCTION__); } else if (msg_data->data_type == fsa_dt_lrm) { lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type); do_crm_log(log_type, "Resource %s: Call ID %d returned %d (%d)."
" New status if rc=0: %s", input->rsc_id, input->call_id, input->rc, input->op_status, (char *)input->user_data); } } diff --git a/crmd/pengine.c b/crmd/pengine.c index 4a8d617ba6..f8331d3554 100644 --- a/crmd/pengine.c +++ b/crmd/pengine.c @@ -1,362 +1,367 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* for access */ #include /* for calls to open */ #include /* for calls to open */ #include /* for calls to open */ #include /* for getpwuid */ #include /* for initgroups */ #include /* for getrlimit */ #include /* for getrlimit */ #include #include #include #include #include #include #include #include struct crm_subsystem_s *pe_subsystem = NULL; void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); static void save_cib_contents(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { int len = 15; char *filename = NULL; len += strlen(id); len += strlen(PE_STATE_DIR); filename = calloc(1, len); CRM_CHECK(filename != NULL, return); sprintf(filename, PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { - crm_err("Could not save CIB contents after PE crash to %s", filename); + crm_err("Could not save Cluster Information Base to %s after Policy Engine crash", + filename); } else { - crm_notice("Saved CIB contents after PE crash to %s", filename); + crm_notice("Saved Cluster Information Base to %s after Policy Engine crash", + filename); } free(filename); } } static void pe_ipc_destroy(gpointer user_data) { if (is_set(fsa_input_register, pe_subsystem->flag_required)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); - crm_crit("Connection to the Policy Engine failed (pid=%d, uuid=%s)", - pe_subsystem->pid, uuid_str); + crm_crit("Connection to the Policy Engine failed " + CRM_XS " pid=%d uuid=%s", pe_subsystem->pid, uuid_str); /* *The PE died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. 
* */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { if (is_heartbeat_cluster()) { stop_subsystem(pe_subsystem, FALSE); } crm_info("Connection to the Policy Engine released"); } clear_bit(fsa_input_register, pe_subsystem->flag_connected); pe_subsystem->pid = -1; pe_subsystem->source = NULL; pe_subsystem->client = NULL; mainloop_set_trigger(fsa_source); return; } static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /* A_PE_START, A_PE_STOP, A_TE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { struct crm_subsystem_s *this_subsys = pe_subsystem; long long stop_actions = A_PE_STOP; long long start_actions = A_PE_START; static struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; if (action & stop_actions) { clear_bit(fsa_input_register, pe_subsystem->flag_required); mainloop_del_ipc_client(pe_subsystem->source); pe_subsystem->source = NULL; clear_bit(fsa_input_register, pe_subsystem->flag_connected); } if ((action & start_actions) && (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE)) { if (cur_state != S_STOPPING) { set_bit(fsa_input_register, pe_subsystem->flag_required); pe_subsystem->source = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT, 5 * 1024 * 1024 /* 5Mb */ , NULL, &pe_callbacks); if (pe_subsystem->source == NULL) { crm_warn("Setup of client connection failed, not adding channel to mainloop"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); return; } /* if (is_openais_cluster()) { */ /* pe_subsystem->pid = pe_subsystem->ipc->farside_pid; */ /* } */ set_bit(fsa_input_register, pe_subsystem->flag_connected); } else { crm_info("Ignoring request to start %s while shutting down", this_subsys->name); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { - crm_err("Not DC: No need to invoke the PE (anymore): %s", fsa_action2string(action)); + crm_err("Not invoking Policy Engine because not DC: %s", + fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { - crm_err("Cannot shut down gracefully without the PE"); + crm_err("Cannot shut down gracefully without the Policy Engine"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { - crm_info("Waiting for the PE to connect"); + crm_info("Waiting for the Policy Engine to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { - crm_notice("No need to invoke the PE in state %s", fsa_state2string(cur_state)); + crm_notice("Not invoking Policy Engine because in state %s", + fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { - crm_err("Attempted to invoke the PE without a consistent copy of the CIB!"); + crm_err("Attempted to invoke Policy Engine without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = 
fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); /* Make sure any queued calculations are discarded */ free(fsa_pe_ref); fsa_pe_ref = NULL; fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; int xpath_max = 1024; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_string = calloc(1, xpath_max); lpc = snprintf(xpath_string, xpath_max, "%.128s//%s//nvpair[@name='%.128s']", get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name); CRM_LOG_ASSERT(lpc > 0); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { char *attr_id = crm_concat(CIB_OPTIONS_FIRST, attr_name, '-'); xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s/%s = %s", attr_id, attr_name, attr_value); configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL); if (cluster_property_set == NULL) { cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_add(xml, XML_ATTR_ID, attr_id); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); free(attr_id); } freeXpathObject(xpathObj); } void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { int sent; xmlNode *cmd = NULL; pid_t watchdog = pcmk_locate_sbd(); if (rc != pcmk_ok) { - crm_err("Cant retrieve the CIB: %s (call %d)", pcmk_strerror(rc), call_id); + crm_err("Could not retrieve the Cluster Information Base: %s " + CRM_XS " call=%d", pcmk_strerror(rc), call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the PE anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding PE request in state: %s", fsa_state2string(fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking PE in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); /* refresh our remote-node cache when the pengine is invoked */ crm_remote_peer_cache_refresh(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); 
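/* 'output' now holds the CIB copy returned by the query; the lines below stamp it with quorum, watchdog and (when quorum was lost) panic markers before wrapping it in a CRM_OP_PECALC request for the pengine. */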
crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false"); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); free(fsa_pe_ref); fsa_pe_ref = crm_element_value_copy(cmd, XML_ATTR_REFERENCE); sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem->source), cmd, 0, 0, NULL); if (sent <= 0) { crm_err("Could not contact the pengine: %d", sent); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } crm_debug("Invoking the PE: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); free_xml(cmd); } diff --git a/crmd/te_actions.c b/crmd/te_actions.c index bf25ccc840..e162ecf7de 100644 --- a/crmd/te_actions.c +++ b/crmd/te_actions.c @@ -1,754 +1,755 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include char *te_uuid = NULL; GHashTable *te_targets = NULL; void send_rsc_command(crm_action_t * action); static void te_update_job_count(crm_action_t * action, int offset); static void te_start_action_timer(crm_graph_t * graph, crm_action_t * action) { action->timer = calloc(1, sizeof(crm_action_timer_t)); action->timer->timeout = action->timeout; action->timer->reason = timeout_action; action->timer->action = action; action->timer->source_id = g_timeout_add(action->timer->timeout + graph->network_delay, action_timer_callback, (void *)action->timer); CRM_ASSERT(action->timer->source_id != 0); } static gboolean te_pseudo_action(crm_graph_t * graph, crm_action_t * pseudo) { crm_debug("Pseudo action %d fired and confirmed", pseudo->id); te_action_confirmed(pseudo); update_graph(graph, pseudo); trigger_graph(); return TRUE; } void send_stonith_update(crm_action_t * action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate */ peer = crm_get_peer_full(0, target, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. 
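* Adding node_update_cluster below makes this update record the membership state ourselves instead of waiting for the membership layer.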
*/ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = do_update_node_cib(peer, flags, NULL, __FUNCTION__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { time_t now = time(NULL); char *now_s = crm_itoa(now); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_UUID, uuid); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state, cib_quorum_override | cib_scope_local | cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, FALSE, strdup(target), cib_fencing_updated); /* Make sure it sticks */ /* fsa_cib_conn->cmds->bump_epoch(fsa_cib_conn, cib_quorum_override|cib_scope_local); */ erase_status_tag(peer->uname, XML_CIB_TAG_LRM, cib_scope_local); erase_status_tag(peer->uname, XML_TAG_TRANSIENT_NODEATTRS, cib_scope_local); free_xml(node_state); return; } static gboolean te_fence_node(crm_graph_t * graph, crm_action_t * action) { int rc = 0; const char *id = NULL; const char *uuid = NULL; const char *target = NULL; const char *type = NULL; gboolean invalid_action = FALSE; enum stonith_call_options options = st_opt_none; id = ID(action->xml); target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); type = crm_meta_value(action->params, "stonith_action"); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return FALSE; } - crm_notice("Executing %s fencing operation (%s) on %s (timeout=%d)", - type, id, target, transition_graph->stonith_timeout); + crm_notice("Requesting fencing (%s) of node %s " + CRM_XS " action=%s timeout=%d", + type, target, id, transition_graph->stonith_timeout); /* Passing NULL means block until we can connect... 
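* Blocking is tolerable here: the fence request below cannot be issued without a stonith connection anyway.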
*/ te_connect_stonith(NULL); if (crmd_join_phase_count(crm_join_confirmed) == 1) { options |= st_opt_allow_suicide; } rc = stonith_api->cmds->fence(stonith_api, options, target, type, transition_graph->stonith_timeout / 1000, 0); stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000, st_opt_timeout_updates, generate_transition_key(transition_graph->id, action->id, 0, te_uuid), "tengine_stonith_callback", tengine_stonith_callback); return TRUE; } static int get_target_rc(crm_action_t * action) { const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC); if (target_rc_s != NULL) { return crm_parse_int(target_rc_s, "0"); } return 0; } static gboolean te_crm_command(crm_graph_t * graph, crm_action_t * action) { char *counter = NULL; xmlNode *cmd = NULL; gboolean is_local = FALSE; const char *id = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; id = ID(action->xml); task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command (id=%s) %s: no node", crm_str(id), crm_str(task)); return FALSE); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } crm_info("Executing crm-event (%s): %s on %s%s%s", crm_str(id), crm_str(task), on_node, is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); if (is_local && safe_str_eq(task, CRM_OP_SHUTDOWN)) { /* defer until everything else completes */ crm_info("crm-event (%s) is a local shutdown", crm_str(id)); graph->completion_action = tg_shutdown; graph->abort_reason = "local shutdown"; te_action_confirmed(action); update_graph(graph, action); trigger_graph(); return TRUE; } else if (safe_str_eq(task, CRM_OP_SHUTDOWN)) { crm_node_t *peer = crm_get_peer(0, router_node); crm_update_peer_expected(__FUNCTION__, peer, CRMD_JOINSTATE_DOWN); } cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL); counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); free(counter); free_xml(cmd); if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { te_action_confirmed(action); update_graph(graph, action); trigger_graph(); } else { if (action->timeout <= 0) { crm_err("Action %d: %s on %s had an invalid timeout (%dms). 
Using %dms instead", action->id, task, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_start_action_timer(graph, action); } return TRUE; } gboolean cib_action_update(crm_action_t * action, int status, int op_rc) { lrmd_event_data_t *op = NULL; xmlNode *state = NULL; xmlNode *rsc = NULL; xmlNode *xml_op = NULL; xmlNode *action_rsc = NULL; int rc = pcmk_ok; const char *name = NULL; const char *value = NULL; const char *rsc_id = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); int call_options = cib_quorum_override | cib_scope_local; int target_rc = get_target_rc(action); if (status == PCMK_LRM_OP_PENDING) { crm_debug("%s %d: Recording pending operation %s on %s", crm_element_name(action->xml), action->id, task_uuid, target); } else { crm_warn("%s %d: %s on %s timed out", crm_element_name(action->xml), action->id, task_uuid, target); } action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE); if (action_rsc == NULL) { return FALSE; } rsc_id = ID(action_rsc); CRM_CHECK(rsc_id != NULL, crm_log_xml_err(action->xml, "Bad:action"); return FALSE); /* update the CIB */ state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(state, XML_ATTR_UUID, target_uuid); crm_xml_add(state, XML_ATTR_UNAME, target); rsc = create_xml_node(state, XML_CIB_TAG_LRM); crm_xml_add(rsc, XML_ATTR_ID, target_uuid); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES); rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE); crm_xml_add(rsc, XML_ATTR_ID, rsc_id); name = XML_ATTR_TYPE; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_CLASS; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); name = XML_AGENT_ATTR_PROVIDER; value = crm_element_value(action_rsc, name); crm_xml_add(rsc, name, value); op = convert_graph_action(NULL, action, status, op_rc); op->call_id = -1; op->user_data = generate_transition_key(transition_graph->id, action->id, target_rc, te_uuid); xml_op = create_operation_update(rsc, op, CRM_FEATURE_SET, target_rc, target, __FUNCTION__, LOG_INFO); lrmd_free_event(op); crm_trace("Updating CIB with \"%s\" (%s): %s %s on %s", status < 0 ? 
"new action" : XML_ATTR_TIMEOUT, crm_element_name(action->xml), crm_str(task), rsc_id, target); crm_log_xml_trace(xml_op, "Op"); rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options); crm_trace("Updating CIB with %s action %d: %s on %s (call_id=%d)", services_lrm_status_str(status), action->id, task_uuid, target, rc); fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated); free_xml(state); action->sent_update = TRUE; if (rc < pcmk_ok) { return FALSE; } return TRUE; } static gboolean te_rsc_command(crm_graph_t * graph, crm_action_t * action) { /* never overwrite stop actions in the CIB with * anything other than completed results * * Writing pending stops makes it look like the * resource is running again */ xmlNode *cmd = NULL; xmlNode *rsc_op = NULL; gboolean rc = TRUE; gboolean no_wait = FALSE; gboolean is_local = FALSE; char *counter = NULL; const char *task = NULL; const char *value = NULL; const char *on_node = NULL; const char *router_node = NULL; const char *task_uuid = NULL; CRM_ASSERT(action != NULL); CRM_ASSERT(action->xml != NULL); action->executed = FALSE; on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); CRM_CHECK(on_node != NULL && strlen(on_node) != 0, crm_err("Corrupted command(id=%s) %s: no node", ID(action->xml), crm_str(task)); return FALSE); rsc_op = action->xml; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE); if (!router_node) { router_node = on_node; } counter = generate_transition_key(transition_graph->id, action->id, get_target_rc(action), te_uuid); crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter); if (safe_str_eq(router_node, fsa_our_uname)) { is_local = TRUE; } value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT); if (crm_is_true(value)) { no_wait = TRUE; } - crm_notice("Initiating action %d: %s %s on %s%s%s", - action->id, task, task_uuid, on_node, - is_local ? " (local)" : "", no_wait ? " - no waiting" : ""); + crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d", + task, task_uuid, (is_local? " locally" : ""), on_node, + (no_wait? " without waiting" : ""), action->id); cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node, CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL); if (is_local) { /* shortcut local resource commands */ ha_msg_input_t data = { .msg = cmd, .xml = rsc_op, }; fsa_data_t msg = { .id = 0, .data = &data, .data_type = fsa_dt_ha_msg, .fsa_input = I_NULL, .fsa_cause = C_FSA_INTERNAL, .actions = A_LRM_INVOKE, .origin = __FUNCTION__, }; do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, fsa_state, I_NULL, &msg); } else { rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); } free(counter); free_xml(cmd); action->executed = TRUE; if (rc == FALSE) { crm_err("Action %d failed: send", action->id); return FALSE; } else if (no_wait) { crm_info("Action %d confirmed - no wait", action->id); action->confirmed = TRUE; /* Just mark confirmed. * Don't bump the job count only to immediately decrement it */ update_graph(transition_graph, action); trigger_graph(); } else if (action->confirmed == TRUE) { crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.", action->id, task, task_uuid, on_node, action->timeout); } else { if (action->timeout <= 0) { crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). 
Using %dms instead", action->id, task, task_uuid, on_node, action->timeout, graph->network_delay); action->timeout = graph->network_delay; } te_update_job_count(action, 1); te_start_action_timer(graph, action); } value = crm_meta_value(action->params, XML_OP_ATTR_PENDING); if (crm_is_true(value) && safe_str_neq(task, CRMD_ACTION_CANCEL) && safe_str_neq(task, CRMD_ACTION_DELETE)) { /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op %s in the CIB", task_uuid); cib_action_update(action, PCMK_LRM_OP_PENDING, PCMK_OCF_UNKNOWN); } return TRUE; } struct te_peer_s { char *name; int jobs; int migrate_jobs; }; static void te_peer_free(gpointer p) { struct te_peer_s *peer = p; free(peer->name); free(peer); } void te_reset_job_counts(void) { GHashTableIter iter; struct te_peer_s *peer = NULL; if(te_targets == NULL) { te_targets = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, te_peer_free); } g_hash_table_iter_init(&iter, te_targets); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) { peer->jobs = 0; peer->migrate_jobs = 0; } } static void te_update_job_count_on(const char *target, int offset, bool migrate) { struct te_peer_s *r = NULL; if(target == NULL || te_targets == NULL) { return; } r = g_hash_table_lookup(te_targets, target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } r->jobs += offset; if(migrate) { r->migrate_jobs += offset; } crm_trace("jobs[%s] = %d", target, r->jobs); } static void te_update_job_count(crm_action_t * action, int offset) { const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->type != action_type_rsc || target == NULL) { /* No limit on these */ return; } /* if we have a router node, this means the action is performing * on a remote node. 
For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) { const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); te_update_job_count_on(t1, offset, TRUE); te_update_job_count_on(t2, offset, TRUE); return; } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } te_update_job_count_on(target, offset, FALSE); } static gboolean te_should_perform_action_on(crm_graph_t * graph, crm_action_t * action, const char *target) { int limit = 0; struct te_peer_s *r = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY); if(target == NULL) { /* No limit on these */ return TRUE; } else if(te_targets == NULL) { return FALSE; } r = g_hash_table_lookup(te_targets, target); limit = throttle_get_job_limit(target); if(r == NULL) { r = calloc(1, sizeof(struct te_peer_s)); r->name = strdup(target); g_hash_table_insert(te_targets, r->name, r); } if(limit <= r->jobs) { crm_trace("Peer %s is over their job limit of %d (%d): deferring %s", target, limit, r->jobs, id); return FALSE; } else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) { if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s", target, graph->migration_limit, r->migrate_jobs, id); return FALSE; } } crm_trace("Peer %s has not hit their limit yet: current jobs = %d, limit = %d", target, r->jobs, limit); return TRUE; } static gboolean te_should_perform_action(crm_graph_t * graph, crm_action_t * action) { const char *target = NULL; const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (action->type != action_type_rsc) { /* No limit on these */ return TRUE; } /* if we have a router node, this means the action is being performed * on a remote node. 
For now, we count all actions occurring on a * remote node against the job list on the cluster node hosting * the connection resources */ target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); if ((target == NULL) && (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED))) { target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE); if(te_should_perform_action_on(graph, action, target) == FALSE) { return FALSE; } target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET); } else if (target == NULL) { target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); } return te_should_perform_action_on(graph, action, target); } void te_action_confirmed(crm_action_t * action) { const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (action->confirmed == FALSE && action->type == action_type_rsc && target != NULL) { te_update_job_count(action, -1); } action->confirmed = TRUE; } crm_graph_functions_t te_graph_fns = { te_pseudo_action, te_rsc_command, te_crm_command, te_fence_node, te_should_perform_action, }; void notify_crmd(crm_graph_t * graph) { const char *type = "unknown"; enum crmd_fsa_input event = I_NULL; crm_debug("Processing transition completion in state %s", fsa_state2string(fsa_state)); CRM_CHECK(graph->complete, graph->complete = TRUE); switch (graph->completion_action) { case tg_stop: type = "stop"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_done: type = "done"; if (fsa_state == S_TRANSITION_ENGINE) { event = I_TE_SUCCESS; } break; case tg_restart: type = "restart"; if (fsa_state == S_TRANSITION_ENGINE) { if (too_many_st_failures() == FALSE) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { event = I_PE_CALC; } } else { event = I_TE_SUCCESS; } } else if (fsa_state == S_POLICY_ENGINE) { register_fsa_action(A_PE_INVOKE); } break; case tg_shutdown: type = "shutdown"; if (is_set(fsa_input_register, R_SHUTDOWN)) { event = I_STOP; } else { crm_err("We didn't ask to be shut down, yet our PE is telling us to."); event = I_TERMINATE; } } crm_debug("Transition %d status: %s - %s", graph->id, type, crm_str(graph->abort_reason)); graph->abort_reason = NULL; graph->completion_action = tg_done; clear_bit(fsa_input_register, R_IN_TRANSITION); if (event != I_NULL) { register_fsa_input(C_FSA_INTERNAL, event, NULL); } else if (fsa_source) { mainloop_set_trigger(fsa_source); } } diff --git a/crmd/te_utils.c b/crmd/te_utils.c index c2e16f5e5f..085966237d 100644 --- a/crmd/te_utils.c +++ b/crmd/te_utils.c @@ -1,648 +1,664 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include crm_trigger_t *stonith_reconnect = NULL; /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GListPtr stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] target Name of node to remove */ void remove_stonith_cleanup(const char *target) { GListPtr iter = stonith_cleanup_list; while (iter != NULL) { GListPtr tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (safe_str_eq(target, iter_name)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup() { if (stonith_cleanup_list) { GListPtr iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup() { GListPtr iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ static gboolean fail_incompletable_stonith(crm_graph_t * graph) { GListPtr lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GListPtr lpc2 = NULL; synapse_t *synapse = (synapse_t *) lpc->data; if (synapse->confirmed) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { crm_action_t *action = (crm_action_t *) lpc2->data; if (action->type != action_type_crm || action->confirmed) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (task && safe_str_eq(task, CRM_OP_FENCE)) { action->failed = TRUE; last_action = action->xml; update_graph(graph, action); crm_notice("Failing action %d (%s): STONITHd terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("STONITHd failure resulted in un-runnable actions"); abort_transition(INFINITY, tg_restart, "Stonith failure", last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t * st, stonith_event_t * e) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_crit("Fencing daemon connection failed"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencing daemon disconnected"); 
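/* R_ST_REQUIRED is not set, so this disconnect is expected (for example during shutdown) and no reconnect needs to be scheduled */ 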
} /* cbchan will be garbage at this point, arrange for it to be reset */ if(stonith_api) { stonith_api->state = stonith_disconnected; } if (AM_I_DC) { fail_incompletable_stonith(transition_graph); trigger_graph(); } } #if SUPPORT_CMAN # include #endif char *te_client_id = NULL; #ifdef HAVE_SYS_REBOOT_H # include # include #endif static void tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event) { if(te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%d", crm_system_name, getpid()); } if (st_event == NULL) { crm_err("Notify data not found"); return; } crmd_notify_fencing_op(st_event); if (st_event->result == pcmk_ok && safe_str_eq("on", st_event->action)) { crm_notice("%s was successfully unfenced by %s (at the request of %s)", st_event->target, st_event->executioner ? st_event->executioner : "", st_event->origin); /* TODO: Hook up st_event->device */ return; } else if (safe_str_eq("on", st_event->action)) { crm_err("Unfencing of %s by %s failed: %s (%d)", st_event->target, st_event->executioner ? st_event->executioner : "", pcmk_strerror(st_event->result), st_event->result); return; } else if (st_event->result == pcmk_ok && crm_str_eq(st_event->target, fsa_our_uname, TRUE)) { crm_crit("We were allegedly just fenced by %s for %s!", st_event->executioner ? st_event->executioner : "", st_event->origin); /* Dumps blackbox if enabled */ qb_log_fini(); /* Try to get the above log message to disk - somehow */ /* Get out ASAP and do not come back up. * * Triggering a reboot is also not the worst idea either since * the rest of the cluster thinks we're safely down */ #ifdef RB_HALT_SYSTEM reboot(RB_HALT_SYSTEM); #endif /* * If reboot() fails or is not supported, coming back up will * probably lead to a situation where the other nodes set our * status to 'lost' because of the fencing callback and will * discard subsequent election votes with: * * Election 87 (current: 5171, owner: 103): Processed vote from east-03 (Peer is not part of our cluster) * * So just stay dead, something is seriously messed up anyway. * */ exit(100); /* None of our wrappers since we already called qb_log_fini() */ return; } if (st_event->result == pcmk_ok && safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) { st_fail_count_reset(st_event->target); } crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s", st_event->target, st_event->result == pcmk_ok ? "" : " not", st_event->action, st_event->executioner ? st_event->executioner : "", st_event->origin, pcmk_strerror(st_event->result), st_event->id, st_event->client_origin ? 
st_event->client_origin : ""); #if SUPPORT_CMAN if (st_event->result == pcmk_ok && is_cman_cluster()) { int local_rc = 0; int confirm = 0; char *target_copy = strdup(st_event->target); /* In case fenced hasn't noticed yet * * Any fencing that has been inititated will be completed by way of the fence_pcmk redirect */ local_rc = fenced_external(target_copy); if (local_rc != 0) { crm_err("Could not notify CMAN that '%s' is now fenced: %d", st_event->target, local_rc); } else { crm_notice("Notified CMAN that '%s' is now fenced", st_event->target); } /* In case fenced is already trying to shoot it */ confirm = open("/var/run/cluster/fenced_override", O_NONBLOCK|O_WRONLY); if (confirm >= 0) { int ignore = 0; int len = strlen(target_copy); errno = 0; local_rc = write(confirm, target_copy, len); ignore = write(confirm, "\n", 1); if(ignore < 0 && errno == EBADF) { crm_trace("CMAN not expecting %s to be fenced (yet)", st_event->target); } else if (local_rc < len) { crm_perror(LOG_ERR, "Confirmation of CMAN fencing event for '%s' failed: %d", st_event->target, local_rc); } else { fsync(confirm); crm_notice("Confirmed CMAN fencing event for '%s'", st_event->target); } close(confirm); } free(target_copy); } #endif if (st_event->result == pcmk_ok) { crm_node_t *peer = crm_find_peer_full(0, st_event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; gboolean we_are_executioner = safe_str_eq(st_event->executioner, fsa_our_uname); if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc); if(AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, st_event->target, uuid); if (st_event->client_origin && safe_str_neq(st_event->client_origin, te_client_id)) { /* Abort the current transition graph if it wasn't us * that invoked stonith to fence someone */ crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target); abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL); } /* Assume it was our leader if we dont currently have one */ } else if (((fsa_our_dc == NULL) || safe_str_eq(fsa_our_dc, st_event->target)) && !is_set(peer->flags, crm_remote_node)) { crm_notice("Target %s our leader %s (recorded: %s)", fsa_our_dc ? "was" : "may have been", st_event->target, fsa_our_dc ? fsa_our_dc : ""); /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (we_are_executioner) { send_stonith_update(NULL, st_event->target, uuid); } add_stonith_cleanup(st_event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. 
*/ if (is_set(peer->flags, crm_remote_node)) { remote_ra_fail(st_event->target); } crmd_peer_down(peer, TRUE); } } gboolean te_connect_stonith(gpointer user_data) { int lpc = 0; int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); } if (stonith_api->state != stonith_disconnected) { crm_trace("Still connected"); return TRUE; } for (lpc = 0; lpc < 30; lpc++) { crm_debug("Attempting connection to fencing daemon..."); sleep(1); rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc == pcmk_ok) { break; } if (user_data != NULL) { if (is_set(fsa_input_register, R_ST_REQUIRED)) { crm_err("Sign-in failed: triggered a retry"); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Sign-in failed, but no longer required"); } return TRUE; } crm_err("Sign-in failed: pausing and trying again in 2s..."); sleep(1); } CRM_CHECK(rc == pcmk_ok, return TRUE); /* If not, we failed 30 times... just get out */ stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, tengine_stonith_notify); crm_trace("Connected"); return TRUE; } gboolean stop_te_timer(crm_action_timer_t * timer) { const char *timer_desc = "action timer"; if (timer == NULL) { return FALSE; } if (timer->reason == timeout_abort) { timer_desc = "global timer"; crm_trace("Stopping %s", timer_desc); } if (timer->source_id != 0) { crm_trace("Stopping %s", timer_desc); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s was already stopped", timer_desc); return FALSE; } return TRUE; } gboolean te_graph_trigger(gpointer user_data) { enum transition_status graph_rc = -1; if (transition_graph == NULL) { crm_debug("Nothing to do"); return TRUE; } crm_trace("Invoking graph %d in state %s", transition_graph->id, fsa_state2string(fsa_state)); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: return TRUE; break; default: break; } if (transition_graph->complete == FALSE) { int limit = transition_graph->batch_limit; transition_graph->batch_limit = throttle_get_total_job_limit(limit); graph_rc = run_graph(transition_graph); transition_graph->batch_limit = limit; /* Restore the configured value */ /* significant overhead... 
*/ /* print_graph(LOG_DEBUG_3, transition_graph); */ if (graph_rc == transition_active) { crm_trace("Transition not yet complete"); return TRUE; } else if (graph_rc == transition_pending) { crm_trace("Transition not yet complete - no actions fired"); return TRUE; } if (graph_rc != transition_complete) { crm_warn("Transition failed: %s", transition_status(graph_rc)); print_graph(LOG_NOTICE, transition_graph); } } crm_debug("Transition %d is now complete", transition_graph->id); transition_graph->complete = TRUE; notify_crmd(transition_graph); return TRUE; } void trigger_graph_processing(const char *fn, int line) { crm_trace("%s:%d - Triggered graph processing", fn, line); mainloop_set_trigger(transition_trigger); } void abort_transition_graph(int abort_priority, enum transition_action abort_action, const char *abort_text, xmlNode * reason, const char *fn, int line) { int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; int level = LOG_INFO; xmlNode *diff = NULL; xmlNode *change = NULL; CRM_CHECK(transition_graph != NULL, return); switch (fsa_state) { case S_STARTING: case S_PENDING: case S_NOT_DC: case S_HALT: case S_ILLEGAL: case S_STOPPING: case S_TERMINATE: crm_info("Abort %s suppressed: state=%s (complete=%d)", abort_text, fsa_state2string(fsa_state), transition_graph->complete); return; default: break; } /* Make sure any queued calculations are discarded ASAP */ free(fsa_pe_ref); fsa_pe_ref = NULL; if (transition_graph->complete == FALSE) { if(update_abort_priority(transition_graph, abort_priority, abort_action, abort_text)) { level = LOG_NOTICE; } } if(reason) { xmlNode *search = NULL; for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_TAG_DIFF, TYPE(search))) { diff = search; break; } } if(diff) { xml_patch_versions(diff, add, del); for(search = reason; search; search = search->parent) { if (safe_str_eq(XML_DIFF_CHANGE, TYPE(search))) { change = search; break; } } } } if(reason == NULL) { - do_crm_log(level, "Transition aborted: %s (source=%s:%d, %d)", - abort_text, fn, line, transition_graph->complete); + do_crm_log(level, "Transition aborted: %s "CRM_XS" source=%s:%d complete=%s", + abort_text, fn, line, + (transition_graph->complete? "true" : "false")); } else if(change == NULL) { char *local_path = xml_get_path(reason); - do_crm_log(level, "Transition aborted by %s.%s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", - TYPE(reason), ID(reason), abort_text, add[0], add[1], add[2], fn, line, local_path, transition_graph->complete); + do_crm_log(level, "Transition aborted by %s.%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + TYPE(reason), ID(reason), abort_text, + add[0], add[1], add[2], fn, line, local_path, + (transition_graph->complete? 
"true" : "false")); free(local_path); } else { const char *kind = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *path = crm_element_value(change, XML_DIFF_PATH); if(change == reason) { if(strcmp(op, "create") == 0) { reason = reason->children; } else if(strcmp(op, "modify") == 0) { reason = first_named_child(reason, XML_DIFF_RESULT); if(reason) { reason = reason->children; } } } kind = TYPE(reason); if(strcmp(op, "delete") == 0) { const char *shortpath = strrchr(path, '/'); - do_crm_log(level, "Transition aborted by deletion of %s: %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", - shortpath?shortpath+1:path, abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete); + do_crm_log(level, "Transition aborted by deletion of %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + (shortpath? (shortpath + 1) : path), abort_text, + add[0], add[1], add[2], fn, line, path, + (transition_graph->complete? "true" : "false")); } else if (safe_str_eq(XML_CIB_TAG_NVPAIR, kind)) { - do_crm_log(level, "Transition aborted by %s, %s=%s: %s (%s cib=%d.%d.%d, source=%s:%d, path=%s, %d)", - crm_element_value(reason, XML_ATTR_ID), + do_crm_log(level, "Transition aborted by %s doing %s %s=%s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + crm_element_value(reason, XML_ATTR_ID), op, crm_element_value(reason, XML_NVPAIR_ATTR_NAME), crm_element_value(reason, XML_NVPAIR_ATTR_VALUE), - abort_text, op, add[0], add[1], add[2], fn, line, path, transition_graph->complete); + abort_text, add[0], add[1], add[2], fn, line, path, + (transition_graph->complete? "true" : "false")); } else if (safe_str_eq(XML_LRM_TAG_RSC_OP, kind)) { const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC); - do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (magic=%s, cib=%d.%d.%d, source=%s:%d, %d)", + do_crm_log(level, "Transition aborted by operation %s '%s' on %s: %s " + CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s", crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op, crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text, - magic, add[0], add[1], add[2], fn, line, transition_graph->complete); + magic, add[0], add[1], add[2], fn, line, + (transition_graph->complete? "true" : "false")); } else if (safe_str_eq(XML_CIB_TAG_STATE, kind) || safe_str_eq(XML_CIB_TAG_NODE, kind)) { const char *uname = crm_peer_uname(ID(reason)); - do_crm_log(level, "Transition aborted by %s '%s' on %s: %s (cib=%d.%d.%d, source=%s:%d, %d)", - kind, op, uname ? uname : ID(reason), abort_text, - add[0], add[1], add[2], fn, line, transition_graph->complete); + do_crm_log(level, "Transition aborted by %s '%s' on %s: %s " + CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s", + kind, op, (uname? uname : ID(reason)), abort_text, + add[0], add[1], add[2], fn, line, + (transition_graph->complete? "true" : "false")); } else { - do_crm_log(level, "Transition aborted by %s.%s '%s': %s (cib=%d.%d.%d, source=%s:%d, path=%s, %d)", - TYPE(reason), ID(reason), op?op:"change", abort_text, add[0], add[1], add[2], fn, line, path, transition_graph->complete); + do_crm_log(level, "Transition aborted by %s.%s '%s': %s " + CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s", + TYPE(reason), ID(reason), (op? op : "change"), abort_text, + add[0], add[1], add[2], fn, line, path, + (transition_graph->complete? 
"true" : "false")); } } if (transition_graph->complete) { if (transition_timer->period_ms > 0) { crm_timer_stop(transition_timer); crm_timer_start(transition_timer); } else { register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL); } return; } mainloop_set_trigger(transition_trigger); } diff --git a/cts/CM_lha.py b/cts/CM_lha.py index 28742d97f2..b2428838c7 100755 --- a/cts/CM_lha.py +++ b/cts/CM_lha.py @@ -1,529 +1,529 @@ '''CTS: Cluster Testing System: LinuxHA v2 dependent modules... ''' __copyright__ = ''' Author: Huang Zhen Copyright (C) 2004 International Business Machines Additional Audits, Revised Start action, Default Configuration: Copyright (C) 2004 Andrew Beekhof ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. import sys from cts.CTSvars import * from cts.CTS import * from cts.CIB import * from cts.CTStests import AuditResource from cts.watcher import LogWatcher try: from xml.dom.minidom import * except ImportError: sys.__stdout__.write("Python module xml.dom.minidom not found\n") sys.__stdout__.write("Please install python-xml or similar before continuing\n") sys.__stdout__.flush() sys.exit(1) ####################################################################### # # LinuxHA v2 dependent modules # ####################################################################### class crm_lha(ClusterManager): ''' The linux-ha version 2 cluster manager class. It implements the things we need to talk to and manipulate linux-ha version 2 clusters ''' def __init__(self, Environment, randseed=None, name=None): ClusterManager.__init__(self, Environment, randseed=randseed) #HeartbeatCM.__init__(self, Environment, randseed=randseed) #if not name: name="crm-lha" #self["Name"] = name #self.name = name self.fastfail = 0 self.clear_cache = 0 self.cib_installed = 0 self.config = None self.cluster_monitor = 0 self.use_short_names = 1 if self.Env["DoBSC"]: del self.templates["Pat:They_stopped"] del self.templates["Pat:Logd_stopped"] self.Env["use_logd"] = 0 self._finalConditions() self.check_transitions = 0 self.check_elections = 0 self.CIBsync = {} self.CibFactory = ConfigFactory(self) self.cib = self.CibFactory.createConfig(self.Env["Schema"]) def errorstoignore(self): # At some point implement a more elegant solution that # also produces a report at the end '''Return list of errors which are known and very noisey should be ignored''' return PatternSelector().get_patterns(self.name, "BadNewsIgnore") def install_config(self, node): if not self.ns.WaitForNodeToComeUp(node): self.log("Node %s is not up." 
% node) return None if not node in self.CIBsync and self.Env["ClobberCIB"] == 1: self.CIBsync[node] = 1 self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Only install the CIB on the first node, all the other ones will pick it up from there if self.cib_installed == 1: return None self.cib_installed = 1 if self.Env["CIBfilename"] == None: self.log("Installing Generated CIB on node %s" % (node)) self.cib.install(node) else: self.log("Installing CIB (%s) on node %s" % (self.Env["CIBfilename"], node)) if 0 != self.rsh.cp(self.Env["CIBfilename"], "root@" + (self.templates["CIBfile"] % node)): raise ValueError("Cannot scp file to %s" % node) self.rsh(node, "chown "+CTSvars.CRM_DAEMON_USER+" "+CTSvars.CRM_CONFIG_DIR+"/cib.xml") def prepare(self): '''Finish the Initialization process. Prepare to test...''' self.partitions_expected = 1 for node in self.Env["nodes"]: self.ShouldBeStatus[node] = "" self.unisolate_node(node) self.StataCM(node) def test_node_CM(self, node): '''Report the status of the cluster manager on a given node''' watchpats = [ ] watchpats.append("Current ping state: (S_IDLE|S_NOT_DC)") watchpats.append(self.templates["Pat:Slave_started"]%node) watchpats.append(self.templates["Pat:Master_started"]%node) idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterIdle", hosts=[node], kind=self.Env["LogWatcher"]) idle_watch.setwatch() out = self.rsh(node, self.templates["StatusCmd"]%node, 1) self.debug("Node %s status: '%s'" %(node, out)) if not out or string.find(out, 'ok') < 0: if self.ShouldBeStatus[node] == "up": self.log( "Node status for %s is %s but we think it should be %s" % (node, "down", self.ShouldBeStatus[node])) self.ShouldBeStatus[node] = "down" return 0 if self.ShouldBeStatus[node] == "down": self.log( "Node status for %s is %s but we think it should be %s: %s" % (node, "up", self.ShouldBeStatus[node], out)) self.ShouldBeStatus[node] = "up" # check the output first - because syslog-ng loses messages if string.find(out, 'S_NOT_DC') != -1: # Up and stable return 2 if string.find(out, 'S_IDLE') != -1: # Up and stable return 2 # fall back to syslog-ng and wait if not idle_watch.look(): # just up self.debug("Warn: Node %s is unstable: %s" % (node, out)) return 1 # Up and stable return 2 # Is the node up or is the node down def StataCM(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) > 0: return 1 return None # Being up and being stable is not the same question... 
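# test_node_CM() returns 0 (down), 1 (up but not yet stable) or 2 (up and
# stable); StataCM() above accepts any non-zero answer, while node_stable()
# below insists on 2. A hypothetical caller polling for stability could
# spin on the middle value:
#
#   while self.test_node_CM(node) == 1:   # up, but still settling
#       time.sleep(1)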
def node_stable(self, node): '''Report the status of the cluster manager on a given node''' if self.test_node_CM(node) == 2: return 1 self.log("Warn: Node %s not stable" % (node)) return None def partition_stable(self, nodes, timeout=None): watchpats = [ ] watchpats.append("Current ping state: S_IDLE") watchpats.append(self.templates["Pat:DC_IDLE"]) self.debug("Waiting for cluster stability...") if timeout == None: timeout = self.Env["DeadTime"] if len(nodes) < 3: self.debug("Cluster is inactive") return 1 idle_watch = LogWatcher(self.Env["LogFileName"], watchpats, "ClusterStable", timeout, hosts=nodes.split(), kind=self.Env["LogWatcher"]) idle_watch.setwatch() for node in nodes.split(): # have each node dump its current state self.rsh(node, self.templates["StatusCmd"] % node, 1) ret = idle_watch.look() while ret: self.debug(ret) for node in nodes.split(): if re.search(node, ret): return 1 ret = idle_watch.look() self.debug("Warn: Partition %s not IDLE after %ds" % (repr(nodes), timeout)) return None def cluster_stable(self, timeout=None, double_check=False): partitions = self.find_partitions() for partition in partitions: if not self.partition_stable(partition, timeout): return None if double_check: # Make sure we are really stable and that all resources, # including those that depend on transient node attributes, # are started if they were going to be time.sleep(5) for partition in partitions: if not self.partition_stable(partition, timeout): return None return 1 def is_node_dc(self, node, status_line=None): rc = 0 if not status_line: status_line = self.rsh(node, self.templates["StatusCmd"]%node, 1) if not status_line: rc = 0 elif string.find(status_line, 'S_IDLE') != -1: rc = 1 elif string.find(status_line, 'S_INTEGRATION') != -1: rc = 1 elif string.find(status_line, 'S_FINALIZE_JOIN') != -1: rc = 1 elif string.find(status_line, 'S_POLICY_ENGINE') != -1: rc = 1 elif string.find(status_line, 'S_TRANSITION_ENGINE') != -1: rc = 1 return rc def active_resources(self, node): # [SM].* {node} matches Started, Slave, Master # Stopped won't be matched as it won't include {node} (rc, output) = self.rsh(node, """crm_resource -c""", None) resources = [] for line in output: if re.search("^Resource", line): tmp = AuditResource(self, line) if tmp.type == "primitive" and tmp.host == node: resources.append(tmp.id) return resources def ResourceLocation(self, rid): ResourceNodes = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": cmd = self.templates["RscRunning"] % (rid) (rc, lines) = self.rsh(node, cmd, None) if rc == 127: self.log("Command '%s' failed. Binary or pacemaker-cts package not installed?" % cmd) for line in lines: self.log("Output: "+line) elif rc == 0: ResourceNodes.append(node) return ResourceNodes def find_partitions(self): ccm_partitions = [] for node in self.Env["nodes"]: if self.ShouldBeStatus[node] == "up": partition = self.rsh(node, self.templates["PartitionCmd"], 1) if not partition: self.log("no partition details for %s" % node) elif len(partition) > 2: nodes = partition.split() nodes.sort() partition = string.join(nodes, ' ') found = 0 for a_partition in ccm_partitions: if partition == a_partition: found = 1 if found == 0: self.debug("Adding partition from %s: %s" % (node, partition)) ccm_partitions.append(partition) else: self.debug("Partition '%s' from %s is consistent with existing entries" % (partition, node)) else: self.log("bad partition details for %s" % node) else: self.debug("Node %s is down... 
skipping" % node) self.debug("Found partitions: %s" % repr(ccm_partitions) ) return ccm_partitions def HasQuorum(self, node_list): # If we are auditing a partition, then one side will # have quorum and the other not. # So the caller needs to tell us which we are checking # If no value for node_list is specified... assume all nodes if not node_list: node_list = self.Env["nodes"] for node in node_list: if self.ShouldBeStatus[node] == "up": quorum = self.rsh(node, self.templates["QuorumCmd"], 1) if string.find(quorum, "1") != -1: return 1 elif string.find(quorum, "0") != -1: return 0 else: self.debug("WARN: Unexpected quorum test result from " + node + ":" + quorum) return 0 def Components(self): complist = [] common_ignore = [ "Pending action:", "(ERROR|error): crm_log_message_adv:", "(ERROR|error): MSG: No message to dump", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "Action A_RECOVER .* not supported", "(ERROR|error): stonithd_op_result_ready: not signed on", "pingd.*(ERROR|error): send_update: Could not send update", "send_ipc_message: IPC Channel to .* is not connected", "unconfirmed_actions: Waiting on .* unconfirmed actions", "cib_native_msgready: Message pending on command channel", r": Performing A_EXIT_1 - forcefully exiting the CRMd", r"Resource .* was active at shutdown. You may ignore this error if it is unmanaged.", ] stonith_ignore = [ r"Updating failcount for child_DoFencing", r"(ERROR|error).*: Sign-in failed: triggered a retry", "lrmd.*(ERROR|error): stonithd_receive_ops_result failed.", ] stonith_ignore.extend(common_ignore) ccm_ignore = [ "(ERROR|error): get_channel_token: No reply message - disconnected" ] ccm_ignore.extend(common_ignore) ccm = Process(self, "ccm", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "CCM connection appears to have failed", "crmd.*Action A_RECOVER .* not supported", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", "Exiting to recover from CCM connection failure", r"crmd.*: Could not recover from internal error", "crmd.*I_ERROR.*(ccm_dispatch|crmd_cib_connection_destroy)", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", "cib.*exited with return code 2.", # Not if it was fenced # "A new node joined the cluster", # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", # "Processing I_NODE_JOIN:.* cause=C_HA_MESSAGE", # "State transition S_.* -> S_INTEGRATION.*input=I_NODE_JOIN", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = ccm_ignore) cib = Process(self, "cib", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "Lost connection to the CIB service", "Connection to the CIB terminated...", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", r"crmd.*: Could not recover from internal error", "crmd.*exited with return code 2.", "attrd.*exited with return code 1.", ], badnews_ignore = common_ignore) lrmd = Process(self, "lrmd", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "LRM Connection failed", "crmd.*I_ERROR.*lrm_connection_destroy", "State transition S_STARTING -> S_PENDING", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*: 
Could not recover from internal error", "crmd.*exited with return code 2.", ], badnews_ignore = common_ignore) crmd = Process(self, "crmd", triggersreboot=self.fastfail, pats = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # "tengine_stonith_callback: .*result=0", "State transition .* S_IDLE", "State transition S_STARTING -> S_PENDING", ], badnews_ignore = common_ignore) pengine = Process(self, "pengine", triggersreboot=self.fastfail, pats = [ "State transition .* S_RECOVERY", "crmd.*exited with return code 2.", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", r"crmd.*: Could not recover from internal error", r"crmd.*CRIT.*: Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", "crmd.*exited with return code 2.", ], badnews_ignore = common_ignore, dc_only=1) if self.Env["DoFencing"] == 1 : complist.append(Process(self, "stoniths", triggersreboot=self.fastfail, dc_pats = [ r"crmd.*CRIT.*: Fencing daemon connection failed", "Attempting connection to fencing daemon", ], badnews_ignore = stonith_ignore)) if self.fastfail == 0: ccm.pats.extend([ "attrd .* exited with return code 1", "(ERROR|error): Respawning client .*attrd", "cib.* exited with return code 2", "(ERROR|error): Respawning client .*cib", "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) cib.pats.extend([ "attrd.* exited with return code 1", "(ERROR|error): Respawning client .*attrd", "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) lrmd.pats.extend([ "crmd.* exited with return code 2", "(ERROR|error): Respawning client .*crmd" ]) pengine.pats.extend([ "(ERROR|error): Respawning client .*crmd" ]) complist.append(ccm) complist.append(cib) complist.append(lrmd) complist.append(crmd) complist.append(pengine) return complist def NodeUUID(self, node): lines = self.rsh(node, self.templates["UUIDQueryCmd"], 1) for line in lines: self.debug("UUIDLine:" + line) m = re.search(r'%s.+\((.+)\)' % node, line) if m: return m.group(1) return "" def StandbyStatus(self, node): out=self.rsh(node, self.templates["StandbyQueryCmd"] % node, 1) if not out: return "off" out = out[:-1] self.debug("Standby result: "+out) return out # status == "on" : Enter Standby mode # status == "off": Enter Active mode def SetStandbyMode(self, node, status): current_status = self.StandbyStatus(node) cmd = self.templates["StandbyCmd"] % (node, status) ret = self.rsh(node, cmd) return True def AddDummyRsc(self, node, rid): rsc_xml = """ ' '""" % (rid, rid) constraint_xml = """ ' ' """ % (rid, node, node, rid) self.rsh(node, self.templates['CibAddXml'] % (rsc_xml)) self.rsh(node, self.templates['CibAddXml'] % (constraint_xml)) def RemoveDummyRsc(self, node, rid): constraint = "\"//rsc_location[@rsc='%s']\"" % (rid) rsc = "\"//primitive[@id='%s']\"" % (rid) self.rsh(node, self.templates['CibDelXpath'] % constraint) self.rsh(node, self.templates['CibDelXpath'] % rsc) ####################################################################### # # A little test code... # # Which you are advised to completely ignore... 
# ####################################################################### if __name__ == '__main__': pass diff --git a/cts/CTStests.py b/cts/CTStests.py index 97db18c0f6..c4d28c3305 100644 --- a/cts/CTStests.py +++ b/cts/CTStests.py @@ -1,3162 +1,3164 @@ '''CTS: Cluster Testing System: Tests module There are a few things we want to do here: ''' __copyright__ = ''' Copyright (C) 2000, 2001 Alan Robertson Licensed under the GNU GPL. Add ResourceRecover testcase Zhao Kai ''' # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # SPECIAL NOTE: # # Tests may NOT implement any cluster-manager-specific code in them. # EXTEND the ClusterManager object to provide the base capabilities # the test needs if you need to do something that the current CM classes # do not. Otherwise you screw up the whole point of the object structure # in CTS. # # Thank you. # import time, os, re, string, subprocess, tempfile from stat import * from cts import CTS from cts.CTSaudits import * from cts.CTSvars import * from cts.patterns import PatternSelector from cts.logging import LogFactory from cts.remote import RemoteFactory from cts.watcher import LogWatcher from cts.environment import EnvFactory AllTestClasses = [ ] class CTSTest: ''' A Cluster test. We implement the basic set of properties and behaviors for a generic cluster test. Cluster tests track their own statistics. We keep each of the kinds of counts we track as separate {name,value} pairs. 
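The standard counters are "calls", "success", "failure", "skipped" and "auditfail"; individual tests may add counters of their own (for example per-node counts) through incr().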
''' def __init__(self, cm): #self.name="the unnamed test" self.Stats = {"calls":0 , "success":0 , "failure":0 , "skipped":0 , "auditfail":0} # if not issubclass(cm.__class__, ClusterManager): # raise ValueError("Must be a ClusterManager object") self.CM = cm self.Env = EnvFactory().getInstance() self.rsh = RemoteFactory().getInstance() self.logger = LogFactory() self.templates = PatternSelector(cm["Name"]) self.Audits = [] self.timeout = 120 self.passed = 1 self.is_loop = 0 self.is_unsafe = 0 self.is_docker_unsafe = 0 self.is_experimental = 0 self.is_container = 0 self.is_valgrind = 0 self.benchmark = 0 # which tests to benchmark self.timer = {} # timers def log(self, args): self.logger.log(args) def debug(self, args): self.logger.debug(args) def has_key(self, key): return key in self.Stats def __setitem__(self, key, value): self.Stats[key] = value def __getitem__(self, key): if str(key) == "0": raise ValueError("Bad call to 'foo in X', should reference 'foo in X.Stats' instead") if key in self.Stats: return self.Stats[key] return None def log_mark(self, msg): self.debug("MARK: test %s %s %d" % (self.name,msg,time.time())) return def get_timer(self,key = "test"): try: return self.timer[key] except: return 0 def set_timer(self,key = "test"): self.timer[key] = time.time() return self.timer[key] def log_timer(self,key = "test"): elapsed = 0 if key in self.timer: elapsed = time.time() - self.timer[key] s = key == "test" and self.name or "%s:%s" % (self.name,key) self.debug("%s runtime: %.2f" % (s, elapsed)) del self.timer[key] return elapsed def incr(self, name): '''Increment (or initialize) the value associated with the given name''' if not name in self.Stats: self.Stats[name] = 0 self.Stats[name] = self.Stats[name]+1 # Reset the test passed boolean if name == "calls": self.passed = 1 def failure(self, reason="none"): '''Increment the failure count''' self.passed = 0 self.incr("failure") self.logger.log(("Test %s" % self.name).ljust(35) + " FAILED: %s" % reason) return None def success(self): '''Increment the success count''' self.incr("success") return 1 def skipped(self): '''Increment the skipped count''' self.incr("skipped") return 1 def __call__(self, node): '''Perform the given test''' raise ValueError("Abstract Class member (__call__)") self.incr("calls") return self.failure() def audit(self): passed = 1 if len(self.Audits) > 0: for audit in self.Audits: if not audit(): self.logger.log("Internal %s Audit %s FAILED." 
% (self.name, audit.name())) self.incr("auditfail") passed = 0 return passed def setup(self, node): '''Setup the given test''' return self.success() def teardown(self, node): '''Tear down the given test''' return self.success() def create_watch(self, patterns, timeout, name=None): if not name: name = self.name return LogWatcher(self.Env["LogFileName"], patterns, name, timeout, kind=self.Env["LogWatcher"], hosts=self.Env["nodes"]) def local_badnews(self, prefix, watch, local_ignore=[]): errcount = 0 if not prefix: prefix = "LocalBadNews:" ignorelist = [] ignorelist.append(" CTS: ") ignorelist.append(prefix) ignorelist.extend(local_ignore) while errcount < 100: match = watch.look(0) if match: add_err = 1 for ignore in ignorelist: if add_err == 1 and re.search(ignore, match): add_err = 0 if add_err == 1: self.logger.log(prefix + " " + match) errcount = errcount + 1 else: break else: self.logger.log("Too many errors!") watch.end() return errcount def is_applicable(self): return self.is_applicable_common() def is_applicable_common(self): '''Return TRUE if we are applicable in the current test configuration''' #raise ValueError("Abstract Class member (is_applicable)") if self.is_loop and not self.Env["loop-tests"]: return 0 elif self.is_unsafe and not self.Env["unsafe-tests"]: return 0 elif self.is_valgrind and not self.Env["valgrind-tests"]: return 0 elif self.is_experimental and not self.Env["experimental-tests"]: return 0 elif self.is_docker_unsafe and self.Env["docker"]: return 0 elif self.is_container and not self.Env["container-tests"]: return 0 elif self.Env["benchmark"] and self.benchmark == 0: return 0 return 1 def find_ocfs2_resources(self, node): self.r_o2cb = None self.r_ocfs2 = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "o2cb" and r.parent != "NA": self.debug("Found o2cb: %s" % self.r_o2cb) self.r_o2cb = r.parent if re.search("^Constraint", line): c = AuditConstraint(self.CM, line) if c.type == "rsc_colocation" and c.target == self.r_o2cb: self.r_ocfs2.append(c.rsc) self.debug("Found ocfs2 filesystems: %s" % repr(self.r_ocfs2)) return len(self.r_ocfs2) def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' return 1 def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [] class StopTest(CTSTest): '''Stop (deactivate) the cluster manager on a node''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stop" def __call__(self, node): '''Perform the 'stop' test. 
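Deactivate the cluster manager on the target node, then watch the logs of every other active node for the patterns announcing its departure.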
''' self.incr("calls") if self.CM.ShouldBeStatus[node] != "up": return self.skipped() patterns = [] # Technically we should always be able to notice ourselves stopping patterns.append(self.templates["Pat:We_stopped"] % node) #if self.Env["use_logd"]: # patterns.append(self.templates["Pat:Logd_stopped"] % node) # Any active node needs to notice this one left # NOTE: This wont work if we have multiple partitions for other in self.Env["nodes"]: if self.CM.ShouldBeStatus[other] == "up" and other != node: patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) #self.debug("Checking %s will notice %s left"%(other, node)) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() if node == self.CM.OurNode: self.incr("us") else: if self.CM.upcount() <= 1: self.incr("all") else: self.incr("them") self.CM.StopaCM(node) watch_result = watch.lookforall() failreason = None UnmatchedList = "||" if watch.unmatched: (rc, output) = self.rsh(node, "/bin/ps axf", None) for line in output: self.debug(line) (rc, output) = self.rsh(node, "/usr/sbin/dlm_tool dump", None) for line in output: self.debug(line) for regex in watch.unmatched: self.logger.log ("ERROR: Shutdown pattern not found: %s" % (regex)) UnmatchedList += regex + "||"; failreason = "Missing shutdown pattern" self.CM.cluster_stable(self.Env["DeadTime"]) if not watch.unmatched or self.CM.upcount() == 0: return self.success() if len(watch.unmatched) >= self.CM.upcount(): return self.failure("no match against (%s)" % UnmatchedList) if failreason == None: return self.success() else: return self.failure(failreason) # # We don't register StopTest because it's better when called by # another test... # class StartTest(CTSTest): '''Start (activate) the cluster manager on a node''' def __init__(self, cm, debug=None): CTSTest.__init__(self,cm) self.name = "start" self.debug = debug def __call__(self, node): '''Perform the 'start' test. ''' self.incr("calls") if self.CM.upcount() == 0: self.incr("us") else: self.incr("them") if self.CM.ShouldBeStatus[node] != "down": return self.skipped() elif self.CM.StartaCM(node): return self.success() else: return self.failure("Startup %s on node %s failed" % (self.Env["Name"], node)) # # We don't register StartTest because it's better when called by # another test... # class FlipTest(CTSTest): '''If it's running, stop it. If it's stopped start it. Overthrow the status quo... ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Flip" self.start = StartTest(cm) self.stop = StopTest(cm) def __call__(self, node): '''Perform the 'Flip' test. ''' self.incr("calls") if self.CM.ShouldBeStatus[node] == "up": self.incr("stopped") ret = self.stop(node) type = "up->down" # Give the cluster time to recognize it's gone... time.sleep(self.Env["StableTime"]) elif self.CM.ShouldBeStatus[node] == "down": self.incr("started") ret = self.start(node) type = "down->up" else: return self.skipped() self.incr(type) if ret: return self.success() else: return self.failure("%s failure" % type) # Register FlipTest as a good test to run AllTestClasses.append(FlipTest) class RestartTest(CTSTest): '''Stop and restart a node''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Restart" self.start = StartTest(cm) self.stop = StopTest(cm) self.benchmark = 1 def __call__(self, node): '''Perform the 'restart' test. 
''' self.incr("calls") self.incr("node:" + node) ret1 = 1 if self.CM.StataCM(node): self.incr("WasStopped") if not self.start(node): return self.failure("start (setup) failure: "+node) self.set_timer() if not self.stop(node): return self.failure("stop failure: "+node) if not self.start(node): return self.failure("start failure: "+node) return self.success() # Register RestartTest as a good test to run AllTestClasses.append(RestartTest) class StonithdTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Stonithd" self.startall = SimulStartLite(cm) self.benchmark = 1 def __call__(self, node): self.incr("calls") if len(self.Env["nodes"]) < 2: return self.skipped() ret = self.startall(None) if not ret: return self.failure("Setup failed") is_dc = self.CM.is_node_dc(node) watchpats = [] watchpats.append(self.templates["Pat:FenceOpOK"] % node) watchpats.append(self.templates["Pat:NodeFenced"] % node) if self.Env["at-boot"] == 0: self.debug("Expecting %s to stay down" % node) self.CM.ShouldBeStatus[node] = "down" else: self.debug("Expecting %s to come up again %d" % (node, self.Env["at-boot"])) watchpats.append("%s.* S_STARTING -> S_PENDING" % node) watchpats.append("%s.* S_PENDING -> S_NOT_DC" % node) watch = self.create_watch(watchpats, 30 + self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() origin = self.Env.RandomGen.choice(self.Env["nodes"]) rc = self.rsh(origin, "stonith_admin --reboot %s -VVVVVV" % node) if rc == 194: # 194 - 256 = -62 = Timer expired # # Look for the patterns, usually this means the required # device was running on the node to be fenced - or that # the required devices were in the process of being loaded # and/or moved # # Effectively the node committed suicide so there will be # no confirmation, but pacemaker should be watching and # fence the node again self.logger.log("Fencing command on %s to fence %s timed out" % (origin, node)) elif origin != node and rc != 0: self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.logger.log("Fencing command on %s failed to fence %s (rc=%d)" % (origin, node, rc)) elif origin == node and rc != 255: # 255 == broken pipe, ie. 
the node was fenced as expected self.logger.log("Locally originated fencing returned %d" % rc) self.set_timer("fence") matched = watch.lookforall() self.log_timer("fence") self.set_timer("reform") if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected patterns") elif not is_stable: return self.failure("Cluster did not become stable") self.log_timer("reform") return self.success() def errorstoignore(self): return [ self.templates["Pat:Fencing_start"] % ".*", self.templates["Pat:Fencing_ok"] % ".*", r"error.*: Resource .*stonith::.* is active on 2 nodes attempting recovery", r"error.*: Operation reboot of .*by .* for stonith_admin.*: Timer expired", ] def is_applicable(self): if not self.is_applicable_common(): return 0 if "DoFencing" in self.Env.keys(): return self.Env["DoFencing"] return 1 AllTestClasses.append(StonithdTest) class StartOnebyOne(CTSTest): '''Start all the nodes ~ one by one''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StartOnebyOne" self.stopall = SimulStopLite(cm) self.start = StartTest(cm) self.ns = CTS.NodeStatus(cm.Env) def __call__(self, dummy): '''Perform the 'StartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Test setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.start(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to start: " + repr(failed)) return self.success() # Register StartOnebyOne as a good test to run AllTestClasses.append(StartOnebyOne) class SimulStart(CTSTest): '''Start all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStart" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'SimulStart' test. ''' self.incr("calls") # We ignore the "node" parameter... # Shut down all the nodes... ret = self.stopall(None) if not ret: return self.failure("Setup failed") self.CM.clear_all_caches() if not self.startall(None): return self.failure("Startall failed") return self.success() # Register SimulStart as a good test to run AllTestClasses.append(SimulStart) class SimulStop(CTSTest): '''Stop all the nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStop" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) def __call__(self, dummy): '''Perform the 'SimulStop' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.stopall(None): return self.failure("Stopall failed") return self.success() # Register SimulStop as a good test to run AllTestClasses.append(SimulStop) class StopOnebyOne(CTSTest): '''Stop all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "StopOnebyOne" self.startall = SimulStartLite(cm) self.stop = StopTest(cm) def __call__(self, dummy): '''Perform the 'StopOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... 
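# ----------------------------------------------------------------------
# Aside (illustrative sketch, not part of the suite): StonithdTest above
# treats rc == 194 as "Timer expired" because a negative errno returned
# over the remote shell wraps modulo 256 (194 - 256 = -62, and errno 62
# is ETIME on Linux).  A hypothetical helper making the decoding explicit:
import errno
import os

def decode_wrapped_rc(rc):
    """Map a wrapped remote exit code back to (errno name, message)."""
    if rc > 128:
        err = 256 - rc               # e.g. 256 - 194 = 62 == errno.ETIME
        return errno.errorcode.get(err), os.strerror(err)
    return (None, None)
# decode_wrapped_rc(194) -> ('ETIME', 'Timer expired')
# ----------------------------------------------------------------------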
# Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") failed = [] self.set_timer() for node in self.Env["nodes"]: if not self.stop(node): failed.append(node) if len(failed) > 0: return self.failure("Some node failed to stop: " + repr(failed)) self.CM.clear_all_caches() return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(StopOnebyOne) class RestartOnebyOne(CTSTest): '''Restart all the nodes in order''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RestartOnebyOne" self.startall = SimulStartLite(cm) def __call__(self, dummy): '''Perform the 'RestartOnebyOne' test. ''' self.incr("calls") # We ignore the "node" parameter... # Start up all the nodes... ret = self.startall(None) if not ret: return self.failure("Setup failed") did_fail = [] self.set_timer() self.restart = RestartTest(self.CM) for node in self.Env["nodes"]: if not self.restart(node): did_fail.append(node) if did_fail: return self.failure("Could not restart %d nodes: %s" % (len(did_fail), repr(did_fail))) return self.success() # Register StopOnebyOne as a good test to run AllTestClasses.append(RestartOnebyOne) class PartialStart(CTSTest): '''Start a node - but tell it to stop before it finishes starting up''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "PartialStart" self.startall = SimulStartLite(cm) self.stopall = SimulStopLite(cm) self.stop = StopTest(cm) #self.is_unsafe = 1 def __call__(self, node): '''Perform the 'PartialStart' test. ''' self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Setup failed") # FIXME! This should use the CM class to get the pattern # then it would be applicable in general watchpats = [] watchpats.append("crmd.*Connecting to cluster infrastructure") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.CM.StartaCMnoBlock(node) ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) return self.failure("Setup of %s failed" % node) ret = self.stop(node) if not ret: return self.failure("%s did not stop in time" % node) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # We might do some fencing in the 2-node case if we make it up far enough return [ - """Executing reboot fencing operation""", + r"Executing reboot fencing operation", + r"Requesting fencing \([^)]+\) of node ", ] # Register StopOnebyOne as a good test to run AllTestClasses.append(PartialStart) class StandbyTest(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Standby" self.benchmark = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) # make sure the node is active # set the node to standby mode # check resources, none resource should be running on the node # set the node to active mode # check resouces, resources should have been migrated back (SHOULD THEY?) 
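    # A minimal sketch of the standby round trip exercised by __call__ below,
    # driven through the crm_standby CLI directly.  The command is real, but
    # this helper and the exact flag spellings are assumptions and may vary
    # by Pacemaker version:
    def _standby_roundtrip_sketch(self, node):
        self.rsh(node, "crm_standby -N %s -v on" % node)          # enter standby
        state = self.rsh(node, "crm_standby -N %s -G" % node, 1)  # query it back
        self.rsh(node, "crm_standby -N %s -v off" % node)         # back to active
        return state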
def __call__(self, node): self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") self.debug("Make sure node %s is active" % node) if self.CM.StandbyStatus(node) != "off": if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.debug("Getting resources running on node %s" % node) rsc_on_node = self.CM.active_resources(node) watchpats = [] watchpats.append(r"State transition .* -> S_POLICY_ENGINE") watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() self.debug("Setting node %s to standby mode" % node) if not self.CM.SetStandbyMode(node, "on"): return self.failure("can't set node %s to standby mode" % node) self.set_timer("on") ret = watch.lookforall() if not ret: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.CM.SetStandbyMode(node, "off") return self.failure("cluster didn't react to standby change on %s" % node) self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "on": return self.failure("standby status of %s is [%s] but we expect [on]" % (node, status)) self.log_timer("on") self.debug("Checking resources") bad_run = self.CM.active_resources(node) if len(bad_run) > 0: rc = self.failure("%s set to standby, %s is still running on it" % (node, repr(bad_run))) self.debug("Setting node %s to active mode" % node) self.CM.SetStandbyMode(node, "off") return rc self.debug("Setting node %s to active mode" % node) if not self.CM.SetStandbyMode(node, "off"): return self.failure("can't set node %s to active mode" % node) self.set_timer("off") self.CM.cluster_stable() status = self.CM.StandbyStatus(node) if status != "off": return self.failure("standby status of %s is [%s] but we expect [off]" % (node, status)) self.log_timer("off") return self.success() AllTestClasses.append(StandbyTest) class ValgrindTest(CTSTest): '''Check for memory leaks''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Valgrind" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_valgrind = 1 self.is_loop = 1 def setup(self, node): self.incr("calls") ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") # Enable valgrind self.logger.logPat = "/tmp/%s-*.valgrind" % self.name self.Env["valgrind-prefix"] = self.name self.rsh(node, "rm -f %s" % self.logger.logPat, None) ret = self.startall(None) if not ret: return self.failure("Start all nodes failed") for node in self.Env["nodes"]: (rc, output) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None) for line in output: self.debug(line) return self.success() def teardown(self, node): # Disable valgrind self.Env["valgrind-prefix"] = None # Return all nodes to normal ret = self.stopall(None) if not ret: return self.failure("Stop all nodes failed") return self.success() def find_leaks(self): # Check for leaks leaked = [] self.stop = StopTest(self.CM) for node in self.Env["nodes"]: (rc, ps_out) = self.rsh(node, "ps u --ppid `pidofproc aisexec`", None) rc = self.stop(node) if not rc: self.failure("Couldn't shut down %s" % node) rc = self.rsh(node, "grep -e indirectly.*lost:.*[1-9] -e definitely.*lost:.*[1-9] -e (ERROR|error).*SUMMARY:.*[1-9].*errors %s" % self.logger.logPat, 0) if rc != 1: leaked.append(node) self.failure("Valgrind errors detected on %s" % node) for line in 
ps_out: self.logger.log(line) (rc, output) = self.rsh(node, "grep -e lost: -e SUMMARY: %s" % self.logger.logPat, None) for line in output: self.logger.log(line) (rc, output) = self.rsh(node, "cat %s" % self.logger.logPat, None) for line in output: self.debug(line) self.rsh(node, "rm -f %s" % self.logger.logPat, None) return leaked def __call__(self, node): leaked = self.find_leaks() if len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"cib.*: \*\*\*\*\*\*\*\*\*\*\*\*\*", r"cib.*: .* avoid confusing Valgrind", r"HA_VALGRIND_ENABLED", ] class StandbyLoopTest(ValgrindTest): '''Check for memory leaks by putting a node in and out of standby for an hour''' def __init__(self, cm): ValgrindTest.__init__(self,cm) self.name = "StandbyLoop" def __call__(self, node): lpc = 0 delay = 2 failed = 0 done = time.time() + self.Env["loop-minutes"] * 60 while time.time() <= done and not failed: lpc = lpc + 1 time.sleep(delay) if not self.CM.SetStandbyMode(node, "on"): self.failure("can't set node %s to standby mode" % node) failed = lpc time.sleep(delay) if not self.CM.SetStandbyMode(node, "off"): self.failure("can't set node %s to active mode" % node) failed = lpc leaked = self.find_leaks() if failed: return self.failure("Iteration %d failed" % failed) elif len(leaked) > 0: return self.failure("Nodes %s leaked" % repr(leaked)) return self.success() AllTestClasses.append(StandbyLoopTest) class BandwidthTest(CTSTest): # Tests should not be cluster-manager-specific # If you need to find out cluster manager configuration to do this, then # it should be added to the generic cluster manager API. '''Test the bandwidth which heartbeat uses''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "Bandwidth" self.start = StartTest(cm) self.__setitem__("min",0) self.__setitem__("max",0) self.__setitem__("totalbandwidth",0) (handle, self.tempfile) = tempfile.mkstemp(".cts") os.close(handle) self.startall = SimulStartLite(cm) def __call__(self, node): '''Perform the Bandwidth test''' self.incr("calls") if self.CM.upcount() < 1: return self.skipped() Path = self.CM.InternalCommConfig() if "ip" not in Path["mediatype"]: return self.skipped() port = Path["port"][0] port = int(port) ret = self.startall(None) if not ret: return self.failure("Test setup failed") time.sleep(5) # We get extra messages right after startup. fstmpfile = "/var/run/band_estimate" dumpcmd = "tcpdump -p -n -c 102 -i any udp port %d > %s 2>&1" \ % (port, fstmpfile) rc = self.rsh(node, dumpcmd) if rc == 0: farfile = "root@%s:%s" % (node, fstmpfile) self.rsh.cp(farfile, self.tempfile) Bandwidth = self.countbandwidth(self.tempfile) if not Bandwidth: self.logger.log("Could not compute bandwidth.") return self.success() intband = int(Bandwidth + 0.5) self.logger.log("...bandwidth: %d bits/sec" % intband) self.Stats["totalbandwidth"] = self.Stats["totalbandwidth"] + Bandwidth if self.Stats["min"] == 0: self.Stats["min"] = Bandwidth if Bandwidth > self.Stats["max"]: self.Stats["max"] = Bandwidth if Bandwidth < self.Stats["min"]: self.Stats["min"] = Bandwidth self.rsh(node, "rm -f %s" % fstmpfile) os.unlink(self.tempfile) return self.success() else: return self.failure("no response from tcpdump command [%d]!" 
                                % rc)

    def countbandwidth(self, file):
        fp = open(file, "r")
        fp.seek(0)
        count = 0
        sum = 0
        while 1:
            line = fp.readline()
            if not line:
                return None

            if re.search("udp",line) or re.search("UDP,", line):
                count = count + 1
                linesplit = string.split(line," ")
                for j in range(len(linesplit)-1):
                    if linesplit[j] == "udp": break
                    if linesplit[j] == "length:": break

                try:
                    sum = sum + int(linesplit[j+1])
                except ValueError:
                    self.logger.log("Invalid tcpdump line: %s" % line)
                    return None
                T1 = linesplit[0]
                timesplit = string.split(T1,":")
                time2split = string.split(timesplit[2],".")
                time1 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
                break

        while count < 100:
            line = fp.readline()
            if not line:
                return None
            if re.search("udp",line) or re.search("UDP,", line):
                count = count+1
                linessplit = string.split(line," ")
                for j in range(len(linessplit)-1):
                    if linessplit[j] == "udp": break
                    # was "linesplit[j]", which referenced the stale list from
                    # the first loop and so could never match here
                    if linessplit[j] == "length:": break

                try:
                    sum = int(linessplit[j+1]) + sum
                except ValueError:
                    self.logger.log("Invalid tcpdump line: %s" % line)
                    return None

        T2 = linessplit[0]
        timesplit = string.split(T2,":")
        time2split = string.split(timesplit[2],".")
        time2 = (int(timesplit[0])*60+int(timesplit[1]))*60+int(time2split[0])+int(time2split[1])*0.000001
        time = time2-time1
        if (time <= 0):
            return 0
        return (sum*8)/time

    def is_applicable(self):
        '''BandwidthTest never applicable'''
        return 0

AllTestClasses.append(BandwidthTest)


###################################################################
class MaintenanceMode(CTSTest):
###################################################################
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name = "MaintenanceMode"
        self.start = StartTest(cm)
        self.startall = SimulStartLite(cm)
        self.max = 30
        #self.is_unsafe = 1
        self.benchmark = 1
        self.action = "asyncmon"
        self.interval = 0
        self.rid = "maintenanceDummy"

    def toggleMaintenanceMode(self, node, action):
        pats = []
        pats.append(self.templates["Pat:DC_IDLE"])

        # fail the resource right after turning Maintenance mode on
        # verify it is not recovered until maintenance mode is turned off
        if action == "On":
            pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for %s on" % (self.action, self.rid))
        else:
-            pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0"))
-            pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0"))
+            pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid))
+            pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid))

        watch = self.create_watch(pats, 60)
        watch.setwatch()

        self.debug("Turning maintenance mode %s" % action)
        self.rsh(node, self.templates["MaintenanceMode%s" % (action)])
        if (action == "On"):
            self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node))

        self.set_timer("recover%s" % (action))
        watch.lookforall()
        self.log_timer("recover%s" % (action))
        if watch.unmatched:
            self.debug("Failed to find patterns when turning maintenance mode %s" % action)
            return repr(watch.unmatched)

        return ""

    def insertMaintenanceDummy(self, node):
        pats = []
-        pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % (self.rid, "start_0")))
+        pats.append(("%s.*" % node) + (self.templates["Pat:RscOpOK"] % ("start", self.rid)))

        watch = self.create_watch(pats, 60)
        watch.setwatch()

        self.CM.AddDummyRsc(node, self.rid)

        self.set_timer("addDummy")
        watch.lookforall()
        self.log_timer("addDummy")

        if watch.unmatched:
            self.debug("Failed to find patterns when adding maintenance dummy resource")
            return repr(watch.unmatched)
        return ""

    def removeMaintenanceDummy(self, node):
pats = [] - pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) watch = self.create_watch(pats, 60) watch.setwatch() self.CM.RemoveDummyRsc(node, self.rid) self.set_timer("removeDummy") watch.lookforall() self.log_timer("removeDummy") if watch.unmatched: self.debug("Failed to find patterns when removing maintenance dummy resource") return repr(watch.unmatched) return "" def managedRscList(self, node): rscList = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.managed(): rscList.append(tmp.id) return rscList def verifyResources(self, node, rscList, managed): managedList = list(rscList) managed_str = "managed" if not managed: managed_str = "unmanaged" (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if managed and not tmp.managed(): continue elif not managed and tmp.managed(): continue elif managedList.count(tmp.id): managedList.remove(tmp.id) if len(managedList) == 0: self.debug("Found all %s resources on %s" % (managed_str, node)) return True self.logger.log("Could not find all %s resources on %s. %s" % (managed_str, node, managedList)) return False def __call__(self, node): '''Perform the 'MaintenanceMode' test. ''' self.incr("calls") verify_managed = False verify_unmanaged = False failPat = "" ret = self.startall(None) if not ret: return self.failure("Setup failed") # get a list of all the managed resources. We use this list # after enabling maintenance mode to verify all managed resources # become un-managed. After maintenance mode is turned off, we use # this list to verify all the resources become managed again. managedResources = self.managedRscList(node) if len(managedResources) == 0: self.logger.log("No managed resources on %s" % node) return self.skipped() # insert a fake resource we can fail during maintenance mode # so we can verify recovery does not take place until after maintenance # mode is disabled. failPat = failPat + self.insertMaintenanceDummy(node) # toggle maintenance mode ON, then fail dummy resource. failPat = failPat + self.toggleMaintenanceMode(node, "On") # verify all the resources are now unmanaged if self.verifyResources(node, managedResources, False): verify_unmanaged = True # Toggle maintenance mode OFF, verify dummy is recovered. failPat = failPat + self.toggleMaintenanceMode(node, "Off") # verify all the resources are now managed again if self.verifyResources(node, managedResources, True): verify_managed = True # Remove our maintenance dummy resource. 
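# Aside: the managed/unmanaged verification above leans on the same
# "crm_resource -c" line-dispatch idiom used throughout this file.  A
# condensed sketch of just that idiom (hypothetical helper; it assumes the
# AuditResource import the surrounding code already relies on):
def list_audit_resources(rsh, cm, node):
    resources = []
    (rc, lines) = rsh(node, "crm_resource -c", None)
    for line in lines:
        if re.search("^Resource", line):
            resources.append(AuditResource(cm, line))
    return resources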
failPat = failPat + self.removeMaintenanceDummy(node) self.CM.cluster_stable() if failPat != "": return self.failure("Unmatched patterns: %s" % (failPat)) elif verify_unmanaged is False: return self.failure("Failed to verify resources became unmanaged during maintenance mode") elif verify_managed is False: return self.failure("Failed to verify resources switched back to managed after disabling maintenance mode") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for %s" % self.rid, r"pengine.*: Recover %s\s*\(.*\)" % self.rid, r"Unknown operation: fail", r"(ERROR|error): sending stonithRA op to stonithd failed.", - self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))), + self.templates["Pat:RscOpOK"] % (self.action, self.rid), r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), ] AllTestClasses.append(MaintenanceMode) class ResourceRecover(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ResourceRecover" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.max = 30 self.rid = None self.rid_alt = None #self.is_unsafe = 1 self.benchmark = 1 # these are the values used for the new LRM API call self.action = "asyncmon" self.interval = 0 def __call__(self, node): '''Perform the 'ResourceRecover' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed") resourcelist = self.CM.active_resources(node) # if there are no resourcelist, return directly if len(resourcelist) == 0: self.logger.log("No active resources on %s" % node) return self.skipped() self.rid = self.Env.RandomGen.choice(resourcelist) self.rid_alt = self.rid rsc = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): tmp = AuditResource(self.CM, line) if tmp.id == self.rid: rsc = tmp # Handle anonymous clones that get renamed self.rid = rsc.clone_id break if not rsc: return self.failure("Could not find %s in the resource list" % self.rid) self.debug("Shooting %s aka. 
%s" % (rsc.clone_id, rsc.id)) pats = [] pats.append(r"pengine.*:\s+warning:.*Processing failed op %s for (%s|%s) on" % (self.action, rsc.id, rsc.clone_id)) if rsc.managed(): - pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "stop_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.rid)) if rsc.unique(): - pats.append(self.templates["Pat:RscOpOK"] % (self.rid, "start_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", self.rid)) else: # Anonymous clones may get restarted with a different clone number - pats.append(self.templates["Pat:RscOpOK"] % (".*", "start_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*")) watch = self.create_watch(pats, 60) watch.setwatch() self.rsh(node, "crm_resource -V -F -r %s -H %s &>/dev/null" % (self.rid, node)) self.set_timer("recover") watch.lookforall() self.log_timer("recover") self.CM.cluster_stable() recovered = self.CM.ResourceLocation(self.rid) if watch.unmatched: return self.failure("Patterns not found: %s" % repr(watch.unmatched)) elif rsc.unique() and len(recovered) > 1: return self.failure("%s is now active on more than one node: %s"%(self.rid, repr(recovered))) elif len(recovered) > 0: self.debug("%s is running on: %s" % (self.rid, repr(recovered))) elif rsc.managed(): return self.failure("%s was not recovered and is inactive" % self.rid) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for %s" % self.rid, r"pengine.*: Recover (%s|%s)\s*\(.*\)" % (self.rid, self.rid_alt), r"Unknown operation: fail", r"(ERROR|error): sending stonithRA op to stonithd failed.", - self.templates["Pat:RscOpOK"] % (self.rid, ("%s_%d" % (self.action, self.interval))), + self.templates["Pat:RscOpOK"] % (self.action, self.rid), r"(ERROR|error).*: Action %s_%s_%d .* initiated outside of a transition" % (self.rid, self.action, self.interval), ] AllTestClasses.append(ResourceRecover) class ComponentFail(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "ComponentFail" # TODO make this work correctly in docker. self.is_docker_unsafe = 1 self.startall = SimulStartLite(cm) self.complist = cm.Components() self.patterns = [] self.okerrpatterns = [] self.is_unsafe = 1 def __call__(self, node): '''Perform the 'ComponentFail' test. ''' self.incr("calls") self.patterns = [] self.okerrpatterns = [] # start all nodes ret = self.startall(None) if not ret: return self.failure("Setup failed") if not self.CM.cluster_stable(self.Env["StableTime"]): return self.failure("Setup failed - unstable") node_is_dc = self.CM.is_node_dc(node, None) # select a component to kill chosen = self.Env.RandomGen.choice(self.complist) while chosen.dc_only == 1 and node_is_dc == 0: chosen = self.Env.RandomGen.choice(self.complist) self.debug("...component %s (dc=%d,boot=%d)" % (chosen.name, node_is_dc,chosen.triggersreboot)) self.incr(chosen.name) if chosen.name != "aisexec" and chosen.name != "corosync": if self.Env["Name"] != "crm-lha" or chosen.name != "pengine": self.patterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name)) self.patterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name)) self.patterns.extend(chosen.pats) if node_is_dc: self.patterns.extend(chosen.dc_pats) # In an ideal world, this next stuff should be in the "chosen" object as a member function if self.Env["Name"] == "crm-lha" and chosen.triggersreboot: # Make sure the node goes down and then comes back up if it should reboot... 
for other in self.Env["nodes"]: if other != node: self.patterns.append(self.templates["Pat:They_stopped"] %(other, self.CM.key_for_node(node))) self.patterns.append(self.templates["Pat:Slave_started"] % node) self.patterns.append(self.templates["Pat:Local_started"] % node) if chosen.dc_only: # Sometimes these will be in the log, and sometimes they won't... self.okerrpatterns.append("%s .*Process %s:.* exited" % (node, chosen.name)) self.okerrpatterns.append("%s .*I_ERROR.*crmdManagedChildDied" % node) self.okerrpatterns.append("%s .*The %s subsystem terminated unexpectedly" % (node, chosen.name)) self.okerrpatterns.append("(ERROR|error): Client .* exited with return code") else: # Sometimes this won't be in the log... self.okerrpatterns.append(self.templates["Pat:ChildKilled"] %(node, chosen.name)) self.okerrpatterns.append(self.templates["Pat:ChildRespawn"] %(node, chosen.name)) self.okerrpatterns.append(self.templates["Pat:ChildExit"]) if chosen.name == "stonith": # Ignore actions for STONITH resources (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.okerrpatterns.append(self.templates["Pat:Fencing_recover"] % r.id) # supply a copy so self.patterns doesn't end up empty tmpPats = [] tmpPats.extend(self.patterns) self.patterns.extend(chosen.badnews_ignore) # Look for STONITH ops, depending on Env["at-boot"] we might need to change the nodes status stonithPats = [] stonithPats.append(self.templates["Pat:Fencing_ok"] % node) stonith = self.create_watch(stonithPats, 0) stonith.setwatch() # set the watch for stable watch = self.create_watch( tmpPats, self.Env["DeadTime"] + self.Env["StableTime"] + self.Env["StartTime"]) watch.setwatch() # kill the component chosen.kill(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() self.debug("Waiting for any STONITHd node to come back up") self.CM.ns.WaitForAllNodesToComeUp(self.Env["nodes"], 600) self.debug("Waiting for the cluster to re-stabilize with all nodes") self.CM.cluster_stable(self.Env["StartTime"]) self.debug("Checking if %s was shot" % node) shot = stonith.look(60) if shot: self.debug("Found: " + repr(shot)) self.okerrpatterns.append(self.templates["Pat:Fencing_start"] % node) if self.Env["at-boot"] == 0: self.CM.ShouldBeStatus[node] = "down" # If fencing occurred, chances are many (if not all) the expected logs # will not be sent - or will be lost when the node reboots return self.success() # check for logs indicating a graceful recovery matched = watch.lookforall(allow_multiple_matches=1) if watch.unmatched: self.logger.log("Patterns not found: " + repr(watch.unmatched)) self.debug("Waiting for the cluster to re-stabilize with all nodes") is_stable = self.CM.cluster_stable(self.Env["StartTime"]) if not matched: return self.failure("Didn't find all expected %s patterns" % chosen.name) elif not is_stable: return self.failure("Cluster did not become stable after killing %s" % chosen.name) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Note that okerrpatterns refers to the last time we ran this test # The good news is that this works fine for us... self.okerrpatterns.extend(self.patterns) return self.okerrpatterns AllTestClasses.append(ComponentFail) class SplitBrainTest(CTSTest): '''It is used to test split-brain. 
when the path between the two nodes break check the two nodes both take over the resource''' def __init__(self,cm): CTSTest.__init__(self,cm) self.name = "SplitBrain" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.is_experimental = 1 def isolate_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"]) + " from " +repr(partition)) if len(other_nodes) == 0: return 1 self.debug("Creating partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: if not self.CM.isolate_node(node, other_nodes): self.logger.log("Could not isolate %s" % node) return 0 return 1 def heal_partition(self, partition): other_nodes = [] other_nodes.extend(self.Env["nodes"]) for node in partition: try: other_nodes.remove(node) except ValueError: self.logger.log("Node "+node+" not in " + repr(self.Env["nodes"])) if len(other_nodes) == 0: return 1 self.debug("Healing partition: " + repr(partition)) self.debug("Everyone else: " + repr(other_nodes)) for node in partition: self.CM.unisolate_node(node, other_nodes) def __call__(self, node): '''Perform split-brain test''' self.incr("calls") self.passed = 1 partitions = {} ret = self.startall(None) if not ret: return self.failure("Setup failed") while 1: # Retry until we get multiple partitions partitions = {} p_max = len(self.Env["nodes"]) for node in self.Env["nodes"]: p = self.Env.RandomGen.randint(1, p_max) if not p in partitions: partitions[p] = [] partitions[p].append(node) p_max = len(partitions.keys()) if p_max > 1: break # else, try again self.debug("Created %d partitions" % p_max) for key in list(partitions.keys()): self.debug("Partition["+str(key)+"]:\t"+repr(partitions[key])) # Disabling STONITH to reduce test complexity for now self.rsh(node, "crm_attribute -V -n stonith-enabled -v false") for key in list(partitions.keys()): self.isolate_partition(partitions[key]) count = 30 while count > 0: if len(self.CM.find_partitions()) != p_max: time.sleep(10) else: break else: self.failure("Expected partitions were not created") # Target number of partitions formed - wait for stability if not self.CM.cluster_stable(): self.failure("Partitioned cluster not stable") # Now audit the cluster state self.CM.partitions_expected = p_max if not self.audit(): self.failure("Audits failed") self.CM.partitions_expected = 1 # And heal them again for key in list(partitions.keys()): self.heal_partition(partitions[key]) # Wait for a single partition to form count = 30 while count > 0: if len(self.CM.find_partitions()) != 1: time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not reform") # Wait for it to have the right number of members count = 30 while count > 0: members = [] partitions = self.CM.find_partitions() if len(partitions) > 0: members = partitions[0].split() if len(members) != len(self.Env["nodes"]): time.sleep(10) count -= 1 else: break else: self.failure("Cluster did not completely reform") # Wait up to 20 minutes - the delay is more preferable than # trying to continue with in a messed up state if not self.CM.cluster_stable(1200): self.failure("Reformed cluster not stable") if self.Env["continue"] == 1: answer = "Y" else: try: answer = raw_input('Continue? 
[nY]') except EOFError, e: answer = "n" if answer and answer == "n": raise ValueError("Reformed cluster not stable") # Turn fencing back on if self.Env["DoFencing"]: self.rsh(node, "crm_attribute -V -D -n stonith-enabled") self.CM.cluster_stable() if self.passed: return self.success() return self.failure("See previous errors") def errorstoignore(self): '''Return list of errors which are 'normal' and should be ignored''' return [ r"Another DC detected:", r"(ERROR|error).*: .*Application of an update diff failed", r"crmd.*:.*not in our membership list", r"CRIT:.*node.*returning after partition", ] def is_applicable(self): if not self.is_applicable_common(): return 0 return len(self.Env["nodes"]) > 2 AllTestClasses.append(SplitBrainTest) class Reattach(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "Reattach" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) self.is_unsafe = 0 # Handled by canrunnow() def _is_managed(self, node): is_managed = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -Q -G -d true", 1) is_managed = is_managed[:-1] # Strip off the newline return is_managed == "true" def _set_unmanaged(self, node): self.debug("Disable resource management") self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -v false") def _set_managed(self, node): self.debug("Re-enable resource management") self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -D") def setup(self, node): attempt = 0 if not self.startall(None): return None # Make sure we are really _really_ stable and that all # resources, including those that depend on transient node # attributes, are started while not self.CM.cluster_stable(double_check=True): if attempt < 5: attempt += 1 self.debug("Not stable yet, re-testing") else: self.logger.log("Cluster is not stable") return None return 1 def teardown(self, node): # Make sure 'node' is up start = StartTest(self.CM) start(node) if not self._is_managed(node): self.logger.log("Attempting to re-enable resource management on %s" % node) self._set_managed(node) self.CM.cluster_stable() if not self._is_managed(node): self.logger.log("Could not re-enable resource management") return 0 return 1 def canrunnow(self, node): '''Return TRUE if we can meaningfully run right now''' if self.find_ocfs2_resources(node): self.logger.log("Detach/Reattach scenarios are not possible with OCFS2 services present") return 0 return 1 def __call__(self, node): self.incr("calls") pats = [] # Conveniently, pengine will display this message when disabling management, # even if fencing is not enabled, so we can rely on it. 
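    # Aside on _is_managed() above: stripping the trailing newline with [:-1]
    # works, but a sketch using .strip() is slightly more robust to output
    # variations (hypothetical standalone helper; the crm_attribute options
    # are exactly those used above):
    def _query_is_managed_sketch(self, node):
        out = self.rsh(node, "crm_attribute -t rsc_defaults -n is-managed -Q -G -d true", 1)
        return out.strip() == "true"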
managed = self.create_watch(["Delaying fencing operations"], 60) managed.setwatch() self._set_unmanaged(node) if not managed.lookforall(): self.logger.log("Patterns not found: " + repr(managed.unmatched)) return self.failure("Resource management not disabled") pats = [] - pats.append(self.templates["Pat:RscOpOK"] % (".*", "start")) - pats.append(self.templates["Pat:RscOpOK"] % (".*", "stop")) - pats.append(self.templates["Pat:RscOpOK"] % (".*", "promote")) - pats.append(self.templates["Pat:RscOpOK"] % (".*", "demote")) - pats.append(self.templates["Pat:RscOpOK"] % (".*", "migrate")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", ".*")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", ".*")) + pats.append(self.templates["Pat:RscOpOK"] % ("promote", ".*")) + pats.append(self.templates["Pat:RscOpOK"] % ("demote", ".*")) + pats.append(self.templates["Pat:RscOpOK"] % ("migrate", ".*")) watch = self.create_watch(pats, 60, "ShutdownActivity") watch.setwatch() self.debug("Shutting down the cluster") ret = self.stopall(None) if not ret: self._set_managed(node) return self.failure("Couldn't shut down the cluster") self.debug("Bringing the cluster back up") ret = self.startall(None) time.sleep(5) # allow ping to update the CIB if not ret: self._set_managed(node) return self.failure("Couldn't restart the cluster") if self.local_badnews("ResourceActivity:", watch): self._set_managed(node) return self.failure("Resources stopped or started during cluster restart") watch = self.create_watch(pats, 60, "StartupActivity") watch.setwatch() # Re-enable resource management (and verify it happened). self._set_managed(node) self.CM.cluster_stable() if not self._is_managed(node): return self.failure("Could not re-enable resource management") # Ignore actions for STONITH resources ignore = [] (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rclass == "stonith": self.debug("Ignoring start actions for %s" % r.id) - ignore.append(self.templates["Pat:RscOpOK"] % (r.id, "start_0")) + ignore.append(self.templates["Pat:RscOpOK"] % ("start", r.id)) if self.local_badnews("ResourceActivity:", watch, ignore): return self.failure("Resources stopped or started after resource management was re-enabled") return ret def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"resources were active at shutdown", ] def is_applicable(self): if self.Env["Name"] == "crm-lha": return None return 1 AllTestClasses.append(Reattach) class SpecialTest1(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SpecialTest1" self.startall = SimulStartLite(cm) self.restart1 = RestartTest(cm) self.stopall = SimulStopLite(cm) def __call__(self, node): '''Perform the 'SpecialTest1' test for Andrew. ''' self.incr("calls") # Shut down all the nodes... 
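# Sketch: the "no resource activity" watches Reattach builds above are just
# every Pat:RscOpOK variant wildcarded on the resource name; equivalently
# (hypothetical helper, same templates):
def activity_patterns(templates):
    ops = ["start", "stop", "promote", "demote", "migrate"]
    return [templates["Pat:RscOpOK"] % (op, ".*") for op in ops]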
ret = self.stopall(None) if not ret: return self.failure("Could not stop all nodes") # Test config recovery when the other nodes come up self.rsh(node, "rm -f "+CTSvars.CRM_CONFIG_DIR+"/cib*") # Start the selected node ret = self.restart1(node) if not ret: return self.failure("Could not start "+node) # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Could not start the remaining nodes") return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' # Errors that occur as a result of the CIB being wiped return [ r"error.*: v1 patchset error, patch failed to apply: Application of an update diff failed", r"error.*: Resource start-up disabled since no STONITH resources have been defined", r"error.*: Either configure some or disable STONITH with the stonith-enabled option", r"error.*: NOTE: Clusters with shared data need STONITH to ensure data integrity", ] AllTestClasses.append(SpecialTest1) class HAETest(CTSTest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "HAETest" self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) self.is_loop = 1 def setup(self, node): # Start all remaining nodes ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") return self.success() def wait_on_state(self, node, resource, expected_clones, attempts=240): while attempts > 0: active = 0 (rc, lines) = self.rsh(node, "crm_resource -r %s -W -Q" % resource, stdout=None) # Hack until crm_resource does the right thing if rc == 0 and lines: active = len(lines) if len(lines) == expected_clones: return 1 elif rc == 1: self.debug("Resource %s is still inactive" % resource) elif rc == 234: self.logger.log("Unknown resource %s" % resource) return 0 elif rc == 246: self.logger.log("Cluster is inactive") return 0 elif rc != 0: self.logger.log("Call to crm_resource failed, rc=%d" % rc) return 0 else: self.debug("Resource %s is active on %d times instead of %d" % (resource, active, expected_clones)) attempts -= 1 time.sleep(1) return 0 def find_dlm(self, node): self.r_dlm = None (rc, lines) = self.rsh(node, "crm_resource -c", None) for line in lines: if re.search("^Resource", line): r = AuditResource(self.CM, line) if r.rtype == "controld" and r.parent != "NA": self.debug("Found dlm: %s" % self.r_dlm) self.r_dlm = r.parent return 1 return 0 def find_hae_resources(self, node): self.r_dlm = None self.r_o2cb = None self.r_ocfs2 = [] if self.find_dlm(node): self.find_ocfs2_resources(node) def is_applicable(self): if not self.is_applicable_common(): return 0 if self.Env["Schema"] == "hae": return 1 return None class HAERoleTest(HAETest): def __init__(self, cm): '''Lars' mount/unmount test for the HA extension. 
''' HAETest.__init__(self,cm) self.name = "HAERoleTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_resource -V -r %s -p target-role -v %s --meta" % (resource, target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 delay = 2 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "Stopped") if not self.wait_on_state(node, self.r_dlm, 0): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "Started") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAERoleTest) class HAEStandbyTest(HAETest): '''Set up a custom test to cause quorum failure issues for Andrew''' def __init__(self, cm): HAETest.__init__(self,cm) self.name = "HAEStandbyTest" def change_state(self, node, resource, target): rc = self.rsh(node, "crm_standby -V -l reboot -v %s" % (target)) return rc def __call__(self, node): self.incr("calls") lpc = 0 failed = 0 done = time.time() + self.Env["loop-minutes"]*60 self.find_hae_resources(node) clone_max = len(self.Env["nodes"]) while time.time() <= done and not failed: lpc = lpc + 1 self.change_state(node, self.r_dlm, "true") if not self.wait_on_state(node, self.r_dlm, clone_max-1): self.failure("%s did not go down correctly" % self.r_dlm) failed = lpc self.change_state(node, self.r_dlm, "false") if not self.wait_on_state(node, self.r_dlm, clone_max): self.failure("%s did not come up correctly" % self.r_dlm) failed = lpc if not self.wait_on_state(node, self.r_o2cb, clone_max): self.failure("%s did not come up correctly" % self.r_o2cb) failed = lpc for fs in self.r_ocfs2: if not self.wait_on_state(node, fs, clone_max): self.failure("%s did not come up correctly" % fs) failed = lpc if failed: return self.failure("iteration %d failed" % failed) return self.success() AllTestClasses.append(HAEStandbyTest) class NearQuorumPointTest(CTSTest): ''' This test brings larger clusters near the quorum point (50%). In addition, it will test doing starts and stops at the same time. Here is how I think it should work: - loop over the nodes and decide randomly which will be up and which will be down Use a 50% probability for each of up/down. - figure out what to do to get into that state from the current state - in parallel, bring up those going up and bring those going down. ''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "NearQuorumPoint" def __call__(self, dummy): '''Perform the 'NearQuorumPoint' test. 
''' self.incr("calls") startset = [] stopset = [] stonith = self.CM.prepare_fencing_watcher("NearQuorumPoint") #decide what to do with each node for node in self.Env["nodes"]: action = self.Env.RandomGen.choice(["start","stop"]) #action = self.Env.RandomGen.choice(["start","stop","no change"]) if action == "start" : startset.append(node) elif action == "stop" : stopset.append(node) self.debug("start nodes:" + repr(startset)) self.debug("stop nodes:" + repr(stopset)) #add search patterns watchpats = [ ] for node in stopset: if self.CM.ShouldBeStatus[node] == "up": watchpats.append(self.templates["Pat:We_stopped"] % node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": #watchpats.append(self.templates["Pat:Slave_started"] % node) watchpats.append(self.templates["Pat:Local_started"] % node) else: for stopping in stopset: if self.CM.ShouldBeStatus[stopping] == "up": watchpats.append(self.templates["Pat:They_stopped"] % (node, self.CM.key_for_node(stopping))) if len(watchpats) == 0: return self.skipped() if len(startset) != 0: watchpats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() #begin actions for node in stopset: if self.CM.ShouldBeStatus[node] == "up": self.CM.StopaCMnoBlock(node) for node in startset: if self.CM.ShouldBeStatus[node] == "down": self.CM.StartaCMnoBlock(node) #get the result if watch.lookforall(): self.CM.cluster_stable() self.CM.fencing_cleanup("NearQuorumPoint", stonith) return self.success() self.logger.log("Warn: Patterns not found: " + repr(watch.unmatched)) #get the "bad" nodes upnodes = [] for node in stopset: if self.CM.StataCM(node) == 1: upnodes.append(node) downnodes = [] for node in startset: if self.CM.StataCM(node) == 0: downnodes.append(node) self.CM.fencing_cleanup("NearQuorumPoint", stonith) if upnodes == [] and downnodes == []: self.CM.cluster_stable() # Make sure they're completely down with no residule for node in stopset: self.rsh(node, self.templates["StopCmd"]) return self.success() if len(upnodes) > 0: self.logger.log("Warn: Unstoppable nodes: " + repr(upnodes)) if len(downnodes) > 0: self.logger.log("Warn: Unstartable nodes: " + repr(downnodes)) return self.failure() def is_applicable(self): if self.Env["Name"] == "crm-cman": return None return 1 AllTestClasses.append(NearQuorumPointTest) class RollingUpgradeTest(CTSTest): '''Perform a rolling upgrade of the cluster''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RollingUpgrade" self.start = StartTest(cm) self.stop = StopTest(cm) self.stopall = SimulStopLite(cm) self.startall = SimulStartLite(cm) def setup(self, node): # Start all remaining nodes ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.downgrade(node, None): return self.failure("Couldn't downgrade %s" % node) ret = self.startall(None) if not ret: return self.failure("Couldn't start all nodes") return self.success() def teardown(self, node): # Stop everything ret = self.stopall(None) if not ret: return self.failure("Couldn't stop all nodes") for node in self.Env["nodes"]: if not self.upgrade(node, None): return self.failure("Couldn't upgrade %s" % node) return self.success() def install(self, node, version, start=1, flags="--force"): target_dir = "/tmp/rpm-%s" % version src_dir = "%s/%s" % (self.Env["rpm-dir"], version) self.logger.log("Installing %s on %s with %s" % (version, node, flags)) if not self.stop(node): return self.failure("stop failure: "+node) rc = 
self.rsh(node, "mkdir -p %s" % target_dir) rc = self.rsh(node, "rm -f %s/*.rpm" % target_dir) (rc, lines) = self.rsh(node, "ls -1 %s/*.rpm" % src_dir, None) for line in lines: line = line[:-1] rc = self.rsh.cp("%s" % (line), "%s:%s/" % (node, target_dir)) rc = self.rsh(node, "rpm -Uvh %s %s/*.rpm" % (flags, target_dir)) if start and not self.start(node): return self.failure("start failure: "+node) return self.success() def upgrade(self, node, start=1): return self.install(node, self.Env["current-version"], start) def downgrade(self, node, start=1): return self.install(node, self.Env["previous-version"], start, "--force --nodeps") def __call__(self, node): '''Perform the 'Rolling Upgrade' test. ''' self.incr("calls") for node in self.Env["nodes"]: if self.upgrade(node): return self.failure("Couldn't upgrade %s" % node) self.CM.cluster_stable() return self.success() def is_applicable(self): if not self.is_applicable_common(): return None if not "rpm-dir" in self.Env.keys(): return None if not "current-version" in self.Env.keys(): return None if not "previous-version" in self.Env.keys(): return None return 1 # Register RestartTest as a good test to run AllTestClasses.append(RollingUpgradeTest) class BSC_AddResource(CTSTest): '''Add a resource to the cluster''' def __init__(self, cm): CTSTest.__init__(self, cm) self.name = "AddResource" self.resource_offset = 0 self.cib_cmd = """cibadmin -C -o %s -X '%s' """ def __call__(self, node): self.incr("calls") self.resource_offset = self.resource_offset + 1 r_id = "bsc-rsc-%s-%d" % (node, self.resource_offset) start_pat = "crmd.*%s_start_0.*confirmed.*ok" patterns = [] patterns.append(start_pat % r_id) watch = self.create_watch(patterns, self.Env["DeadTime"]) watch.setwatch() ip = self.NextIP() if not self.make_ip_resource(node, r_id, "ocf", "IPaddr", ip): return self.failure("Make resource %s failed" % r_id) failed = 0 watch_result = watch.lookforall() if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Pattern not found: %s" % (regex)) failed = 1 if failed: return self.failure("Resource pattern(s) not found") if not self.CM.cluster_stable(self.Env["DeadTime"]): return self.failure("Unstable cluster") return self.success() def NextIP(self): ip = self.Env["IPBase"] if ":" in ip: fields = ip.rpartition(":") fields[2] = str(hex(int(fields[2], 16)+1)) print(str(hex(int(f[2], 16)+1))) else: fields = ip.rpartition('.') fields[2] = str(int(fields[2])+1) ip = fields[0] + fields[1] + fields[3]; self.Env["IPBase"] = ip return ip.strip() def make_ip_resource(self, node, id, rclass, type, ip): self.logger.log("Creating %s::%s:%s (%s) on %s" % (rclass,type,id,ip,node)) rsc_xml=""" """ % (id, rclass, type, id, id, ip) node_constraint = """ """ % (id, id, id, id, node) rc = 0 (rc, lines) = self.rsh(node, self.cib_cmd % ("constraints", node_constraint), None) if rc != 0: self.logger.log("Constraint creation failed: %d" % rc) return None (rc, lines) = self.rsh(node, self.cib_cmd % ("resources", rsc_xml), None) if rc != 0: self.logger.log("Resource creation failed: %d" % rc) return None return 1 def is_applicable(self): if self.Env["DoBSC"]: return 1 return None AllTestClasses.append(BSC_AddResource) class SimulStopLite(CTSTest): '''Stop any active nodes ~ simultaneously''' def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "SimulStopLite" def __call__(self, dummy): '''Perform the 'SimulStopLite' setup work. ''' self.incr("calls") self.debug("Setup: " + self.name) # We ignore the "node" parameter... 
        watchpats = [ ]

        for node in self.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] == "up":
                self.incr("WasStarted")
                watchpats.append(self.templates["Pat:We_stopped"] % node)
                #if self.Env["use_logd"]:
                #    watchpats.append(self.templates["Pat:Logd_stopped"] % node)

        if len(watchpats) == 0:
            self.CM.clear_all_caches()
            return self.success()

        # Stop all the nodes - at about the same time...
        watch = self.create_watch(watchpats, self.Env["DeadTime"]+10)

        watch.setwatch()
        self.set_timer()
        for node in self.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] == "up":
                self.CM.StopaCMnoBlock(node)
        if watch.lookforall():
            self.CM.clear_all_caches()

            # Make sure they're completely down with no residue
            for node in self.Env["nodes"]:
                self.rsh(node, self.templates["StopCmd"])

            return self.success()

        did_fail = 0
        up_nodes = []
        for node in self.Env["nodes"]:
            if self.CM.StataCM(node) == 1:
                did_fail = 1
                up_nodes.append(node)

        if did_fail:
            return self.failure("Active nodes exist: " + repr(up_nodes))

        self.logger.log("Warn: All nodes stopped but CTS didn't detect: " + repr(watch.unmatched))

        self.CM.clear_all_caches()
        return self.failure("Missing log message: "+repr(watch.unmatched))

    def is_applicable(self):
        '''SimulStopLite is a setup test and never applicable'''
        return 0

class SimulStartLite(CTSTest):
    '''Start any stopped nodes ~ simultaneously'''
    def __init__(self, cm):
        CTSTest.__init__(self,cm)
        self.name = "SimulStartLite"

    def __call__(self, dummy):
        '''Perform the 'SimulStartLite' setup work. '''
        self.incr("calls")
        self.debug("Setup: " + self.name)

        # We ignore the "node" parameter...
        node_list = []
        for node in self.Env["nodes"]:
            if self.CM.ShouldBeStatus[node] == "down":
                self.incr("WasStopped")
                node_list.append(node)

        self.set_timer()
        while len(node_list) > 0:
            # Repeat until all nodes come up
            watchpats = [ ]

            uppat = self.templates["Pat:Slave_started"]
            if self.CM.upcount() == 0:
                uppat = self.templates["Pat:Local_started"]

            watchpats.append(self.templates["Pat:DC_IDLE"])
            for node in node_list:
                watchpats.append(uppat % node)
                watchpats.append(self.templates["Pat:InfraUp"] % node)
                watchpats.append(self.templates["Pat:PacemakerUp"] % node)

            # Start all the nodes - at about the same time...
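# Sketch of the pattern selection above: the very first node up must elect
# itself (Pat:Local_started), while later nodes merely join an existing
# membership (Pat:Slave_started).  Hypothetical standalone equivalent:
def startup_patterns(templates, cm, node_list):
    uppat = templates["Pat:Slave_started"]
    if cm.upcount() == 0:
        uppat = templates["Pat:Local_started"]
    pats = [templates["Pat:DC_IDLE"]]
    for node in node_list:
        pats.append(uppat % node)
        pats.append(templates["Pat:InfraUp"] % node)
        pats.append(templates["Pat:PacemakerUp"] % node)
    return pats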
watch = self.create_watch(watchpats, self.Env["DeadTime"]+10) watch.setwatch() stonith = self.CM.prepare_fencing_watcher(self.name) for node in node_list: self.CM.StartaCMnoBlock(node) watch.lookforall() node_list = self.CM.fencing_cleanup(self.name, stonith) if node_list == None: return self.failure("Cluster did not stabilize") # Remove node_list messages from watch.unmatched for node in node_list: self.logger.debug("Dealing with stonith operations for %s" % repr(node_list)) if watch.unmatched: try: watch.unmatched.remove(uppat % node) except: self.debug("Already matched: %s" % (uppat % node)) try: watch.unmatched.remove(self.templates["Pat:InfraUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:InfraUp"] % node)) try: watch.unmatched.remove(self.templates["Pat:PacemakerUp"] % node) except: self.debug("Already matched: %s" % (self.templates["Pat:PacemakerUp"] % node)) if watch.unmatched: for regex in watch.unmatched: self.logger.log ("Warn: Startup pattern not found: %s" %(regex)) if not self.CM.cluster_stable(): return self.failure("Cluster did not stabilize") did_fail = 0 unstable = [] for node in self.Env["nodes"]: if self.CM.StataCM(node) == 0: did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstarted nodes exist: " + repr(unstable)) unstable = [] for node in self.Env["nodes"]: if not self.CM.node_stable(node): did_fail = 1 unstable.append(node) if did_fail: return self.failure("Unstable cluster nodes exist: " + repr(unstable)) return self.success() def is_applicable(self): '''SimulStartLite is a setup test and never applicable''' return 0 def TestList(cm, audits): result = [] for testclass in AllTestClasses: bound_test = testclass(cm) if bound_test.is_applicable(): bound_test.Audits = audits result.append(bound_test) return result class RemoteLXC(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = "RemoteLXC" self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.num_containers = 2 self.is_container = 1 self.is_docker_unsafe = 1 self.failed = 0 self.fail_string = "" def start_lxc_simple(self, node): # restore any artifacts laying around from a previous test. 
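# Aside: the try/except pruning of watch.unmatched above can equivalently be
# written as a single filter pass (sketch; assumes watch.unmatched is a list
# of the original pattern strings):
def prune_node_patterns(unmatched, templates, uppat, node):
    expected = set([uppat % node,
                    templates["Pat:InfraUp"] % node,
                    templates["Pat:PacemakerUp"] % node])
    return [p for p in unmatched if p not in expected]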
self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") # generate the containers, put them in the config, add some resources to them pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() - pats.append(self.templates["Pat:RscOpOK"] % ("lxc1", "start_0")) - pats.append(self.templates["Pat:RscOpOK"] % ("lxc2", "start_0")) - pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "start_0")) - pats.append(self.templates["Pat:RscOpOK"] % ("lxc-ms", "promote_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc1")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc2")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", "lxc-ms")) + pats.append(self.templates["Pat:RscOpOK"] % ("promote", "lxc-ms")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -g -a -m -s -c %d &>/dev/null" % self.num_containers) self.set_timer("remoteSimpleInit") watch.lookforall() self.log_timer("remoteSimpleInit") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 def cleanup_lxc_simple(self, node): pats = [ ] # if the test failed, attempt to clean up the cib and libvirt environment # as best as possible if self.failed == 1: # restore libvirt and cib self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") return watch = self.create_watch(pats, 120) watch.setwatch() - pats.append(self.templates["Pat:RscOpOK"] % ("container1", "stop_0")) - pats.append(self.templates["Pat:RscOpOK"] % ("container2", "stop_0")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container1")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", "container2")) self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -p &>/dev/null") self.set_timer("remoteSimpleCleanup") watch.lookforall() self.log_timer("remoteSimpleCleanup") if watch.unmatched: self.fail_string = "Unmatched patterns: %s" % (repr(watch.unmatched)) self.failed = 1 # cleanup libvirt self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -s -R &>/dev/null") def __call__(self, node): '''Perform the 'RemoteLXC' test. ''' self.incr("calls") ret = self.startall(None) if not ret: return self.failure("Setup failed, start all nodes failed.") rc = self.rsh(node, "/usr/share/pacemaker/tests/cts/lxc_autogen.sh -v &>/dev/null") if rc == 1: self.log("Environment test for lxc support failed.") return self.skipped() self.start_lxc_simple(node) self.cleanup_lxc_simple(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed == 1: return self.failure(self.fail_string) return self.success() def errorstoignore(self): '''Return list of errors which should be ignored''' return [ r"Updating failcount for ping", r"pengine.*: Recover (ping|lxc-ms|container)\s*\(.*\)", # The orphaned lxc-ms resource causes an expected transition error # that is a result of the pengine not having knowledge that the # ms resource used to be a clone. As a result it looks like that # resource is running in multiple locations when it shouldn't... But in # this instance we know why this error is occurring and that it is expected. 
- r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error", + r"Calculated [Tt]ransition .* /var/lib/pacemaker/pengine/pe-error", r"Resource lxc-ms .* is active on 2 nodes attempting recovery", r"Unknown operation: fail", r"(ERROR|error): sending stonithRA op to stonithd failed.", r"VirtualDomain.*ERROR: Unable to determine emulator", ] AllTestClasses.append(RemoteLXC) class RemoteDriver(CTSTest): def __init__(self, cm): CTSTest.__init__(self,cm) self.name = self.__class__.__name__ self.is_docker_unsafe = 1 self.start = StartTest(cm) self.startall = SimulStartLite(cm) self.stop = StopTest(cm) self.remote_rsc = "remote-rsc" self.cib_cmd = """cibadmin -C -o %s -X '%s' """ self.reset() def reset(self): self.pcmk_started = 0 self.failed = False self.fail_string = "" self.remote_node_added = 0 self.remote_rsc_added = 0 self.remote_use_reconnect_interval = self.Env.RandomGen.choice([True,False]) def fail(self, msg): """ Mark test as failed. """ self.failed = True # Always log the failure. self.logger.log(msg) # Use first failure as test status, as it's likely to be most useful. if not self.fail_string: self.fail_string = msg def get_othernode(self, node): for othernode in self.Env["nodes"]: if othernode == node: # we don't want to try and use the cib that we just shutdown. # find a cluster node that is not our soon to be remote-node. continue else: return othernode def del_rsc(self, node, rsc): othernode = self.get_othernode(node) rc = self.rsh(othernode, "crm_resource -D -r %s -t primitive" % (rsc)) if rc != 0: self.fail("Removal of resource '%s' failed" % rsc) def add_rsc(self, node, rsc_xml): othernode = self.get_othernode(node) rc = self.rsh(othernode, self.cib_cmd % ("resources", rsc_xml)) if rc != 0: self.fail("resource creation failed") def add_primitive_rsc(self, node): rsc_xml = """ """ % (self.remote_rsc) self.add_rsc(node, rsc_xml) if not self.failed: self.remote_rsc_added = 1 def add_connection_rsc(self, node): if self.remote_use_reconnect_interval: # use reconnect interval and make sure to set cluster-recheck-interval as well. rsc_xml = """ """ % (self.remote_node, node) self.rsh(self.get_othernode(node), self.templates["SetCheckInterval"] % ("45s")) else: # not using reconnect interval rsc_xml = """ """ % (self.remote_node, node) self.add_rsc(node, rsc_xml) if not self.failed: self.remote_node_added = 1 def stop_pcmk_remote(self, node): # disable pcmk remote for i in range(10): rc = self.rsh(node, "service pacemaker_remote stop") if rc != 0: time.sleep(6) else: break def start_pcmk_remote(self, node): for i in range(10): rc = self.rsh(node, "service pacemaker_remote start") if rc != 0: time.sleep(6) else: self.pcmk_started = 1 break def kill_pcmk_remote(self, node): """ Simulate a Pacemaker Remote daemon failure. """ # We kill the process to prevent a graceful stop, # then stop it to prevent the OS from restarting it. 
self.rsh(node, "killall -9 pacemaker_remoted") self.stop_pcmk_remote(node) def start_metal(self, node): pcmk_started = 0 # make sure the resource doesn't already exist for some reason self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_rsc)) self.rsh(node, "crm_resource -D -r %s -t primitive" % (self.remote_node)) if not self.stop(node): self.fail("Failed to shutdown cluster node %s" % node) return self.start_pcmk_remote(node) if self.pcmk_started == 0: self.fail("Failed to start pacemaker_remote on node %s" % node) return # convert node to baremetal node now that it has shutdow the cluster stack pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) self.add_connection_rsc(node) self.set_timer("remoteMetalInit") watch.lookforall() self.log_timer("remoteMetalInit") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) def migrate_connection(self, node): if self.failed: return pats = [ ] - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_to")) - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "migrate_from")) + pats.append(self.templates["Pat:RscOpOK"] % ("migrate_to", self.remote_node)) + pats.append(self.templates["Pat:RscOpOK"] % ("migrate_from", self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(pats, 120) watch.setwatch() (rc, lines) = self.rsh(node, "crm_resource -M -r %s" % (self.remote_node), None) if rc != 0: self.fail("failed to move remote node connection resource") return self.set_timer("remoteMetalMigrate") watch.lookforall() self.log_timer("remoteMetalMigrate") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return def fail_rsc(self, node): if self.failed: return watchpats = [ ] - watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "stop", self.remote_node)) - watchpats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node)) + watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("stop", self.remote_rsc, self.remote_node)) + watchpats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) watchpats.append(self.templates["Pat:DC_IDLE"]) watch = self.create_watch(watchpats, 120) watch.setwatch() self.debug("causing dummy rsc to fail.") rc = self.rsh(node, "rm -f /var/run/resource-agents/Dummy*") self.set_timer("remoteRscFail") watch.lookforall() self.log_timer("remoteRscFail") if watch.unmatched: self.fail("Unmatched patterns during rsc fail: %s" % watch.unmatched) def fail_connection(self, node): if self.failed: return watchpats = [ ] watchpats.append(self.templates["Pat:FenceOpOK"] % self.remote_node) watchpats.append(self.templates["Pat:NodeFenced"] % self.remote_node) watch = self.create_watch(watchpats, 120) watch.setwatch() # force stop the pcmk remote daemon. 
This will result in fencing. self.debug("Force stopped active remote node") self.kill_pcmk_remote(node) self.debug("Waiting for remote node to be fenced.") self.set_timer("remoteMetalFence") watch.lookforall() self.log_timer("remoteMetalFence") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return self.debug("Waiting for the remote node to come back up") self.CM.ns.WaitForNodeToComeUp(node, 120) pats = [ ] watch = self.create_watch(pats, 240) watch.setwatch() - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "start")) + pats.append(self.templates["Pat:RscOpOK"] % ("start", self.remote_node)) if self.remote_rsc_added == 1: - pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node)) + pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) # start the remote node again and watch it integrate back into the cluster. self.start_pcmk_remote(node) if self.pcmk_started == 0: self.fail("Failed to start pacemaker_remote on node %s" % node) return self.debug("Waiting for remote node to rejoin cluster after being fenced.") self.set_timer("remoteMetalRestart") watch.lookforall() self.log_timer("remoteMetalRestart") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) return def add_dummy_rsc(self, node): if self.failed: return # verify we can put a resource on the remote node pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() - pats.append(self.templates["Pat:RscRemoteOpOK"] % (self.remote_rsc, "start", self.remote_node)) + pats.append(self.templates["Pat:RscRemoteOpOK"] % ("start", self.remote_rsc, self.remote_node)) pats.append(self.templates["Pat:DC_IDLE"]) # Add a resource that must live on remote-node self.add_primitive_rsc(node) # force that rsc to prefer the remote node. (rc, line) = self.CM.rsh(node, "crm_resource -M -r %s -N %s -f" % (self.remote_rsc, self.remote_node), None) if rc != 0: self.fail("Failed to place remote resource on remote node.") return self.set_timer("remoteMetalRsc") watch.lookforall() self.log_timer("remoteMetalRsc") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) def test_attributes(self, node): if self.failed: return # This verifies permanent attributes can be set on a remote-node. It also # verifies the remote-node can edit its own cib node section remotely. (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -v testval -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to set remote-node attribute.
rc:%s output:%s" % (rc, line)) return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -Q -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to get remote-node attribute") return (rc, line) = self.CM.rsh(node, "crm_attribute -l forever -n testattr -D -N %s" % (self.remote_node), None) if rc != 0: self.fail("Failed to delete remote-node attribute") return def cleanup_metal(self, node): if self.pcmk_started == 0: return pats = [ ] watch = self.create_watch(pats, 120) watch.setwatch() if self.remote_rsc_added == 1: - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_rsc, "stop")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_rsc)) if self.remote_node_added == 1: - pats.append(self.templates["Pat:RscOpOK"] % (self.remote_node, "stop")) + pats.append(self.templates["Pat:RscOpOK"] % ("stop", self.remote_node)) self.set_timer("remoteMetalCleanup") if self.remote_use_reconnect_interval: self.debug("Cleaning up re-check interval") self.rsh(self.get_othernode(node), self.templates["ClearCheckInterval"]) if self.remote_rsc_added == 1: # Remove dummy resource added for remote node tests self.debug("Cleaning up dummy rsc put on remote node") self.rsh(node, "crm_resource -U -r %s" % self.remote_rsc) self.del_rsc(node, self.remote_rsc) if self.remote_node_added == 1: # Remove remote node's connection resource self.debug("Cleaning up remote node connection resource") self.rsh(node, "crm_resource -U -r %s" % (self.remote_node)) self.del_rsc(node, self.remote_node) watch.lookforall() self.log_timer("remoteMetalCleanup") if watch.unmatched: self.fail("Unmatched patterns: %s" % watch.unmatched) self.stop_pcmk_remote(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.remote_node_added == 1: # Remove remote node itself self.debug("Cleaning up node entry for remote node") self.rsh(self.get_othernode(node), "crm_node --force --remove %s" % self.remote_node) def setup_env(self, node): self.remote_node = "remote_%s" % (node) # we are assuming if all nodes have a key, that it is # the right key... If any node doesn't have a remote # key, we regenerate it everywhere. 
if self.rsh.exists_on_all("/etc/pacemaker/authkey", self.Env["nodes"]): return # create key locally (handle, keyfile) = tempfile.mkstemp(".cts") os.close(handle) devnull = open(os.devnull, 'wb') subprocess.check_call(["dd", "if=/dev/urandom", "of=%s" % keyfile, "bs=4096", "count=1"], stdout=devnull, stderr=devnull) devnull.close() # sync key throughout the cluster for node in self.Env["nodes"]: self.rsh(node, "mkdir -p --mode=0750 /etc/pacemaker") self.rsh.cp(keyfile, "root@%s:/etc/pacemaker/authkey" % node) self.rsh(node, "chgrp haclient /etc/pacemaker /etc/pacemaker/authkey") self.rsh(node, "chmod 0640 /etc/pacemaker/authkey") os.unlink(keyfile) def is_applicable(self): if not self.is_applicable_common(): return False for node in self.Env["nodes"]: rc = self.rsh(node, "type pacemaker_remoted >/dev/null 2>&1") if rc != 0: return False return True def start_new_test(self, node): self.incr("calls") self.reset() ret = self.startall(None) if not ret: return self.failure("Setup failed, start all nodes failed.") self.setup_env(node) self.start_metal(node) self.add_dummy_rsc(node) def __call__(self, node): return self.failure("This base class is not meant to be called directly.") def errorstoignore(self): '''Return list of errors which should be ignored''' return [ """is running on remote.*which isn't allowed""", """Connection terminated""", """Failed to send remote""", ] # RemoteDriver is just a base class for other tests, so it is not added to AllTestClasses class RemoteBasic(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteBaremetal' test. ''' self.start_new_test(node) self.test_attributes(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() AllTestClasses.append(RemoteBasic) class RemoteStonithd(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteStonithd' test. ''' self.start_new_test(node) self.fail_connection(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() def is_applicable(self): if not RemoteDriver.is_applicable(self): return False if "DoFencing" in self.Env.keys(): return self.Env["DoFencing"] return True def errorstoignore(self): ignore_pats = [ r"Unexpected disconnect on remote-node", r"crmd.*:\s+error.*: Operation remote_.*_monitor", + r"crmd.*:\s+error.*: Result of monitor operation for remote_.*", r"pengine.*:\s+Recover remote_.*\s*\(.*\)", - r"Calculated Transition .* /var/lib/pacemaker/pengine/pe-error", + r"Calculated [Tt]ransition .* /var/lib/pacemaker/pengine/pe-error", r"error.*: Resource .*ocf::.* is active on 2 nodes attempting recovery", ] ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteStonithd) class RemoteMigrate(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteMigrate' test. ''' self.start_new_test(node) self.migrate_connection(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() AllTestClasses.append(RemoteMigrate) class RemoteRscFailure(RemoteDriver): def __call__(self, node): '''Perform the 'RemoteRscFailure' test. ''' self.start_new_test(node) # This is an important step. We are migrating the connection # before failing the resource. 
This verifies that the migration # has properly maintained control over the remote-node. self.migrate_connection(node) self.fail_rsc(node) self.cleanup_metal(node) self.debug("Waiting for the cluster to recover") self.CM.cluster_stable() if self.failed: return self.failure(self.fail_string) return self.success() def errorstoignore(self): ignore_pats = [ r"pengine.*: Recover remote-rsc\s*\(.*\)", ] ignore_pats.extend(RemoteDriver.errorstoignore(self)) return ignore_pats AllTestClasses.append(RemoteRscFailure) # vim:ts=4:sw=4:et: diff --git a/cts/patterns.py b/cts/patterns.py index 784641288b..34dfe031f0 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,545 +1,546 @@ import sys, os from cts.CTSvars import * patternvariants = {} class BasePatterns: def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", ] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/lrmd_test -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "UUIDQueryCmd" : "crmadmin -N", "SetCheckInterval" : "cibadmin --modify -c --xml-text ''", "ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -VQ -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -QG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "crmd.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions "Pat:Local_started" : "%s\W.*The local CRM is operational", "Pat:Slave_started" : "%s\W.*State transition.*-> S_NOT_DC", "Pat:Master_started": "%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "heartbeat.*%s.*Heartbeat shutdown complete", "Pat:Logd_stopped" : "%s\W.*logd:.*Exiting write process", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", - "Pat:Fencing_start" : "Initiating remote operation .* for %s", + "Pat:Fencing_start" : "(Initiating remote operation|Requesting peer fencing ).* (for|of) %s", "Pat:Fencing_ok" : r"stonith.*:\s*Operation .* of %s by .* for .*@.*: OK", "Pat:Fencing_recover" : r"pengine.*: Recover %s", - "Pat:RscOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(.*confirmed=\S+\)", - "Pat:RscRemoteOpOK" : r"crmd.*:\s*Operation %s_%s.*:\s*ok \(node=%s,.*,\s*confirmed=true\)", + "Pat:RscOpOK" : r"crmd.*:\s+Result of %s operation for %s.*: ok", + "Pat:RscRemoteOpOK" : r"crmd.*:\s+Result of %s operation for
%s on %s: ok", "Pat:NodeFenced" : r"crmd.*:\s*Peer\s+%s\s+was\s+terminated\s+\(.*\)\s+by\s+.*\s+for\s+.*:\s+OK", "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if key in self.components: return self.components[key] print("Unknown component '%s' for %s" % (key, self.name)) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif key in self.commands: return self.commands[key] elif key in self.search: return self.search[key] else: print("Unknown template '%s' for %s" % (key, self.name)) return None class crm_lha(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service heartbeat start > /dev/null 2>&1", "StopCmd" : "service heartbeat stop > /dev/null 2>&1", "EpochCmd" : "crm_node -H -e", "QuorumCmd" : "crm_node -H -q", "PartitionCmd" : "crm_node -H -p", }) self.search.update({ # Patterns to look for in the log files for various occasions... "Pat:ChildKilled" : "%s\W.*heartbeat.*%s.*killed by signal 9", "Pat:ChildRespawn" : "%s\W.*heartbeat.*Respawning client.*%s", "Pat:ChildExit" : "(ERROR|error): Client .* exited with return code", }) self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"WARN.*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r"No need to invoke the TE", r"global_timer_callback:", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", ] self.ignore = self.ignore + [ r"(ERROR|error):.*\s+assert\s+at\s+crm_glib_handler:" "(ERROR|error): Message hist queue is filling up", "stonithd.*CRIT: external_hostlist:.*'vmware gethosts' returned an empty hostlist", "stonithd.*(ERROR|error): Could not list nodes for stonith RA external/vmware.", "pengine.*Preventing .* from re-starting", ] class crm_cs_v0(BasePatterns): def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "EpochCmd" : "crm_node -e --openais", "QuorumCmd" : "crm_node -q --openais", "PartitionCmd" : "crm_node -p --openais", "StartCmd" : "service corosync start", "StopCmd" : "service corosync stop", }) self.search.update({ # The next pattern is too early # "Pat:We_stopped" : "%s.*Service engine unloaded: Pacemaker Cluster Manager", # The next pattern would be preferred, but it doesn't always come out # "Pat:We_stopped" : "%s.*Corosync Cluster Engine exiting with status", "Pat:We_stopped" : "%s\W.*Service engine unloaded: corosync cluster quorum service", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", "Pat:They_dead" : "corosync:.*Node %s is now: lost", "Pat:ChildExit" : "Child process .* exited", "Pat:ChildKilled" : "%s\W.*corosync.*Child process %s terminated 
with signal 9", "Pat:ChildRespawn" : "%s\W.*corosync.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*corosync.*CRM: Initialized", }) self.ignore = self.ignore + [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:", r"Child process .* terminated with signal 9", r"getinfo response error: 1$", "sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro failed", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* closed .I/O condition=17", ] self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r", exiting\.", r"(WARN|warn).*Ignoring HA message.*vote.*not in our membership list", r"pengine.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"The .* process .* terminated with signal", r"Child process .* terminated with signal", r"pengine:.*Recover .*\(.* -\> .*\)", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"crmd:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", #r"Executing .* fencing operation", #r"fence_pcmk.* Call to fence", #r"fence_pcmk", r"cman killed by node", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ "Pending action:", "error: crm_log_message_adv:", "resources were active at shutdown", "pending LRM operations at shutdown", "Lost connection to the CIB service", "Connection to the CIB terminated...", "Sending message to CIB service FAILED", "apply_xml_diff:.*Diff application failed!", r"crmd.*:\s*Action A_RECOVER .* not supported", "unconfirmed_actions:.*Waiting on .* unconfirmed actions", "cib_native_msgready:.*Message pending on command channel", r"crmd.*:\s*Performing A_EXIT_1 - forcefully exiting the CRMd", "verify_stopped:.*Resource .* was active at shutdown. 
You may ignore this error if it is unmanaged.", "error: attrd_connection_destroy:.*Lost connection to attrd", r".*:\s*Executing .* fencing operation \(.*\) on ", + r".*:\s*Requesting fencing \([^)]+\) of node ", r"(Blackbox dump requested|Problem detected)", # "error: native_create_actions: Resource .*stonith::.* is active on 2 nodes attempting recovery", # "error: process_pe_message: Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error:.*Connection to the CPG API failed: Library error", r"The .* process .* exited", r"pacemakerd.*error:.*Child process .* exited", r"cib.*error:.*Corosync connection lost", r"stonith-ng.*error:.*Corosync connection terminated", r"The cib process .* exited: Invalid argument", r"The attrd process .* exited: Transport endpoint is not connected", r"The crmd process .* exited: Link has been severed", r"error:.*Child process cib .* exited: Invalid argument", r"error:.*Child process attrd .* exited: Transport endpoint is not connected", r"error:.*Child process crmd .* exited: Link has been severed", r"lrmd.*error:.*Connection to stonith-ng failed", r"lrmd.*error:.*Connection to stonith-ng.* closed", r"lrmd.*error:.*LRMD lost STONITH connection", r"crmd.*State transition .* S_RECOVERY", - r"crmd.*error:.*FSA: Input I_ERROR", - r"crmd.*error:.*FSA: Input I_TERMINATE", + r"crmd.*error:.*Input I_ERROR .*received in state", + r"crmd.*error:.*Input I_TERMINATE .*received in state", r"crmd.*error:.*Connection to cman failed", r"crmd.*error:.*Could not recover from internal error", r"error:.*Connection to cib_shm failed", r"error:.*Connection to cib_shm.* closed", r"error:.*STONITH connection failed", r"error: Connection to stonith-ng failed", r"crit: Fencing daemon connection failed", r"error: Connection to stonith-ng.* closed", ] self.components["corosync"] = [ r"pacemakerd.*error:.*Connection destroyed", r"attrd.*:\s*crit:.*Lost connection to Corosync service", r"stonith.*:\s*Corosync connection terminated", r"cib.*:\s*Corosync connection lost!\s+Exiting.", r"crmd.*:\s*connection terminated", r"pengine.*Scheduling Node .* for STONITH", r"crmd.*:\s*Peer %s was terminated \(.*\) by .* for .*:\s*OK", ] self.components["cib-ignore"] = [ "lrmd.*Connection to stonith-ng failed", "lrmd.*Connection to stonith-ng.* closed", "lrmd.*LRMD lost STONITH connection", "lrmd.*STONITH connection failed, finalizing .* pending operations", ] self.components["cib"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "Respawning .* attrd", "Connection to cib_.* failed", "Connection to cib_.* closed", "Connection to the CIB terminated...", "(Child process|The) crmd .* exited: Generic Pacemaker error", "(Child process|The) attrd .* exited: (Connection reset by peer|Transport endpoint is not connected)", "Lost connection to CIB service", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*I_ERROR.*crmd_cib_connection_destroy", "crmd.*Could not recover from internal error", ] self.components["lrmd"] = [ "State transition .* S_RECOVERY", "LRM Connection failed", "Respawning .* crmd", "Connection to lrmd failed", "Connection to lrmd.* closed", "crmd.*I_ERROR.*lrm_connection_destroy", "(Child process|The) crmd .* exited: Generic Pacemaker error", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*Could not recover from internal error", ] self.components["lrmd-ignore"] = [] self.components["crmd"] = [ # "WARN: determine_online_status: Node .* is 
unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["crmd-ignore"] = [] self.components["attrd"] = [] self.components["attrd-ignore"] = [] self.components["pengine"] = [ "State transition .* S_RECOVERY", "Respawning .* crmd", "(The|Child process) crmd .* exited: Generic Pacemaker error", "Connection to pengine failed", "Connection to pengine.* closed", "Connection to the Policy Engine failed", "crmd.*I_ERROR.*save_cib_contents", - "crmd.*Input I_TERMINATE from do_recover", + r"crmd.*: Input I_TERMINATE .*from do_recover", "crmd.*Could not recover from internal error", ] self.components["pengine-ignore"] = [] self.components["stonith"] = [ "Connection to stonith-ng failed", "LRMD lost STONITH connection", "Connection to stonith-ng.* closed", "Fencing daemon connection failed", r"crmd.*:\s*warn.*:\s*Callback already present", ] self.components["stonith-ignore"] = [ r"pengine.*: Recover Fencing", r"Updating failcount for Fencing", r"error:.*Connection to stonith-ng failed", r"error:.*Connection to stonith-ng.*closed \(I/O condition=17\)", r"crit:.*Fencing daemon connection failed", r"error:.*Sign-in failed: triggered a retry", "STONITH connection failed, finalizing .* pending operations.", - r"crmd.*:\s*Operation Fencing.* Error", + r"crmd.*:\s+Result of .* operation for Fencing.* Error", ] self.components["stonith-ignore"].extend(self.components["common-ignore"]) class crm_mcp(crm_cs_v0): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "PartitionCmd" : "crm_node -p", }) self.search.update({ # Close enough... "Corosync Cluster Engine exiting normally" isn't printed # reliably and there's little interest in doing anything about it "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", - "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost", "Pat:ChildExit" : "The .* process exited", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) # if self.Env["have_systemd"]: # self.update({ # # When systemd is in use, we can look for this instead # "Pat:We_stopped" : "%s.*Stopped Corosync Cluster Engine", # }) class crm_mcp_docker(crm_mcp): ''' The crm version 4 cluster manager class. It implements the things we need to talk to and manipulate crm clusters running on top of native corosync (no plugins) ''' def __init__(self, name): crm_mcp.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class crm_cman(crm_cs_v0): ''' The crm version 3 cluster manager class. 
It implements the things we need to talk to and manipulate crm clusters running on top of openais ''' def __init__(self, name): crm_cs_v0.__init__(self, name) self.commands.update({ "StartCmd" : "service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker_remoted ] || service pacemaker_remote stop", "EpochCmd" : "crm_node -e --cman", "QuorumCmd" : "crm_node -q --cman", "PartitionCmd" : "crm_node -p --cman", }) self.search.update({ "Pat:We_stopped" : "%s.*Unloading all Corosync service engines", - "Pat:They_stopped" : "%s\W.*crmd.*Node %s\[.*state is now lost", - "Pat:They_dead" : "crmd.*Node %s\[.*state is now lost", + "Pat:They_stopped" : "%s\W.*crmd.*Node %s(\[|\s).*state is now lost", + "Pat:They_dead" : "crmd.*Node %s(\[|\s).*state is now lost", "Pat:ChildKilled" : "%s\W.*pacemakerd.*The %s process .* terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) class PatternSelector: def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_cs_v0("crm-plugin-v0") crm_cman("crm-cman") crm_mcp("crm-mcp") crm_lha("crm-lha") elif name == "crm-lha": crm_lha(name) elif name == "crm-plugin-v0": crm_cs_v0(name) elif name == "crm-cman": crm_cman(name) elif name == "crm-mcp": crm_mcp(name) elif name == "crm-mcp-docker": crm_mcp_docker(name) def get_variant(self, variant): if variant in patternvariants: return patternvariants[variant] print("defaulting to crm-base for %s" % variant) return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-mcp -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print("Illegal argument " + args[i]) print(PatternSelector(kind)[template]) diff --git a/fencing/main.c b/fencing/main.c index 1d50355e9c..34f393a8ba 100644 --- a/fencing/main.c +++ b/fencing/main.c @@ -1,1546 +1,1546 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean no_cib_connect = FALSE; gboolean stonith_shutdown_flag = FALSE; qb_ipcs_service_t *ipcs = NULL; xmlNode *local_cib = NULL; GHashTable *known_peer_names = NULL; static cib_t *cib_api = NULL; static void *cib_library = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void st_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection created for %p", c); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; crm_client_t *c = crm_client_get(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } op = crm_element_value(request, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %u/%u for command %u from %s", flags, call_options, id, crm_client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? 
*/ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); crm_client_destroy(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_HEARTBEAT static void stonith_peer_hb_callback(HA_Message * msg, void *private_data) { xmlNode *xml = convert_ha_message(NULL, msg, __FUNCTION__); stonith_peer_callback(xml, private_data); free_xml(xml); } static void stonith_peer_hb_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Heartbeat disconnection complete... exiting"); } else { crm_err("Heartbeat connection lost! Exiting."); } stonith_shutdown(0); } #endif #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_err("Corosync connection terminated"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); client_obj = crm_client_get_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = -1; crm_trace("No client to send the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event); } if (local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ?
client_obj->name : "", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return 0x01; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return 0x04; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return 0x10; } return 0; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error); if (rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, crm_client_name(client), client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { crm_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = crm_client_get_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_connections, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = 
stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (safe_str_neq(standard, "stonith")) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init() { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); if(topology) { g_hash_table_destroy(topology); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static node_t * our_node_allowed_for(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. 
* * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; node_t *parent = NULL; gboolean remove = TRUE; /* If this is a complex resource, check children rather than this resource itself. * TODO: Mark each installed device and remove if untouched when this process finishes. */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(rsc->variant == pe_clone || rsc->variant == pe_master) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(safe_str_neq(rclass, "stonith")) { return; } /* If this STONITH resource is disabled, just remove it. */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (safe_str_eq(value, RSC_STOPPED)) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { /* Our node is allowed, so update the device information */ xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), provider, agent, params, rsc_provides); stonith_device_register(data, NULL, TRUE); stonith_key_value_freeall(params, 1, 1); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } extern xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now); extern node_t *create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set); /*! 
* \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GListPtr gIter = NULL; pe_working_set_t data_set; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); set_working_set_defaults(&data_set); data_set.input = local_cib; data_set.now = crm_time_new(NULL); data_set.flags |= pe_flag_quick_location; data_set.localhost = stonith_our_uname; cluster_status(&data_set); do_calculations(&data_set, NULL, NULL); for (gIter = data_set.resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, &data_set); } data_set.input = NULL; /* Wasn't a copy */ cleanup_alloc_calculations(&data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if(op == NULL || strcmp(op, "move") == 0) { continue; } else if(safe_str_eq(op, "delete") && strstr(xpath, XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS)) { needs_update = TRUE; break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if(strstr(xpath, "/"XML_CIB_TAG_RESOURCES)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } else if(strstr(xpath, XML_CONS_TAG_RSC_LOCATION)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = 
NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, "stonith")) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /* * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_TRACE); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = 
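/* For illustration: with node "pcmk-2" and a level-2 topology target of
 * fence-role=true, the snprintf() in node_has_attr() above produces an
 * XPath along these lines (assuming the usual expansions of the
 * XML_CIB_TAG_* and XML_TAG_ATTR_SETS constants):
 *
 *     //nodes/node[@uname='pcmk-2']/instance_attributes
 *         /nvpair[@name='fence-role' and @value='true']
 *
 * A non-NULL get_xpath_object() result means the locally cached CIB marks
 * that node with the attribute, so it is a valid topology target.
 */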
first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whole */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { long timeout_ms = 0; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if(value) { timeout_ms = crm_get_msec(value); } if(timeout_ms !=
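/* For illustration: stonith-watchdog-timeout is a time specification, so a
 * hypothetical configuration such as
 *
 *     <nvpair id="opts-swt" name="stonith-watchdog-timeout" value="10s"/>
 *
 * reaches the crm_get_msec() call above as "10s" and should yield
 * timeout_ms == 10000; the comparison that follows only logs and stores
 * the value when it differs from the currently active timeout.
 */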
stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } else { stonith_watchdog_timeout_ms = 0; } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); fencing_topology_init(); cib_devices_update(); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (mainloop != NULL && g_main_is_running(mainloop)) { g_main_quit(mainloop); } else { stonith_cleanup(); crm_exit(pcmk_ok); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB closed."); return; } else { crm_notice("Connection to the CIB terminated. Shutting down."); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } g_hash_table_destroy(known_peer_names); known_peer_names = NULL; crm_peer_destroy(); crm_client_cleanup(); free(stonith_our_uname); free_xml(local_cib); } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, {"logfile", 1, 0, 'l'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { int rc, retries = 0; static cib_t *(*cib_new_fn) (void) = NULL; if (cib_new_fn == NULL) { cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); } if (cib_new_fn != NULL) { cib_api = (*cib_new_fn) (); } if (cib_api == NULL) { crm_err("No connection to the CIB"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_CRMD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB service: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_notice("Watching for stonith topology changes"); + crm_info("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! 
 * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node Which peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { xmlNode *query = NULL; if (node->id && node->uname) { g_hash_table_insert(known_peer_names, GUINT_TO_POINTER(node->id), strdup(node->uname)); } /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int rc = 0; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "list", "monitor", "status" }; crm_log_preinit("stonith-ng", argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides fencing (stonith) services for Pacemaker clusters.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': crm_add_logfile(optarg); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf (" This is a fake resource that details the instance attributes handled by stonithd.\n"); printf(" Options available for all stonith resources\n"); printf(" \n"); printf(" \n"); printf (" The priority of the stonith resource. Devices are tried in order of highest priority to lowest.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to port numbers for devices that do not support host names.\n"); printf (" Eg.
node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf (" Allowed values: dynamic-list (query the device), static-list (check the %s attribute), none (assume every device can fence every machine)\n", STONITH_ATTR_HOSTLIST); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_MAX); printf (" Enable random delay for stonith actions and specify the maximum random delay\n"); printf (" This prevents double fencing when using slow devices such as sbd.\n" "Use this to enable random delay for stonith actions and specify the maximum random delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_ACTION_LIMIT); printf (" The maximum number of actions that can be performed in parallel on this device\n"); printf (" Pengine property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions that can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task, so Pacemaker will automatically retry the operation if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return 0; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', EX_USAGE); } crm_log_init("stonith-ng", LOG_INFO, TRUE, FALSE, argc, argv, FALSE); mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); known_peer_names = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, free); if (stand_alone == FALSE) { #if SUPPORT_HEARTBEAT cluster.hb_conn = NULL; cluster.hb_dispatch = stonith_peer_hb_callback; cluster.destroy = stonith_peer_hb_destroy; #endif if (is_openais_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster...
terminating"); crm_exit(DAEMON_RESPAWN_STOP); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { /* crm_cluster_connect() registered us for crm_system_name, which * usually is the only F_TYPE used by the respective sub system. * Stonith needs to register two additional F_TYPE callbacks, * because it can :-/ */ if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_NOTIFY, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_NOTIFY, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } if (HA_OK != cluster.hb_conn->llc_ops->set_msg_callback(cluster.hb_conn, T_STONITH_TIMEOUT_VALUE, cluster.hb_dispatch, cluster.hb_conn)) { crm_crit("Cannot set msg callback %s: %s", T_STONITH_TIMEOUT_VALUE, cluster.hb_conn->llc_ops->errmsg(cluster.hb_conn)); crm_exit(DAEMON_RESPAWN_STOP); } } #endif if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); if(stonith_watchdog_timeout_ms > 0) { xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", "internal", STONITH_WATCHDOG_AGENT, params, NULL); stonith_device_register(xml, NULL, FALSE); stonith_key_value_freeall(params, 1, 1); free_xml(xml); } stonith_ipc_server_init(&ipcs, &ipc_callbacks); #if SUPPORT_STONITH_CONFIG if (((stand_alone == TRUE)) && !(standalone_cfg_read_file(STONITH_NG_CONF_FILE))) { standalone_cfg_commit(); } #endif /* Create the mainloop and run it... */ mainloop = g_main_new(FALSE); crm_info("Starting %s mainloop", crm_system_name); g_main_run(mainloop); stonith_cleanup(); #if SUPPORT_HEARTBEAT if (cluster.hb_conn) { cluster.hb_conn->llc_ops->delete(cluster.hb_conn); } #endif crm_info("Done"); return crm_exit(rc); } diff --git a/fencing/remote.c b/fencing/remote.c index 2394590838..6096d27799 100644 --- a/fencing/remote.c +++ b/fencing/remote.c @@ -1,2106 +1,2109 @@ /* * Copyright (C) 2009 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one stonithd queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a st_query_result_t, with each device's * information kept in a device_properties_t. 
*/ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; } device_properties_t; typedef struct st_query_result_s { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } st_query_result_t; GHashTable *remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer); static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const st_query_result_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); g_hash_table_destroy(query->devices); free(query->host); free(query); } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified)) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, gboolean verified_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, const char *device) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! 
* \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_trace("Free'ing op %s for %s", op->id, op->target); crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); free(op); } /* * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? "reboot" : op->action); } /* * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot of %s (%s) to off", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, "off"); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GListPtr iter = NULL; crm_info("Remapped off of %s complete, remapping to on for %s.%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, "on"); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GListPtr match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! 
 * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot of %s for %s.%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, "reboot"); } } static xmlNode * create_op_done_notify(remote_fencing_op_t * op, int rc) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } static void bcast_result_to_peers(remote_fencing_op_t * op, int rc) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = create_op_done_notify(op, rc); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } static void handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ notify_data = create_op_done_notify(op, rc); crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); /* bcast to all local clients that the fencing operation happened */ do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); /* mark this op as having notifications already sent */ op->notify_sent = TRUE; free_xml(reply); free_xml(notify_data); } static void handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc) { GListPtr iter = NULL; for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { /* Ie. it hasn't timed out already */ other->state = op->state; crm_debug("Performing duplicate notification for %s@%s.%.8s = %s", other->client_name, other->originator, other->id, pcmk_strerror(rc)); remote_op_done(other, data, rc, TRUE); } else { crm_err("Skipping duplicate notification for %s@%s - %d", other->client_name, other->originator, other->state); } } } /*! * \internal * \brief Finalize a remote operation. * * \description This function has two code paths. * * Path 1. This node is the owner of the operation and needs * to notify the cpg group via a broadcast as to the operation's * results. * * Path 2. The cpg broadcast is received. All nodes notify their local * stonith clients of the operation results.
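 *
 * For illustration, the owner's timeline is roughly:
 *
 *     remote_op_done(op, data, rc, FALSE)
 *         -> bcast_result_to_peers()        (no local notification yet)
 *     ... cpg redelivers the broadcast (F_SUBTYPE "broadcast") ...
 *     remote_op_done(op, data, rc, FALSE)
 *         -> handle_local_reply_and_notify()
 *         -> handle_duplicates()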
 * * So, the owner of the operation first notifies the cluster of the result, * and once that cpg notify is received back, it notifies all the local clients. * * Nodes that are passive watchers of the operation will receive the * broadcast and only need to notify their local clients that the operation finished. * * \param op, The fencing operation to finalize * \param data, The xml msg reply (if present) of the last delegated fencing * operation. * \param dup, Whether this operation is a duplicate; if so, treat it a little differently, * making sure the broadcast is not sent out. */ static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; op->completed = time(NULL); clear_remote_op_timers(op); undo_op_remap(op); if (op->notify_sent == TRUE) { crm_err("Already sent notifications for '%s of %s by %s' (for=%s@%s.%.8s, state=%d): %s", op->action, op->target, op->delegate ? op->delegate : "", op->client_name, op->originator, op->id, op->state, pcmk_strerror(rc)); goto remote_op_done_cleanup; } if (!op->delegate && data && rc != -ENODEV && rc != -EHOSTUNREACH) { xmlNode *ndata = get_xpath_object("//@" F_STONITH_DELEGATE, data, LOG_TRACE); if(ndata) { op->delegate = crm_element_value_copy(ndata, F_STONITH_DELEGATE); } else { op->delegate = crm_element_value_copy(data, F_ORIG); } } if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } /* Tell everyone the operation is done; we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (dup == FALSE && safe_str_neq(subt, "broadcast")) { /* Defer notification until the bcast message arrives */ bcast_result_to_peers(op, rc); goto remote_op_done_cleanup; } if (rc == pcmk_ok || dup) { level = LOG_NOTICE; } else if (safe_str_neq(op->originator, stonith_our_uname)) { level = LOG_NOTICE; } do_crm_log(level, "Operation %s of %s by %s for %s@%s.%.8s: %s", op->action, op->target, op->delegate ?
op->delegate : "", op->client_name, op->originator, op->id, pcmk_strerror(rc)); handle_local_reply_and_notify(op, data, rc); if (dup == FALSE) { handle_duplicates(op, data, rc); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } remote_op_done_cleanup: free_xml(local_data); } static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; - crm_notice("Remote %s operation on %s for %s.%8s assumed complete", + crm_notice("Self-fencing (%s) by %s for %s.%8s assumed complete", op->action, op->target, op->client_name, op->id); op->state = st_done; remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; - crm_notice("Remote %s operation on %s for %s.%8s timed out", + crm_notice("Peer's fencing (%s) of %s for %s timed out" CRM_XS "id=%s", op->action, op->target, op->client_name, op->id); call_remote_stonith(op, NULL); return FALSE; } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action %s (%s) for %s (%s) already completed", op->action, op->id, op->target, op->client_name); return FALSE; } crm_debug("Action %s (%s) for %s (%s) timed out", op->action, op->id, op->target, op->client_name); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. 
*/ remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } op->state = st_failed; remote_op_done(op, NULL, -ETIME, FALSE); return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %s for %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %s for %s already in progress", op->id, op->target); } else if (op->query_results) { crm_debug("Query %s for %s complete: %d", op->id, op->target, op->state); call_remote_stonith(op, NULL); } else { crm_debug("Query %s for %s timed out: %d", op->id, op->target, op->state); if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } remote_op_timeout(op); } return FALSE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /* * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GListPtr match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); } } /* * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GListPtr match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GListPtr devices) { GListPtr lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } op->devices = op->devices_list; } /* * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch(tp->kind) { case 2: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case 1: /* This level targets by name, so tp->target is a regular expression * matching names of nodes to be targeted. 
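 *
 * For illustration, a level registered with the pattern
 * "^(db-[0-9]+|qa-node)$" is compiled with regcomp(..., REG_EXTENDED)
 * below and matches "db-3" but not "db3"; since regexec() also accepts
 * substring matches, anchoring the pattern with ^ and $ is up to
 * whoever configures the level.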
*/ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case 0: crm_trace("Testing %s against %s", node, tp->target); return safe_str_eq(tp->target, node); } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * * \return pcmk_ok if successful, target was not specified (i.e. queries) or * target has no topology, or -EINVAL if no more topology levels to try */ static int stonith_topology_next(remote_fencing_op_t * op) { stonith_topology_t *tp = NULL; if (op->target) { /* Queries don't have a target set */ tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return pcmk_ok; } set_bit(op->call_options, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d for %s (%d devices) - %s@%s.%.8s", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_ok; } crm_notice("All fencing options to fence %s for %s@%s.%.8s failed", op->target, op->client_name, op->originator, op->id); return -EINVAL; } /*! * \brief Check to see if this operation is a duplicate of another in flight * operation. If so merge this operation into the inflight operation, and mark * it as a duplicate. */ static void merge_duplicates(remote_fencing_op_t * op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { crm_node_t *peer = NULL; const char *other_action = op_requested_action(other); if (other->state > st_exec) { /* Must be in-progress */ continue; } else if (safe_str_neq(op->target, other->target)) { /* Must be for the same node */ continue; } else if (safe_str_neq(op->action, other_action)) { crm_trace("Must be for the same action: %s vs. 
%s", op->action, other_action); continue; } else if (safe_str_eq(op->client_name, other->client_name)) { crm_trace("Must be for different clients: %s", op->client_name); continue; } else if (safe_str_eq(other->target, other->originator)) { crm_trace("Can't be a suicide operation: %s", other->target); continue; } peer = crm_get_peer(0, other->originator); if(fencing_peer_active(peer) == FALSE) { crm_notice("Failing stonith action %s for node %s originating from %s@%s.%.8s: Originator is dead", other->action, other->target, other->client_name, other->originator, other->id); other->state = st_failed; continue; } else if(other->total_timeout > 0 && now > (other->total_timeout + other->created)) { crm_info("Stonith action %s for node %s originating from %s@%s.%.8s is too old: %d vs. %d + %d", other->action, other->target, other->client_name, other->originator, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { crm_trace("Making a best-guess as to the timeout used"); other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); } crm_notice ("Merging stonith action %s for node %s originating from client %s.%.8s with identical request from %s@%s.%.8s (%ds)", op->action, op->target, op->client_name, op->id, other->client_name, other->originator, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); op->state = st_done; op->completed = time(NULL); op->delegate = strdup("a human"); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); remote_op_done(op, msg, pcmk_ok, FALSE); /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ return -EINPROGRESS; } char * stonith_get_peer_name(unsigned int nodeid) { crm_node_t *node = crm_find_peer(nodeid, NULL); char *nodename = NULL; if (node && node->uname) { return strdup(node->uname); } else if ((nodename = get_node_name(nodeid))) { return nodename; } else { const char *last_known_name = g_hash_table_lookup(known_peer_names, GUINT_TO_POINTER(nodeid)); if (last_known_name) { crm_debug("Use the last known name %s for nodeid %u", last_known_name, nodeid); return strdup(last_known_name); } } return NULL; } /*! * \internal * \brief Create a new remote stonith op * \param client, he local stonith client id that initaited the operation * \param request, The request from the client that started the operation * \param peer, Is this operation owned by another stonith peer? Operations * owned by other peers are stored on all the stonith nodes, but only the * owner executes the operation. All the nodes get the results to the operation * once the owner finishes executing it. 
*/ void * create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); int call_options = 0; if (remote_op_list == NULL) { remote_op_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_remote_op); } /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(remote_op_list, op_id); if (op) { crm_debug("%s already exists", op_id); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(remote_op_list, op->id, op); CRM_LOG_ASSERT(g_hash_table_lookup(remote_op_list, op->id) != NULL); crm_trace("Created %s", op->id); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); crm_trace("%s new stonith op: %s - %s of %s for %s", (peer && dev) ? 
"Recorded" : "Generated", op->id, op->action, op->target, op->client_name); if (op->call_options & st_opt_cs_nodeid) { int nodeid = crm_atoi(op->target, NULL); char *nodename = stonith_get_peer_name(nodeid); /* Ensure the conversion only happens once */ op->call_options &= ~st_opt_cs_nodeid; if (nodename) { free(op->target); op->target = nodename; } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); return op; } remote_fencing_op_t * initiate_remote_stonith_op(crm_client_t * client, xmlNode * request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { crm_notice("Initiating manual confirmation for %s: %s", op->target, op->id); return op; } CRM_CHECK(op->action, return NULL); if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; } switch (op->state) { case st_failed: - crm_warn("Initiation of remote operation %s for %s: failed (%s)", op->action, - op->target, op->id); + crm_warn("Could not request peer fencing (%s) of %s " + CRM_XS " id=%s", op->action, op->target, op->id); remote_op_done(op, NULL, -EINVAL, FALSE); return op; case st_duplicate: - crm_info("Initiating remote operation %s for %s: %s (duplicate)", op->action, - op->target, op->id); + crm_info("Requesting peer fencing (%s) of %s (duplicate) " + CRM_XS " id=%s", op->action, op->target, op->id); return op; default: - crm_notice("Initiating remote operation %s for %s: %s (%d)", op->action, op->target, - op->id, op->state); + crm_notice("Requesting peer fencing (%s) of %s " + CRM_XS " id=%s state=%d", + op->action, op->target, op->id, op->state); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static st_query_result_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GListPtr iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? 
TRUE : FALSE; if (!device && is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; crm_trace("Testing result from %s for %s with %d devices: %d %x", peer->host, op->target, peer->ndevices, peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && safe_str_eq(peer->host, op->target)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && safe_str_neq(peer->host, op->target)) { continue; } if (is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if ((peer->tried == FALSE) && count_peer_devices(op, peer, verified_devices_only)) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static st_query_result_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; st_query_result_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s with %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && is_set(op->call_options, st_opt_topology) && stonith_topology_next(op) == pcmk_ok); crm_notice("Couldn't find anyone to fence (%s) %s with %s", op->action, op->target, (device? device : "any device")); return NULL; } static int get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, const char *device) { device_properties_t *props; if (!peer || !device) { return op->base_timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return op->base_timeout; } return (props->custom_action_timeout[op->phase]? props->custom_action_timeout[op->phase] : op->base_timeout) + props->delay_max[op->phase]; } struct timeout_data { const remote_fencing_op_t *op; const st_query_result_t *peer; int total_timeout; }; /*! 
* \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id); } } static int get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const st_query_result_t *chosen_peer) { int total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (is_set(op->call_options, st_opt_topology) && tp) { int i; GListPtr device_list = NULL; GListPtr iter = NULL; /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { for (iter = op->query_results; iter != NULL; iter = iter->next) { const st_query_result_t *peer = iter->data; if (find_peer_device(op, peer, device_list->data)) { total_timeout += get_device_timeout(op, peer, device_list->data); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = op->base_timeout; } return total_timeout ? total_timeout : op->base_timeout; } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GListPtr iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. 
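 *
 * For illustration, with a hypothetical two-level topology where level 1
 * has one device (20s) and level 2 has two (20s and 30s),
 * get_op_total_timeout() sums all levels to 70s; the caller then applies
 * TIMEOUT_MULTIPLY_FACTOR (1.2) and reports 84s to the waiting client.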
 */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s.%.8s", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (safe_str_eq(client_node, stonith_our_uname)) { /* The client is connected to this node; send the update directly to them */ do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node; relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %s.%.8s", dup->client_name, dup->id); report_timeout_period(iter->data, op_timeout); } } /* * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Operation to advance * \param[in] device ID of device just completed * \param[in] msg XML reply that contained device result (if available) * \param[in] rc Return code of device's execution */ static void advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg, int rc) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && safe_str_eq(op->action, "on")) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this.
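 * For illustration: for a remapped reboot over devices [pdu-a, pdu-b]
 * where pdu-b was flagged for automatic unfencing, op_phase_on() leaves
 * op->devices_list == [pdu-a] only, because the cluster will unfence
 * pdu-b's target when the node rejoins.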
*/ op_phase_on(op); } if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op for %s as complete", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); } } void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer) { const char *device = NULL; int timeout = op->base_timeout; crm_trace("State for %s.%.8s: %s %d", op->target, op->client_name, op->id, op->state); if (peer == NULL && !is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { int total_timeout = get_op_total_timeout(op, peer); op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); - crm_info("Total remote op timeout set to %d for fencing of node %s for %s.%.8s", + crm_info("Total timeout set to %d for peer's fencing of %s for %s" + CRM_XS "id=%s", total_timeout, op->target, op->client_name, op->id); } if (is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore any peer preference; the preferred peer might not have the device we need */ /* When using topology, stonith_choose_peer() removes the device from * further consideration, so be sure to calculate timeout beforehand */ peer = stonith_choose_peer(op); device = op->devices->data; timeout = get_device_timeout(op, peer, device); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); if (device) { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device); crm_info("Requesting that '%s' perform op '%s %s' with '%s' for %s (%ds)", peer->host, op->target, op->action, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); crm_xml_add(remote_op, F_STONITH_MODE, "slave"); } else { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_info("Requesting that '%s' perform op '%s %s' for %s (%ds, %ds)", peer->host, op->target, op->action, op->client_name, timeout_one, stonith_watchdog_timeout_ms); crm_xml_add(remote_op, F_STONITH_MODE, "smart"); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); } if(stonith_watchdog_timeout_ms > 0 && device && safe_str_eq(device, "watchdog")) { crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->action, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); /* TODO check devices to verify watchdog will be in use */ } else if(stonith_watchdog_timeout_ms > 0 && safe_str_eq(peer->host, op->target) && safe_str_neq(op->action, "on")) { crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)",
stonith_watchdog_timeout_ms/1000, op->target, op->action, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); } else { op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) for %s after successful 'off'", device, op->target); advance_op_topology(op, device, NULL, pcmk_ok); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) of %s for %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for %s (%d)", op->action, op->target, op->client_name, op->state); CRM_LOG_ASSERT(op->state < st_done); remote_op_timeout(op); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { int rc = -EHOSTUNREACH; /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms && (device == NULL || safe_str_eq(device, "watchdog"))) { crm_notice("Waiting %ds for %s to self-fence (%s) for %s.%.8s (%p)", stonith_watchdog_timeout_ms/1000, op->target, op->action, op->client_name, op->id, device); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return; } if (op->state == st_query) { crm_info("None of the %d peers have devices capable of fencing (%s) %s for %s (%d)", op->replies, op->action, op->target, op->client_name, op->state); rc = -ENODEV; } else { crm_info("None of the %d peers are capable of fencing (%s) %s for %s (%d)", op->replies, op->action, op->target, op->client_name, op->state); } op->state = st_failed; remote_op_done(op, NULL, rc, FALSE); } else if (device) { crm_info("Waiting for additional peers capable of fencing (%s) %s with %s for %s.%.8s", op->action, op->target, device, op->client_name, op->id); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s for %s.%.8s", op->action, op->target, op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine whether all devices in the target's topology have been found */ static gboolean all_topology_devices_found(remote_fencing_op_t * op) { GListPtr device = NULL; GListPtr iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (safe_str_eq(op->action, "off") || safe_str_eq(op->action, "reboot")) { /* Don't count the devices on the target node if we are killing * the target node.
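* Such a device would in effect be asking the target to fence itself, * whose success we could never confirm.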
*/ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { st_query_result_t *peer = iter->data; if (skip_target && safe_str_eq(peer->host, op->target)) { continue; } match = find_peer_device(op, peer, device->data); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] xml XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in,out] op Operation the properties relate to * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %d", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum random delay %d for %s", peer, device, props->delay_max[phase], action); } /* Handle devices with automatic unfencing */ if (safe_str_eq(action, "on")) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] result Peer's results * \param[in] device ID of device being parsed */ static void add_device_properties(xmlNode *xml, remote_fencing_op_t *op, st_query_result_t *result, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = calloc(1, sizeof(device_properties_t)); /* Add a new entry to this result's devices list */ CRM_ASSERT(props != NULL); g_hash_table_insert(result->devices, strdup(device), props); /* Peers with verified (monitored) access will be preferred */ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", result->host, device); props->verified = TRUE; } /* Parse action-specific device properties */ parse_action_specific(xml, result->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped.
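* Schematically (illustrative only; element names vary, only the id of * each child is significant here), such an entry looks like: * <device-entry id="fence1" ...values for the requested action...> * <sub-entry id="off" ...off-specific values.../> * <sub-entry id="on" ...on-specific values.../> * </device-entry>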
*/ if (safe_str_eq(ID(child), "off")) { parse_action_specific(child, result->host, device, "off", op, st_phase_off, props); } else if (safe_str_eq(ID(child), "on")) { parse_action_specific(child, result->host, device, "on", op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static st_query_result_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml) { st_query_result_t *result = calloc(1, sizeof(st_query_result_t)); xmlNode *child; CRM_CHECK(result != NULL, return NULL); result->host = strdup(host); result->devices = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); /* Each child element describes one capable device available to the peer */ for (child = __xml_first_child(xml); child != NULL; child = __xml_next(child)) { const char *device = ID(child); if (device) { add_device_properties(child, op, result, device); } } result->ndevices = g_hash_table_size(result->devices); CRM_CHECK(ndevices == result->ndevices, crm_err("Query claimed to have %d devices but %d found", ndevices, result->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); return result; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode * msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(remote_op_list, id); if (op == NULL) { - crm_debug("Unknown or expired remote op: %s", id); + crm_debug("Received query reply for unknown or expired operation %s", + id); return -EOPNOTSUPP; } replies_expected = QB_MIN(op->replies_expected, fencing_active_peers()); if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, F_ORIG); host_is_target = safe_str_eq(host, op->target); crm_info("Query result %d of %d from %s for %s/%s (%d devices) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, id); if (ndevices > 0) { result = add_result(op, host, ndevices, dev); } if (is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results.
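* For example, if the only device at level 1 lives on a peer whose reply * is still in flight, starting now would fail over to level 2 even though * level 1 could have succeeded.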
*/ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology; start the fencing ops. */ crm_trace("All topology devices found"); call_remote_stonith(op, result); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received)", replies_expected, op->replies); call_remote_stonith(op, NULL); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, result, TRUE); /* We have a result for a non-topology fencing op that looks promising; * go ahead and start fencing before query timeout */ if (result && (host_is_target == FALSE) && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified devices", nverified); call_remote_stonith(op, result); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received)", replies_expected, op->replies); call_remote_stonith(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if (result && (op->state == st_done)) { crm_info("Discarding query result from %s (%d devices): Operation is in state %d", result->host, result->ndevices, op->state); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error */ int process_remote_stonith_exec(xmlNode * msg) { int rc = 0; const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); device = crm_element_value(dev, F_STONITH_DEVICE); if (remote_op_list) { op = g_hash_table_lookup(remote_op_list, id); } if (op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ - crm_info("Unknown or expired remote op: %s", id); + crm_info("Received peer result of unknown or expired operation %s", id); return -EOPNOTSUPP; } if (op->devices && device && safe_str_neq(op->devices->data, device)) { - crm_err - ("Received outdated reply for device %s (instead of %s) to %s node %s. Operation already timed out at remote level.", - device, op->devices->data, op->action, op->target); + crm_err("Received outdated reply for device %s (instead of %s) to " + "fence (%s) %s.
Operation already timed out at peer level.", + device, op->devices->data, op->action, op->target); return rc; } if (safe_str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast")) { crm_debug("Marking call to %s for %s on behalf of %s@%s.%.8s: %s (%d)", op->action, op->target, op->client_name, op->originator, op->id, pcmk_strerror(rc), rc); if (rc == pcmk_ok) { op->state = st_done; } else { op->state = st_failed; } remote_op_done(op, msg, rc, FALSE); return pcmk_ok; } else if (safe_str_neq(op->originator, stonith_our_uname)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err ("%s received non-broadcast fencing result for operation it does not own (device %s targeting %s)", stonith_our_uname, device, op->target); return rc; } if (is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Call to %s for '%s %s' on behalf of %s@%s: %s (%d)", device, op->target, op->action, op->client_name, op->originator, pcmk_strerror(rc), rc); /* We own the op, and it is complete. Broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { remote_op_done(op, msg, rc, FALSE); return rc; } if ((op->phase == st_phase_on) && (rc != pcmk_ok)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (exit code %d) for %s after successful 'off'", device, rc, op->target); rc = pcmk_ok; } if (rc == pcmk_ok) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_op_topology(op, device, msg, rc); return rc; } else { /* This device failed; time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (stonith_topology_next(op) != pcmk_ok) { op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } } } else if (rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } else if (rc == -ETIME && op->devices == NULL) { /* If the operation timed out, don't bother retrying other peers.
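* The operation-wide deadline has already passed, so any further attempt * would be racing the same expired timer.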
*/ op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL); return rc; } int stonith_fence_history(xmlNode * msg, xmlNode ** output) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_TRACE); char *nodename = NULL; if (dev) { int options = 0; target = crm_element_value(dev, F_STONITH_TARGET); crm_element_value_int(msg, F_STONITH_CALLOPTS, &options); if (target && (options & st_opt_cs_nodeid)) { int nodeid = crm_atoi(target, NULL); nodename = stonith_get_peer_name(nodeid); if (nodename) { target = nodename; } } } crm_trace("Looking for operations on %s in %p", target, remote_op_list); *output = create_xml_node(NULL, F_STONITH_HISTORY_LIST); if (remote_op_list) { GHashTableIter iter; remote_fencing_op_t *op = NULL; g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { xmlNode *entry = NULL; if (target && strcmp(op->target, target) != 0) { continue; } rc = 0; crm_trace("Attaching op %s", op->id); entry = create_xml_node(*output, STONITH_OP_EXEC); crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(entry, F_STONITH_DATE, op->completed); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } free(nodename); return rc; } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; crm_trace("tolerance=%d, remote_op_list=%p", tolerance, remote_op_list); if (tolerance <= 0 || !remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; }
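/* Illustrative sketch only (not part of this patch): how a request handler
 * might consult the tolerance check before scheduling a new operation. The
 * variable names here are assumed for the example.
 */
#if 0
    if (stonith_check_fence_tolerance(tolerance, target, action)) {
        /* An identical fencing action completed within the tolerance window,
         * so report success without executing the devices again. */
        return pcmk_ok;
    }
#endif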
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 93a10e2d23..1655f00f85 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -1,634 +1,646 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include quorum_handle_t pcmk_quorum_handle = 0; gboolean(*quorum_app_callback) (unsigned long long seq, gboolean quorate) = NULL; /* * CFG functionality stolen from node_name() in corosync-quorumtool.c * This resolves the first address assigned to a node and returns the name or IP address. */ char * corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid) { int lpc = 0; int rc = CS_OK; int retries = 0; char *name = NULL; cmap_handle_t local_handle = 0; /* nodeid == 0 == CMAN_NODEID_US */ if (nodeid == 0) { nodeid = get_local_nodeid(0); } if (cmap_handle == 0 && local_handle == 0) { retries = 0; crm_trace("Initializing CMAP connection"); do { rc = cmap_initialize(&local_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %s", cs_strerror(rc)); local_handle = 0; } } if (cmap_handle == 0) { cmap_handle = local_handle; } while (name == NULL && cmap_handle != 0) { uint32_t id = 0; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &id); crm_trace("Checking %u vs %u from %s", nodeid, id, key); free(key); if (rc != CS_OK) { break; } if (nodeid == id) { crm_trace("Searching for node name for %u in nodelist.node.%d", nodeid, lpc); if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.ring0_addr", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s", key, name); if (node_name_is_valid(key, name) == FALSE) { free(name); name = NULL; } free(key); } if (name == NULL) { key = crm_strdup_printf("nodelist.node.%d.name", lpc); cmap_get_string(cmap_handle, key, &name); crm_trace("%s = %s %d", key, name, rc); free(key); } break; } lpc++; } if(local_handle) { cmap_finalize(local_handle); } if (name == NULL) { crm_info("Unable to get node name for nodeid %u", nodeid); } return name; } void terminate_cs_connection(crm_cluster_t *cluster) { - crm_notice("Disconnecting from Corosync"); + crm_info("Disconnecting from Corosync"); cluster_disconnect_cpg(cluster); if (pcmk_quorum_handle) { crm_trace("Disconnecting quorum"); quorum_finalize(pcmk_quorum_handle); pcmk_quorum_handle = 0; } else { crm_info("No Quorum connection"); } + + crm_notice("Disconnected from Corosync"); } int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; static int pcmk_quorum_dispatch(gpointer user_data) { int rc = 0; rc = quorum_dispatch(pcmk_quorum_handle, CS_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to the Quorum API failed: %d", rc); pcmk_quorum_handle = 0; return -1; } return 0; } static void pcmk_quorum_notification(quorum_handle_t handle, uint32_t quorate, uint64_t ring_id, uint32_t view_list_entries, uint32_t * view_list) { int i; GHashTableIter iter; crm_node_t *node = NULL; static gboolean init_phase = TRUE; if (quorate != crm_have_quorum) { - crm_notice("Membership " U64T ": quorum %s (%lu)", ring_id, - quorate ?
"acquired" : "lost", (long unsigned int)view_list_entries); + if (quorate) { + crm_notice("Quorum acquired " CRM_XS " membership=" U64T " members=%lu", + ring_id, (long unsigned int)view_list_entries); + } else { + crm_warn("Quorum lost " CRM_XS " membership=" U64T " members=%lu", + ring_id, (long unsigned int)view_list_entries); + } crm_have_quorum = quorate; } else { - crm_info("Membership " U64T ": quorum %s (%lu)", ring_id, - quorate ? "retained" : "still lost", (long unsigned int)view_list_entries); + crm_info("Quorum %s " CRM_XS " membership=" U64T " members=%lu", + (quorate? "retained" : "still lost"), ring_id, + (long unsigned int)view_list_entries); } if (view_list_entries == 0 && init_phase) { crm_info("Corosync membership is still forming, ignoring"); return; } init_phase = FALSE; /* Reset last_seen for all cached nodes so we can tell which ones aren't * in the view list */ g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { node->last_seen = 0; } /* Update the peer cache for each node in view list */ for (i = 0; i < view_list_entries; i++) { uint32_t id = view_list[i]; crm_debug("Member[%d] %u ", i, id); /* Get this node's peer cache entry (adding one if not already there) */ node = crm_get_peer(id, NULL); if (node->uname == NULL) { char *name = corosync_node_name(0, id); crm_info("Obtaining name for new node %u", id); node = crm_get_peer(id, name); free(name); } /* Update the node state (including updating last_seen to ring_id) */ crm_update_peer_state(__FUNCTION__, node, CRM_NODE_MEMBER, ring_id); } /* Remove any peer cache entries we didn't update */ crm_reap_unseen_nodes(ring_id); if (quorum_app_callback) { quorum_app_callback(ring_id, quorate); } } quorum_callbacks_t quorum_callbacks = { .quorum_notify_fn = pcmk_quorum_notification, }; gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { int rc = -1; int fd = 0; int quorate = 0; uint32_t quorum_type = 0; struct mainloop_fd_callbacks quorum_fd_callbacks; quorum_fd_callbacks.dispatch = pcmk_quorum_dispatch; quorum_fd_callbacks.destroy = destroy; crm_debug("Configuring Pacemaker to obtain quorum from Corosync"); rc = quorum_initialize(&pcmk_quorum_handle, &quorum_callbacks, &quorum_type); if (rc != CS_OK) { crm_err("Could not connect to the Quorum API: %d\n", rc); goto bail; } else if (quorum_type != QUORUM_SET) { crm_err("Corosync quorum is not configured\n"); goto bail; } rc = quorum_getquorate(pcmk_quorum_handle, &quorate); if (rc != CS_OK) { crm_err("Could not obtain the current Quorum API state: %d\n", rc); goto bail; } - crm_notice("Quorum %s", quorate ? 
"acquired" : "lost"); + if (quorate) { + crm_notice("Quorum acquired"); + } else { + crm_warn("Quorum lost"); + } quorum_app_callback = dispatch; crm_have_quorum = quorate; rc = quorum_trackstart(pcmk_quorum_handle, CS_TRACK_CHANGES | CS_TRACK_CURRENT); if (rc != CS_OK) { crm_err("Could not setup Quorum API notifications: %d\n", rc); goto bail; } rc = quorum_fd_get(pcmk_quorum_handle, &fd); if (rc != CS_OK) { crm_err("Could not obtain the Quorum API connection: %d\n", rc); goto bail; } mainloop_add_fd("quorum", G_PRIORITY_HIGH, fd, dispatch, &quorum_fd_callbacks); corosync_initialize_nodelist(NULL, FALSE, NULL); bail: if (rc != CS_OK) { quorum_finalize(pcmk_quorum_handle); return FALSE; } return TRUE; } gboolean init_cs_connection(crm_cluster_t * cluster) { int retries = 0; while (retries < 5) { int rc = init_cs_connection_once(cluster); retries++; switch (rc) { case CS_OK: return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: sleep(retries); break; default: return FALSE; } } crm_err("Could not connect to corosync after %d retries", retries); return FALSE; } gboolean init_cs_connection_once(crm_cluster_t * cluster) { crm_node_t *peer = NULL; enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ if (stack != pcmk_cluster_corosync) { crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; } if (cluster_connect_cpg(cluster) == FALSE) { return FALSE; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); cluster->nodeid = get_local_nodeid(0); if(cluster->nodeid == 0) { crm_err("Could not establish local nodeid"); return FALSE; } cluster->uname = get_node_name(0); if(cluster->uname == NULL) { crm_err("Could not establish local node name"); return FALSE; } /* Ensure the local node always exists */ peer = crm_get_peer(cluster->nodeid, cluster->uname); cluster->uuid = get_corosync_uuid(peer); return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verified message %d: (dest=%s:%s, from=%s:%s.%u, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, 
ais_data_len(msg), msg->header.size); } return sane; } enum cluster_type_e find_corosync_variant(void) { int rc = CS_OK; cmap_handle_t handle; rc = cmap_initialize(&handle); switch(rc) { case CS_OK: break; case CS_ERR_SECURITY: crm_debug("Failed to initialize the cmap API: Permission denied (%d)", rc); /* It's there, we just can't talk to it. * Good enough for us to identify as 'corosync' */ return pcmk_cluster_corosync; default: crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); return pcmk_cluster_unknown; } cmap_finalize(handle); return pcmk_cluster_corosync; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { if (node == NULL) { crm_trace("NULL"); return FALSE; } else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if ((node->processes & crm_proc_cpg) == 0) { crm_trace("%s: processes=%.16x", node->uname, node->processes); return FALSE; } return TRUE; } gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent) { int lpc = 0; int rc = CS_OK; int retries = 0; gboolean any = FALSE; cmap_handle_t cmap_handle; do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } crm_peer_init(); crm_trace("Initializing corosync nodelist"); for (lpc = 0;; lpc++) { uint32_t nodeid = 0; char *name = NULL; char *key = NULL; key = crm_strdup_printf("nodelist.node.%d.nodeid", lpc); rc = cmap_get_uint32(cmap_handle, key, &nodeid); free(key); if (rc != CS_OK) { break; } name = corosync_node_name(cmap_handle, nodeid); if (name != NULL) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node && node->uname && strcasecmp(node->uname, name) == 0) { if (node->id && node->id != nodeid) { crm_crit("Nodes %u and %u share the same name '%s': shutting down", node->id, nodeid, name); crm_exit(DAEMON_RESPAWN_STOP); } } } } if (nodeid > 0 || name != NULL) { crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); crm_get_peer(nodeid, name); } if (nodeid > 0 && name != NULL) { any = TRUE; if (xml_parent) { char buffer[64]; xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE); if(snprintf(buffer, 63, "%u", nodeid) > 0) { crm_xml_add(node, XML_ATTR_ID, buffer); } crm_xml_add(node, XML_ATTR_UNAME, name); if (force_member) { crm_xml_add(node, XML_ATTR_TYPE, CRM_NODE_MEMBER); } } } free(name); } cmap_finalize(cmap_handle); return any; } char * corosync_cluster_name(void) { cmap_handle_t handle; char *cluster_name = NULL; int rc = CS_OK; rc = cmap_initialize(&handle); if (rc != CS_OK) { crm_info("Failed to initialize the cmap API: %s (%d)", ais_error2text(rc), rc); return NULL; } rc = cmap_get_string(handle, "totem.cluster_name", &cluster_name); if (rc != CS_OK) { crm_info("Cannot get totem.cluster_name: %s (%d)", ais_error2text(rc), rc); } else { crm_debug("cmap totem.cluster_name = '%s'", cluster_name); } cmap_finalize(handle); return cluster_name; } int corosync_cmap_has_config(const char *prefix) { int rc = CS_OK; int retries = 0; static int found = -1; cmap_handle_t cmap_handle; cmap_iter_handle_t iter_handle; char key_name[CMAP_KEYNAME_MAXLEN + 1]; if(found != -1) { return 
found; } do { rc = cmap_initialize(&cmap_handle); if (rc != CS_OK) { retries++; crm_debug("API connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } } while (retries < 5 && rc != CS_OK); if (rc != CS_OK) { crm_warn("Could not connect to Cluster Configuration Database API: %s (rc=%d)", cs_strerror(rc), rc); return -1; } rc = cmap_iter_init(cmap_handle, prefix, &iter_handle); if (rc != CS_OK) { crm_warn("Failed to initialize iteration for corosync cmap '%s': %s (rc=%d)", prefix, cs_strerror(rc), rc); goto bail; } found = 0; while ((rc = cmap_iter_next(cmap_handle, iter_handle, key_name, NULL, NULL)) == CS_OK) { crm_trace("'%s' is configured in corosync cmap: %s", prefix, key_name); found++; break; } cmap_iter_finalize(cmap_handle, iter_handle); bail: cmap_finalize(cmap_handle); return found; } diff --git a/lib/cluster/legacy.c b/lib/cluster/legacy.c index 5de63f88fa..88e279a143 100644 --- a/lib/cluster/legacy.c +++ b/lib/cluster/legacy.c @@ -1,970 +1,972 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #if SUPPORT_COROSYNC # include # include # include # include #endif #if HAVE_CMAP # include #endif #if SUPPORT_CMAN # include cman_handle_t pcmk_cman_handle = NULL; #endif int ais_membership_timer = 0; gboolean ais_membership_force = FALSE; int plugin_dispatch(gpointer user_data); int ais_fd_sync = -1; int ais_fd_async = -1; /* never send messages via this channel */ void *ais_ipc_ctx = NULL; hdb_handle_t ais_ipc_handle = 0; #if SUPPORT_CMAN static bool valid_cman_name(const char *name, uint32_t nodeid) { bool rc = TRUE; /* Yes, %d, because that's what CMAN does */ char *fakename = crm_strdup_printf("Node%d", nodeid); if(crm_str_eq(fakename, name, TRUE)) { rc = FALSE; crm_notice("Ignoring inferred name from cman: %s", fakename); } free(fakename); return rc; } #endif static gboolean plugin_get_details(uint32_t * id, char **uname) { struct iovec iov; int retries = 0; int rc = CS_OK; cs_ipc_header_response_t header; struct crm_ais_nodeid_resp_s answer; static uint32_t local_id = 0; static char *local_uname = NULL; if(local_id) { if(id) *id = local_id; if(uname) *uname = strdup(local_uname); return TRUE; } header.error = CS_OK; header.id = crm_class_nodeid; header.size = sizeof(cs_ipc_header_response_t); iov.iov_base = &header; iov.iov_len = header.size; retry: errno = 0; rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer)); if (rc == CS_OK) { CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s), crm_err("Odd message: id=%d, size=%d, error=%d", answer.header.id, answer.header.size, answer.header.error)); CRM_CHECK(answer.header.id == crm_class_nodeid, crm_err("Bad response id: %d", answer.header.id)); } if ((rc == 
CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) { retries++; crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ goto retry; } if (rc != CS_OK) { crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } else if (answer.header.error != CS_OK) { crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc)); return FALSE; } crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname); local_id = answer.id; local_uname = strdup(answer.uname); if(id) *id = local_id; if(uname) *uname = strdup(local_uname); return TRUE; } bool send_plugin_text(int class, struct iovec *iov) { int rc = CS_OK; int retries = 0; int buf_len = sizeof(cs_ipc_header_response_t); char *buf = malloc(buf_len); AIS_Message *ais_msg = (AIS_Message*)iov[0].iov_base; cs_ipc_header_response_t *header = (cs_ipc_header_response_t *)(void*)buf; CRM_ASSERT(buf != NULL); /* There are only 6 handlers registered to crm_lib_service in plugin.c */ CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class); return FALSE); do { if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { retries++; crm_info("Peer overloaded or membership in flux:" " Re-sending message (Attempt %d of 20)", retries); sleep(retries); /* Proportional back off */ } errno = 0; rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, iov, 1, buf, buf_len); } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20); if (rc == CS_OK) { CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t), crm_err("Odd message: id=%d, size=%d, class=%d, error=%d", header->id, header->size, class, header->error)); CRM_ASSERT(buf_len >= header->size); CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK, crm_err("Bad response id (%d) for request (%d)", header->id, ais_msg->header.id)); CRM_CHECK(header->error == CS_OK, rc = header->error); } else { crm_perror(LOG_ERR, "Sending plugin message %d FAILED: %s (%d)", ais_msg->id, ais_error2text(rc), rc); } free(iov[0].iov_base); free(iov); free(buf); return (rc == CS_OK); } void terminate_cs_connection(crm_cluster_t *cluster) { - crm_notice("Disconnecting from Corosync"); + crm_info("Disconnecting from Corosync"); if (is_classic_ais_cluster()) { if (ais_ipc_handle) { crm_trace("Disconnecting plugin"); coroipcc_service_disconnect(ais_ipc_handle); ais_ipc_handle = 0; } else { crm_info("No plugin connection"); } } cluster_disconnect_cpg(cluster); # if SUPPORT_CMAN if (is_cman_cluster()) { if (pcmk_cman_handle) { crm_info("Disconnecting cman"); if (cman_stop_notification(pcmk_cman_handle) >= 0) { crm_info("Destroying cman"); cman_finish(pcmk_cman_handle); } } else { crm_info("No cman connection"); } } # endif ais_fd_async = -1; ais_fd_sync = -1; + + crm_notice("Disconnected from Corosync"); } void plugin_handle_membership(AIS_Message *msg) { if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) { xmlNode *member = NULL; const char *value = NULL; gboolean quorate = FALSE; xmlNode *xml = string2xml(msg->data); if (xml == NULL) { crm_err("Invalid membership update: %s", msg->data); return; } value = crm_element_value(xml, "quorate"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); return); if (crm_is_true(value)) { quorate = TRUE; } value = crm_element_value(xml, "id"); CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); return); crm_peer_seq = crm_int_helper(value, NULL); if (quorate != crm_have_quorum) { 
crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost"); crm_have_quorum = quorate; } else { crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost"); } for (member = __xml_first_child(xml); member != NULL; member = __xml_next(member)) { const char *id_s = crm_element_value(member, "id"); const char *addr = crm_element_value(member, "addr"); const char *uname = crm_element_value(member, "uname"); const char *state = crm_element_value(member, "state"); const char *born_s = crm_element_value(member, "born"); const char *seen_s = crm_element_value(member, "seen"); const char *votes_s = crm_element_value(member, "votes"); const char *procs_s = crm_element_value(member, "processes"); int votes = crm_int_helper(votes_s, NULL); unsigned int id = crm_int_helper(id_s, NULL); unsigned int procs = crm_int_helper(procs_s, NULL); /* TODO: These values will contain garbage if version < 0.7.1 */ uint64_t born = crm_int_helper(born_s, NULL); uint64_t seen = crm_int_helper(seen_s, NULL); crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state); } free_xml(xml); } } static void plugin_default_deliver_message(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); free(data); } int plugin_dispatch(gpointer user_data) { int rc = CS_OK; crm_cluster_t *cluster = (crm_cluster_t *) user_data; do { char *buffer = NULL; rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0); if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { return 0; } if (rc != CS_OK) { crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); return -1; } if (buffer == NULL) { /* NULL is a legal "no message afterall" value */ return 0; } /* cpg_deliver_fn_t(cpg_handle_t handle, const struct cpg_name *group_name, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len); */ if (cluster && cluster->cpg.cpg_deliver_fn) { cluster->cpg.cpg_deliver_fn(0, NULL, 0, 0, buffer, 0); } else { plugin_default_deliver_message(0, NULL, 0, 0, buffer, 0); } coroipcc_dispatch_put(ais_ipc_handle); } while (ais_ipc_handle); return 0; } static void plugin_destroy(gpointer user_data) { crm_err("AIS connection terminated"); ais_fd_sync = -1; crm_exit(ENOTCONN); } # if SUPPORT_CMAN static int pcmk_cman_dispatch(gpointer user_data) { int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL); if (rc < 0) { crm_err("Connection to cman failed: %d", rc); pcmk_cman_handle = 0; return FALSE; } return TRUE; } # define MAX_NODES 256 static void cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg) { int rc = 0, lpc = 0, node_count = 0; cman_cluster_t cluster; static cman_node_t cman_nodes[MAX_NODES]; gboolean(*dispatch) (unsigned long long, gboolean) = privdata; switch (reason) { case CMAN_REASON_STATECHANGE: memset(&cluster, 0, sizeof(cluster)); rc = cman_get_cluster(pcmk_cman_handle, &cluster); if (rc < 0) { crm_err("Couldn't query cman cluster details: %d %d", rc, errno); return; } crm_peer_seq = cluster.ci_generation; if (arg != crm_have_quorum) { crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost"); crm_have_quorum = arg; } else { crm_info("Membership %llu: quorum %s", crm_peer_seq, arg ? 
"retained" : "still lost"); } memset(cman_nodes, 0, MAX_NODES * sizeof(cman_node_t)); rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes); if (rc < 0) { crm_err("Couldn't query cman node list: %d %d", rc, errno); return; } for (lpc = 0; lpc < node_count; lpc++) { crm_node_t *peer = NULL; const char *name = NULL; if (cman_nodes[lpc].cn_nodeid == 0) { /* Never allow node ID 0 to be considered a member #315711 */ /* Skip entirely, its a qdisk */ continue; } if(valid_cman_name(cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_nodeid)) { name = cman_nodes[lpc].cn_name; } peer = crm_get_peer(cman_nodes[lpc].cn_nodeid, name); if(cman_nodes[lpc].cn_member) { crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, crm_peer_seq); } else if(peer->state) { crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0); } else { crm_info("State of node %s[%u] is still unknown", peer->uname, peer->id); } } if (dispatch) { dispatch(crm_peer_seq, crm_have_quorum); } break; case CMAN_REASON_TRY_SHUTDOWN: /* Always reply with a negative - pacemaker needs to be stopped first */ crm_notice("CMAN wants to shut down: %s", arg ? "forced" : "optional"); cman_replyto_shutdown(pcmk_cman_handle, 0); break; case CMAN_REASON_CONFIG_UPDATE: /* Ignore */ break; } } # endif gboolean init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { # if SUPPORT_CMAN int rc = -1, fd = -1; cman_cluster_t cluster; struct mainloop_fd_callbacks cman_fd_callbacks = { .dispatch = pcmk_cman_dispatch, .destroy = destroy, }; crm_info("Configuring Pacemaker to obtain quorum from cman"); memset(&cluster, 0, sizeof(cluster)); pcmk_cman_handle = cman_init(dispatch); if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) { crm_err("Couldn't connect to cman"); goto cman_bail; } rc = cman_start_notification(pcmk_cman_handle, cman_event_callback); if (rc < 0) { crm_err("Couldn't register for cman notifications: %d %d", rc, errno); goto cman_bail; } /* Get the current membership state */ cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE, cman_is_quorate(pcmk_cman_handle)); fd = cman_get_fd(pcmk_cman_handle); mainloop_add_fd("cman", G_PRIORITY_MEDIUM, fd, dispatch, &cman_fd_callbacks); cman_bail: if (rc < 0) { cman_finish(pcmk_cman_handle); return FALSE; } # else crm_err("cman qorum is not supported in this build"); crm_exit(DAEMON_RESPAWN_STOP); # endif return TRUE; } # ifdef SUPPORT_COROSYNC gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)) { crm_err("The Corosync quorum API is not supported in this build"); crm_exit(DAEMON_RESPAWN_STOP); return TRUE; } static gboolean init_cs_connection_classic(crm_cluster_t * cluster) { int rc; int pid = 0; char *pid_s = NULL; const char *name = NULL; crm_node_t *peer = NULL; enum crm_proc_flag proc = 0; struct mainloop_fd_callbacks ais_fd_callbacks = { .dispatch = plugin_dispatch, .destroy = cluster->destroy, }; crm_info("Creating connection to our Corosync plugin"); rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, AIS_IPC_MESSAGE_SIZE, &ais_ipc_handle); if (ais_ipc_handle) { coroipcc_fd_get(ais_ipc_handle, &ais_fd_async); } else { crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, strerror(errno), errno); return FALSE; } if (ais_fd_async <= 0 && rc == CS_OK) { crm_err("No context created, but connection reported 'ok'"); rc = CS_ERR_LIBRARY; } if (rc != 
CS_OK) { crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID, ais_error2text(rc), rc); } if (rc != CS_OK) { return FALSE; } if (ais_fd_callbacks.destroy == NULL) { ais_fd_callbacks.destroy = plugin_destroy; } mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster, &ais_fd_callbacks); crm_info("AIS connection established"); pid = getpid(); pid_s = crm_itoa(pid); send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais); free(pid_s); cluster->nodeid = get_local_nodeid(0); name = get_local_node_name(); plugin_get_details(NULL, &(cluster->uname)); if (safe_str_neq(name, cluster->uname)) { crm_crit("Node name mismatch! Corosync supplied %s but our lookup returned %s", cluster->uname, name); crm_notice ("Node name mismatches usually occur when assigned automatically by DHCP servers"); crm_exit(ENOTUNIQ); } proc = text2proc(crm_system_name); peer = crm_get_peer(cluster->nodeid, cluster->uname); crm_update_peer_proc(__FUNCTION__, peer, proc|crm_proc_plugin, ONLINESTATUS); return TRUE; } static int pcmk_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg && is_classic_ais_cluster()) { xmlNode *node = NULL; for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { int id = 0; int children = 0; const char *uname = crm_element_value(node, "uname"); crm_element_value_int(node, "id", &id); crm_element_value_int(node, "processes", &children); if (id == 0) { crm_log_xml_err(msg, "Bad Update"); } else { crm_node_t *peer = crm_get_peer(id, uname); crm_update_peer_proc(__FUNCTION__, peer, children, NULL); } } } free_xml(msg); return 0; } static void pcmk_mcp_destroy(gpointer user_data) { void (*callback) (gpointer data) = user_data; if (callback) { callback(NULL); } } gboolean init_cs_connection(crm_cluster_t * cluster) { int retries = 0; static struct ipc_client_callbacks mcp_callbacks = { .dispatch = pcmk_mcp_dispatch, .destroy = pcmk_mcp_destroy }; while (retries < 5) { int rc = init_cs_connection_once(cluster); retries++; switch (rc) { case CS_OK: if (getenv("HA_mcp") && get_cluster_type() != pcmk_cluster_cman) { xmlNode *poke = create_xml_node(NULL, "poke"); mainloop_io_t *ipc = mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_MEDIUM, 0, cluster->destroy, &mcp_callbacks); crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL); free_xml(poke); } return TRUE; break; case CS_ERR_TRY_AGAIN: case CS_ERR_QUEUE_FULL: sleep(retries); break; default: return FALSE; } } crm_err("Retry count exceeded: %d", retries); return FALSE; } char * classic_node_name(uint32_t nodeid) { return NULL; /* Always use the uname() default for localhost. 
No way to look up peers */ } char * cman_node_name(uint32_t nodeid) { char *name = NULL; # if SUPPORT_CMAN cman_node_t us; cman_handle_t cman; cman = cman_init(NULL); if (cman != NULL && cman_is_active(cman)) { memset(&us, 0, sizeof(cman_node_t)); cman_get_node(cman, nodeid, &us); if(valid_cman_name(us.cn_name, nodeid)) { name = strdup(us.cn_name); crm_info("Using CMAN node name %s for %u", name, nodeid); } } cman_finish(cman); # endif if (name == NULL) { crm_debug("Unable to get node name for nodeid %u", nodeid); } return name; } extern int set_cluster_type(enum cluster_type_e type); gboolean init_cs_connection_once(crm_cluster_t * cluster) { crm_node_t *peer = NULL; enum cluster_type_e stack = get_cluster_type(); crm_peer_init(); /* Here we just initialize comms */ switch (stack) { case pcmk_cluster_classic_ais: if (init_cs_connection_classic(cluster) == FALSE) { return FALSE; } break; case pcmk_cluster_cman: if (cluster_connect_cpg(cluster) == FALSE) { return FALSE; } break; case pcmk_cluster_heartbeat: crm_info("Could not find an active corosync based cluster"); return FALSE; break; default: crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack); return FALSE; break; } crm_info("Connection to '%s': established", name_for_cluster_type(stack)); cluster->nodeid = get_local_nodeid(0); if(cluster->nodeid == 0) { crm_err("Could not establish local nodeid"); return FALSE; } cluster->uname = get_node_name(0); if(cluster->uname == NULL) { crm_err("Could not establish local node name"); return FALSE; } /* Ensure the local node always exists */ peer = crm_get_peer(cluster->nodeid, cluster->uname); cluster->uuid = get_corosync_uuid(peer); return TRUE; } gboolean check_message_sanity(const AIS_Message * msg, const char *data) { gboolean sane = TRUE; int dest = msg->host.type; int tmp_size = msg->header.size - sizeof(AIS_Message); if (sane && msg->header.size == 0) { crm_warn("Message with no size"); sane = FALSE; } if (sane && msg->header.error != CS_OK) { crm_warn("Message header contains an error: %d", msg->header.error); sane = FALSE; } if (sane && ais_data_len(msg) != tmp_size) { crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg), tmp_size); sane = TRUE; } if (sane && ais_data_len(msg) == 0) { crm_warn("Message with no payload"); sane = FALSE; } if (sane && data && msg->is_compressed == FALSE) { int str_size = strlen(data) + 1; if (ais_data_len(msg) != str_size) { int lpc = 0; crm_warn("Message payload is corrupted: expected %d bytes, got %d", ais_data_len(msg), str_size); sane = FALSE; for (lpc = (str_size - 10); lpc < msg->size; lpc++) { if (lpc < 0) { lpc = 0; } crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]); } } } if (sane == FALSE) { crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } else { crm_trace ("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)", msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size); } return sane; } #endif static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = 
NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (env_value) { crm_info("Found '%s' in ENV for option: %s", env_value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } enum cluster_type_e find_corosync_variant(void) { confdb_handle_t config; enum cluster_type_e found = pcmk_cluster_unknown; int rc; char *value = NULL; confdb_handle_t top_handle = 0; hdb_handle_t local_handle = 0; static confdb_callbacks_t callbacks = { }; rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { crm_debug("Could not initialize Cluster Configuration Database API instance: error %d", rc); return found; } top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { found = pcmk_cluster_classic_ais; get_config_opt(config, local_handle, "ver", &value, "0"); crm_trace("Found Pacemaker plugin version: %s", value); break; } local_handle = config_find_next(config, "service", top_handle); } if (found == pcmk_cluster_unknown) { top_handle = config_find_init(config); local_handle = config_find_next(config, "quorum", top_handle); get_config_opt(config, local_handle, "provider", &value, NULL); if (safe_str_eq("quorum_cman", value)) { crm_trace("Found CMAN quorum provider"); found = pcmk_cluster_cman; } } free(value); confdb_finalize(config); if (found == pcmk_cluster_unknown) { crm_err ("Corosync is running, but Pacemaker could not find the CMAN or Pacemaker plugin loaded"); found = pcmk_cluster_invalid; } return found; } gboolean crm_is_corosync_peer_active(const crm_node_t * node) { enum crm_proc_flag proc = crm_proc_none; if (node == NULL) { crm_trace("NULL"); return FALSE; } else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) { crm_trace("%s: state=%s", node->uname, node->state); return FALSE; } else if (is_cman_cluster() && (node->processes & crm_proc_cpg)) { /* If we can still talk to our peer process on that node, * then it's also part of the corosync membership */ crm_trace("%s: processes=%.8x", node->uname, node->processes); return TRUE; } else if (is_classic_ais_cluster()) { if
(node->processes < crm_proc_none) { crm_debug("%s: unknown process list, assuming active for now", node->uname); return TRUE; } else if (is_set(node->processes, crm_proc_none)) { crm_debug("%s: all processes are inactive", node->uname); return FALSE; } else if (is_not_set(node->processes, crm_proc_plugin)) { crm_trace("%s: processes=%.8x", node->uname, node->processes); return FALSE; } } proc = text2proc(crm_system_name); if (proc > crm_proc_none && (node->processes & proc) == 0) { crm_trace("%s: proc %.8x not in %.8x", node->uname, proc, node->processes); return FALSE; } return TRUE; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 9d17bfb259..408aa6e8a9 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1102 +1,1110 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * crm_remote_peer_get(), crm_remote_peer_cache_add() and * crm_remote_peer_cache_remove() directly manage it, * while crm_remote_peer_cache_refresh() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; int crm_remote_peer_cache_size(void) { if (crm_remote_peer_cache == NULL) { return 0; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \brief Get a remote node peer cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, NULL (and set errno) otherwise * * \note When creating a new entry, this will leave the node state undetermined, * so the caller should also call crm_update_peer_state() if the state is * known. 
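 *
 * A minimal caller sketch (the node name and member state below are
 * illustrative; the calls are the real API from this file):
 * \code
 * crm_node_t *rnode = crm_remote_peer_get("remote1");
 *
 * if (rnode == NULL) {
 *     crm_err("Could not cache remote node: %s", pcmk_strerror(errno));
 * } else {
 *     crm_update_peer_state(__FUNCTION__, rnode, CRM_NODE_MEMBER, 0);
 * }
 * \endcode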
*/ crm_node_t * crm_remote_peer_get(const char *node_name) { crm_node_t *node; if (node_name == NULL) { errno = -EINVAL; return NULL; } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { return NULL; } /* Populate the essential information */ node->flags = crm_remote_node; node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = -ENOMEM; return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ crm_update_peer_uname(node, node_name); return node; } /*! * \brief Add a node to the remote peer cache * * \param[in] node_name Name of remote node * * \note This is a legacy convenience wrapper for crm_remote_peer_get() * for callers that don't need the cache entry returned. */ void crm_remote_peer_cache_add(const char *node_name) { CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } void crm_remote_peer_cache_remove(const char *node_name) { if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { crm_trace("removed %s from remote peer cache", node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state, * CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older crmd versions that don't set it. */ static const char * remote_state_from_cib(xmlNode *node_state) { const char *status; status = crm_element_value(node_state, XML_NODE_IN_CLUSTER); if (status && !crm_is_true(status)) { status = CRM_NODE_LOST; } else { status = CRM_NODE_MEMBER; } return status; } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! 
* \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = crm_remote_peer_get(remote); CRM_ASSERT(node); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } else if (is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_bit(node->flags, crm_node_dirty); if (state) { crm_update_peer_state(__FUNCTION__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_bit(((crm_node_t*)value)->flags, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /* search string to find CIB resource entries for guest nodes */ #define XPATH_GUEST_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \ "[@name='" XML_RSC_ATTR_REMOTE_NODE "']" /* search string to find CIB resource entries for remote nodes */ #define XPATH_REMOTE_NODE_CONFIG \ "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \ "[@type='remote'][@provider='pacemaker']" /* search string to find CIB node status entries for pacemaker_remote nodes */ #define XPATH_REMOTE_NODE_STATUS \ "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \ "[@" XML_NODE_IS_REMOTE "='true']" /*! * \brief Repopulate the remote peer cache based on CIB XML * * \param[in] cib CIB XML to parse */ void crm_remote_peer_cache_refresh(xmlNode *cib) { struct refresh_data data; /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = "id"; data.has_state = TRUE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully.
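 *
 * Taken together, the refresh amounts to a mark-and-sweep over the cache,
 * roughly:
 *
 *   g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
 *   ... the three crm_foreach_xpath_result() passes clear the dirty bit ...
 *   g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);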
*/ data.field = "value"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = "id"; data.has_state = FALSE; crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } gboolean crm_is_peer_active(const crm_node_t * node) { if(node == NULL) { return FALSE; } if (is_set(node->flags, crm_remote_node)) { /* remote nodes are never considered active members. This * guarantees they will never be considered for DC membership.*/ return FALSE; } #if SUPPORT_COROSYNC if (is_openais_cluster()) { return crm_is_corosync_peer_active(node); } #endif #if SUPPORT_HEARTBEAT if (is_heartbeat_cluster()) { return crm_is_heartbeat_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && safe_str_neq(node->uname, search->uname)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { - crm_notice("Removing %s/%u from the membership list", node->uname, node->id); + crm_info("Removing node with name %s and id %u from membership cache", + (node->uname? node->uname : "unknown"), node->id); return TRUE; } return FALSE; } /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search; if (crm_peer_cache == NULL) { - crm_trace("Nothing to do, cache not initialized"); + crm_trace("Membership cache not initialized, ignoring purge request"); return 0; } search.id = id; search.uname = name ? strdup(name) : NULL; matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); if(matches) { - crm_notice("Purged %d peers with id=%u and/or uname=%s from the membership cache", - matches, search.id, search.uname); + crm_notice("Purged %d peers with id=%u%s%s from the membership cache", + matches, search.id, + (search.uname? " and/or uname=" : ""), + (search.uname? search.uname : "")); } else { - crm_info("No peers with id=%u and/or uname=%s exist", id, name); + crm_info("No peers with id=%u%s%s to purge from the membership cache", + search.id, (search.uname? " and/or uname=" : ""), + (search.uname? 
search.uname : "")); } free(search.uname); return matches; } static void crm_count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->addr); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } } void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { crm_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. 
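 *
 * For example, a client that wants lost nodes kept in its cache for later
 * inspection might set up like this (a sketch; my_status_cb is a
 * hypothetical client callback):
 * \code
 * crm_peer_init();
 * crm_set_autoreap(FALSE);
 * crm_set_status_callback(my_status_cb);
 * \endcode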
* * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void crm_dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { if(value == user_data) { return TRUE; } return FALSE; } crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (flags & CRM_GET_PEER_REMOTE) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_find_peer(id, uname); } return node; } crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (flags & CRM_GET_PEER_REMOTE) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) { node = crm_get_peer(id, uname); } return node; } crm_node_t * crm_find_peer(unsigned int id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { crm_dump_peer_hash(LOG_WARNING, __FUNCTION__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(safe_str_eq(uname, by_id->uname)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); crm_dump_peer_hash(LOG_INFO, __FUNCTION__); crm_abort(__FILE__, __FUNCTION__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync based clusters use nodeid's * * The functions that call 
crm_update_peer_state() only know nodeid * so 'by_id' is authoritative when merging * * Same for crm_update_peer_proc() */ crm_dump_peer_hash(LOG_DEBUG, __FUNCTION__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint crm_remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } # if !SUPPORT_PLUGIN if (corosync_cmap_has_config("nodelist") != 0) { return 0; } # endif g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (crm_is_peer_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); node = crm_find_peer(id, uname); /* If uname wasn't provided, and crm_find_peer() did not turn up a uname based on id, * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* Try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = crm_find_peer(id, uname); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { crm_update_peer_uname(node, uname); } if(node->uuid == NULL) { const char *uuid = crm_peer_uuid(node); if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update all of a node's information (process list, state, etc.) * * \param[in] source Caller's function name (for log messages) * * \return NULL if node was reaped from peer caches, pointer to node otherwise * * \note This function should not be called within a peer cache iteration, * otherwise reaping could invalidate the iterator.
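 *
 * Because the entry can be reaped (and freed), callers must treat a NULL
 * return as "do not touch this node again"; a sketch, assuming the other
 * arguments are caller locals:
 * \code
 * node = crm_update_peer(__FUNCTION__, id, 0, 0, 0, children,
 *                        uuid, uname, NULL, state);
 * if (node == NULL) {
 *     return;
 * }
 * \endcode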
*/ crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state) { #if SUPPORT_PLUGIN gboolean addr_changed = FALSE; gboolean votes_changed = FALSE; #endif crm_node_t *node = NULL; id = get_corosync_id(id, uuid); node = crm_get_peer(id, uname); CRM_ASSERT(node != NULL); if (node->uuid == NULL) { if (is_openais_cluster()) { /* Yes, overrule whatever was passed in */ crm_peer_uuid(node); } else if (uuid != NULL) { node->uuid = strdup(uuid); } } if (children > 0) { if (crm_update_peer_proc(source, node, children, state) == NULL) { return NULL; } } if (state != NULL) { if (crm_update_peer_state(source, node, state, seen) == NULL) { return NULL; } } #if SUPPORT_HEARTBEAT if (born != 0) { node->born = born; } #endif #if SUPPORT_PLUGIN /* These were only used by the plugin */ if (born != 0) { node->born = born; } if (votes > 0 && node->votes != votes) { votes_changed = TRUE; node->votes = votes; } if (addr != NULL) { if (node->addr == NULL || crm_str_eq(node->addr, addr, FALSE) == FALSE) { addr_changed = TRUE; free(node->addr); node->addr = strdup(addr); } } if (addr_changed || votes_changed) { crm_info("%s: Node %s: id=%u state=%s addr=%s%s votes=%d%s born=" U64T " seen=" U64T " proc=%.32x", source, node->uname, node->id, node->state, node->addr, addr_changed ? " (new)" : "", node->votes, votes_changed ? " (new)" : "", node->born, node->last_seen, node->processes); } #endif return node; } /*! * \internal * \brief Update a node's uname * * \param[in] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ void crm_update_peer_uname(crm_node_t *node, const char *uname) { int i, len = strlen(uname); for (i = 0; i < len; i++) { if (uname[i] >= 'A' && uname[i] <= 'Z') { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } free(node->uname); node->uname = strdup(uname); if (crm_status_callback) { crm_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_openais_cluster() && !is_set(node->flags, crm_remote_node)) { crm_remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. 
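 *
 * A typical call sketch (crm_proc_cpg and ONLINESTATUS are real constants
 * used elsewhere in this file; the surrounding control flow is illustrative):
 * \code
 * node = crm_update_peer_proc(__FUNCTION__, node, crm_proc_cpg, ONLINESTATUS);
 * if (node == NULL) {
 *     return;
 * }
 * \endcode
 */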
*/ crm_node_t * crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, peer2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (is_set(node->flags, crm_remote_node)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (safe_str_eq(status, ONLINESTATUS)) { if ((node->processes & flag) != flag) { set_bit(node->processes, flag); changed = TRUE; } #if SUPPORT_PLUGIN } else if (safe_str_eq(status, CRM_NODE_MEMBER)) { if (flag > 0 && node->processes != flag) { node->processes = flag; changed = TRUE; } #endif } else if (node->processes & flag) { clear_bit(node->processes, flag); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, peer2text(flag), status); } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (crm_status_callback) { crm_status_callback(crm_status_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (crm_peer_cache == NULL) { return NULL; } if (crm_autoreap) { node = crm_update_peer_state(__FUNCTION__, node, is_set(node->processes, crm_get_cluster_proc())? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, peer2text(flag), status); } return node; } void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (is_set(node->flags, crm_remote_node)) { return; } last = node->expected; if (expected != NULL && safe_str_neq(node->expected, expected)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, expected, last); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. 
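 *
 * The iterator argument exists so callers can reap while walking the cache
 * without invalidating the iteration, as crm_reap_unseen_nodes() does:
 * \code
 * g_hash_table_iter_init(&iter, crm_peer_cache);
 * while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 *     crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST,
 *                                membership, &iter);
 * }
 * \endcode
 */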
*/ static crm_node_t * crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, int membership, GHashTableIter *iter) { gboolean is_member; - CRM_CHECK(node != NULL, crm_err("%s: Could not set 'state' to %s", source, state); - return NULL); + CRM_CHECK(node != NULL, + crm_err("Could not set state for unknown host to %s" + CRM_XS " source=%s", state, source); + return NULL); is_member = safe_str_eq(state, CRM_NODE_MEMBER); if (membership && is_member) { node->last_seen = membership; } if (state && safe_str_neq(node->state, state)) { char *last = node->state; enum crm_status_type status_type = is_set(node->flags, crm_remote_node)? crm_status_rstate : crm_status_nstate; node->state = strdup(state); - crm_notice("%s: Node %s[%u] - state is now %s (was %s)", - source, node->uname, node->id, state, last); + crm_notice("Node %s state is now %s " CRM_XS + " nodeid=%u previous=%s source=%s", node->uname, state, + node->id, (last? last : "unknown"), source); if (crm_status_callback) { crm_status_callback(status_type, node, last); } free(last); if (crm_autoreap && !is_member && !is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by * crm_remote_peer_cache_refresh(). */ if(iter) { crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); } else { reap_crm_member(node->id, node->uname); } node = NULL; } } else { - crm_trace("%s: Node %s[%u] - state is unchanged (%s)", source, node->uname, node->id, - state); + crm_trace("Node %s state is unchanged (%s) " CRM_XS + " nodeid=%u source=%s", node->uname, state, node->id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_state(const char *source, crm_node_t * node, const char *state, int membership) { return crm_update_peer_state_iter(source, node, state, membership, NULL); } /*! 
* \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void crm_reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling crm_update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ crm_update_peer_state_iter(__FUNCTION__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } int crm_terminate_member(int nodeid, const char *uname, void *unused) { /* Always use the synchronous, non-mainloop version */ return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } diff --git a/lib/common/logging.c b/lib/common/logging.c index 687902353a..81671b9d07 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1,1247 +1,1247 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned int crm_log_priority = LOG_NOTICE; unsigned int crm_log_level = LOG_INFO; static gboolean crm_tracing_enabled(void); unsigned int crm_trace_nonlog = 0; bool crm_is_daemon = 0; #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER GLogFunc glib_log_default; static void crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); static struct qb_log_callsite *glib_cs = NULL; if (glib_cs == NULL) { glib_cs = qb_log_callsite_get(__FUNCTION__, __FILE__, "glib-handler", LOG_DEBUG, __LINE__, crm_trace_nonlog); } switch (msg_level) { case G_LOG_LEVEL_CRITICAL: log_level = LOG_CRIT; if (crm_is_callsite_active(glib_cs, LOG_DEBUG, 0) == FALSE) { /* log and record how we got here */ crm_abort(__FILE__, __FUNCTION__, __LINE__, message, TRUE, TRUE); } break; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #endif #ifndef NAME_MAX # define NAME_MAX 256 #endif static void crm_trigger_blackbox(int nsig) { if(nsig == SIGTRAP) { /* Turn it on if it wasn't already */ crm_enable_blackbox(nsig); } crm_write_blackbox(nsig, NULL); } const char * daemon_option(const char *option) { char env_name[NAME_MAX]; const char *value = NULL; snprintf(env_name, NAME_MAX, "PCMK_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } snprintf(env_name, NAME_MAX, "HA_%s", option); value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } crm_trace("Nothing found for %s", option); return NULL; } void set_daemon_option(const char *option, const char *value) { char env_name[NAME_MAX]; snprintf(env_name, NAME_MAX, "PCMK_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } snprintf(env_name, NAME_MAX, "HA_%s", option); if (value) { crm_trace("Setting %s to %s", env_name, value); setenv(env_name, value, 1); } else { crm_trace("Unsetting %s", env_name); unsetenv(env_name); } } gboolean daemon_option_enabled(const char *daemon, const char *option) { const char *value = daemon_option(option); if (value != NULL && crm_is_true(value)) { return TRUE; } else if (value != NULL && strstr(value, daemon)) { return TRUE; } return FALSE; } void crm_log_deinit(void) { #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER g_log_set_default_handler(glib_log_default, NULL); #endif } #define FMT_MAX 256 static void set_format_string(int method, const char *daemon) { int offset = 0; char fmt[FMT_MAX]; if (method > QB_LOG_STDERR) { /* When logging to a file */ struct utsname res; if (uname(&res) == 0) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %s 
%10s: ", getpid(), res.nodename, daemon); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%t [%d] %10s: ", getpid(), daemon); } } if (method == QB_LOG_SYSLOG) { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%b"); crm_extended_logging(method, QB_FALSE); } else if (crm_tracing_enabled()) { offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%-12f:%%5l %%g) %%-7p: %%n:\t%%b"); } else { offset += snprintf(fmt + offset, FMT_MAX - offset, "%%g %%-7p: %%n:\t%%b"); } CRM_LOG_ASSERT(offset > 0); qb_log_format_set(method, fmt); } gboolean crm_add_logfile(const char *filename) { bool is_default = false; static int default_fd = -1; static gboolean have_logfile = FALSE; const char *default_logfile = "/var/log/pacemaker.log"; struct stat parent; int fd = 0, rc = 0; FILE *logfile = NULL; char *parent_dir = NULL; char *filename_cp; if (filename == NULL && have_logfile == FALSE) { filename = default_logfile; } if (filename == NULL) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "none")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, "/dev/null")) { return FALSE; /* Nothing to do */ } else if(safe_str_eq(filename, default_logfile)) { is_default = TRUE; } if(is_default && default_fd >= 0) { return TRUE; /* Nothing to do */ } /* Check the parent directory */ filename_cp = strdup(filename); parent_dir = dirname(filename_cp); rc = stat(parent_dir, &parent); if (rc != 0) { crm_err("Directory '%s' does not exist: logging to '%s' is disabled", parent_dir, filename); free(filename_cp); return FALSE; } free(filename_cp); errno = 0; logfile = fopen(filename, "a"); if(logfile == NULL) { crm_err("%s (%d): Logging to '%s' as uid=%u, gid=%u is disabled", pcmk_strerror(errno), errno, filename, geteuid(), getegid()); return FALSE; } /* Check/Set permissions if we're root */ if (geteuid() == 0) { struct stat st; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; gboolean fix = FALSE; int logfd = fileno(logfile); rc = fstat(logfd, &st); if (rc < 0) { crm_perror(LOG_WARNING, "Cannot stat %s", filename); fclose(logfile); return FALSE; } if(crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) == 0) { if (st.st_gid != pcmk_gid) { /* Wrong group */ fix = TRUE; } else if ((st.st_mode & S_IRWXG) != (S_IRGRP | S_IWGRP)) { /* Not read/writable by the correct group */ fix = TRUE; } } if (fix) { rc = fchown(logfd, pcmk_uid, pcmk_gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d", filename, CRM_DAEMON_USER, pcmk_gid); } rc = fchmod(logfd, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); if (rc < 0) { crm_warn("Cannot change the mode of %s to rw-rw----", filename); } fprintf(logfile, "Set r/w permissions for uid=%d, gid=%d on %s\n", pcmk_uid, pcmk_gid, filename); if (fflush(logfile) < 0 || fsync(logfd) < 0) { crm_err("Couldn't write out logfile: %s", filename); } } } /* Close and reopen with libqb */ fclose(logfile); fd = qb_log_file_open(filename); if (fd < 0) { crm_perror(LOG_WARNING, "Couldn't send additional logging to %s", filename); return FALSE; } if(is_default) { default_fd = fd; } else if(default_fd >= 0) { crm_notice("Switching to %s", filename); qb_log_ctl(default_fd, QB_LOG_CONF_ENABLED, QB_FALSE); } crm_notice("Additional logging available in %s", filename); qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); /* qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); Turn on synchronous writes */ /* Enable callsites */ crm_update_callsites(); have_logfile = TRUE; return TRUE; } static int blackbox_trigger = 0; static char *blackbox_file_prefix = NULL; 
static void blackbox_logger(int32_t t, struct qb_log_callsite *cs, time_t timestamp, const char *msg) { if(cs && cs->priority < LOG_ERR) { crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ } else { crm_write_blackbox(0, cs); } } static void crm_control_blackbox(int nsig, bool enable) { if (blackbox_file_prefix == NULL) { pid_t pid = getpid(); blackbox_file_prefix = malloc(NAME_MAX); snprintf(blackbox_file_prefix, NAME_MAX, "%s/%s-%d", CRM_BLACKBOX_DIR, crm_system_name, pid); } if (enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5 * 1024 * 1024); /* Any size change drops existing entries */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); /* Save to disk on abnormal termination */ crm_signal(SIGSEGV, crm_trigger_blackbox); crm_signal(SIGABRT, crm_trigger_blackbox); crm_signal(SIGILL, crm_trigger_blackbox); crm_signal(SIGBUS, crm_trigger_blackbox); crm_update_callsites(); blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); crm_trace("Trigger: %d is %d %d", blackbox_trigger, qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); crm_update_callsites(); } else if (!enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); } } void crm_enable_blackbox(int nsig) { crm_control_blackbox(nsig, TRUE); } void crm_disable_blackbox(int nsig) { crm_control_blackbox(nsig, FALSE); } void crm_write_blackbox(int nsig, struct qb_log_callsite *cs) { static int counter = 1; static time_t last = 0; char buffer[NAME_MAX]; time_t now = time(NULL); if (blackbox_file_prefix == NULL) { return; } switch (nsig) { case 0: case SIGTRAP: /* The graceful case - such as assertion failure or user request */ if (nsig == 0 && now == last) { /* Prevent over-dumping */ return; } snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++); if (nsig == SIGTRAP) { crm_notice("Blackbox dump requested, please see %s for contents", buffer); } else if (cs) { syslog(LOG_NOTICE, "Problem detected at %s:%d (%s), please see %s for additional details", cs->function, cs->lineno, cs->filename, buffer); } else { crm_notice("Problem detected, please see %s for additional details", buffer); } last = now; qb_log_blackbox_write_to_file(buffer); /* Flush the existing contents * A size change would also work */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); break; default: /* Do as little as possible, just try to get what we have out * We logged the filename when the blackbox was enabled */ crm_signal(nsig, SIG_DFL); qb_log_blackbox_write_to_file(blackbox_file_prefix); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); raise(nsig); break; } } gboolean crm_log_cli_init(const char *entity) { return crm_log_init(entity, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE); } static const char * crm_quark_to_string(uint32_t tag) { const char *text = g_quark_to_string(tag); if (text) { return text; } return ""; } static void crm_log_filter_source(int source, const char *trace_files, const char *trace_fns, const char *trace_fmts, const char *trace_tags, const char *trace_blackbox, struct qb_log_callsite *cs) { if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) 
!= QB_LOG_STATE_ENABLED) { return; } else if (cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) { /* Blackbox gets everything if enabled */ qb_bit_set(cs->targets, source); } else if (source == blackbox_trigger && blackbox_trigger > 0) { /* Should this log message result in the blackbox being dumped */ if (cs->priority <= LOG_ERR) { qb_bit_set(cs->targets, source); } else if (trace_blackbox) { char *key = crm_strdup_printf("%s:%d", cs->function, cs->lineno); if (strstr(trace_blackbox, key) != NULL) { qb_bit_set(cs->targets, source); } free(key); } } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } /* Log file tracing options... */ } else if (cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } else if (trace_files && strstr(trace_files, cs->filename) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fns && strstr(trace_fns, cs->function) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fmts && strstr(trace_fmts, cs->format) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_tags && cs->tags != 0 && cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) { qb_bit_set(cs->targets, source); } } static void crm_log_filter(struct qb_log_callsite *cs) { int lpc = 0; static int need_init = 1; static const char *trace_fns = NULL; static const char *trace_tags = NULL; static const char *trace_fmts = NULL; static const char *trace_files = NULL; static const char *trace_blackbox = NULL; if (need_init) { need_init = 0; trace_fns = getenv("PCMK_trace_functions"); trace_fmts = getenv("PCMK_trace_formats"); trace_tags = getenv("PCMK_trace_tags"); trace_files = getenv("PCMK_trace_files"); trace_blackbox = getenv("PCMK_trace_blackbox"); if (trace_tags != NULL) { uint32_t tag; char token[500]; const char *offset = NULL; const char *next = trace_tags; do { offset = next; next = strchrnul(offset, ','); snprintf(token, 499, "%.*s", (int)(next - offset), offset); tag = g_quark_from_string(token); crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } } cs->targets = 0; /* Reset then find targets to enable */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox, cs); } } gboolean crm_is_callsite_active(struct qb_log_callsite *cs, uint8_t level, uint32_t tags) { gboolean refilter = FALSE; if (cs == NULL) { return FALSE; } if (cs->priority != level) { cs->priority = level; refilter = TRUE; } if (cs->tags != tags) { cs->tags = tags; refilter = TRUE; } if (refilter) { crm_log_filter(cs); } if (cs->targets == 0) { return FALSE; } return TRUE; } void crm_update_callsites(void) { static gboolean log = TRUE; if (log) { log = FALSE; crm_debug ("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s", crm_log_level, getenv("PCMK_trace_files"), getenv("PCMK_trace_functions"), getenv("PCMK_trace_formats"), getenv("PCMK_trace_tags")); } qb_log_filter_fn_set(crm_log_filter); } static gboolean crm_tracing_enabled(void) { if (crm_log_level >= LOG_TRACE) { return TRUE; } else if (getenv("PCMK_trace_files") || getenv("PCMK_trace_functions") || getenv("PCMK_trace_formats") || getenv("PCMK_trace_tags")) { return TRUE; } return FALSE; } static int crm_priority2int(const char *name) { struct syslog_names { const char *name; 
int priority; }; static struct syslog_names p_names[] = { {"emerg", LOG_EMERG}, {"alert", LOG_ALERT}, {"crit", LOG_CRIT}, {"error", LOG_ERR}, {"warning", LOG_WARNING}, {"notice", LOG_NOTICE}, {"info", LOG_INFO}, {"debug", LOG_DEBUG}, {NULL, -1} }; int lpc; for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { if (crm_str_eq(p_names[lpc].name, name, TRUE)) { return p_names[lpc].priority; } } return crm_log_priority; } static void crm_identity(const char *entity, int argc, char **argv) { if(crm_system_name != NULL) { /* Nothing to do */ } else if (entity) { free(crm_system_name); crm_system_name = strdup(entity); } else if (argc > 0 && argv != NULL) { char *mutable = strdup(argv[0]); char *modified = basename(mutable); if (strstr(modified, "lt-") == modified) { modified += 3; } free(crm_system_name); crm_system_name = strdup(modified); free(mutable); } else if (crm_system_name == NULL) { crm_system_name = strdup("Unknown"); } setenv("PCMK_service", crm_system_name, 1); } void crm_log_preinit(const char *entity, int argc, char **argv) { /* Configure libqb logging with nothing turned on */ int lpc = 0; int32_t qb_facility = 0; static bool have_logging = FALSE; if(have_logging == FALSE) { have_logging = TRUE; crm_xml_init(); /* Sets buffer allocation strategy */ if (crm_trace_nonlog == 0) { crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint"); } umask(S_IWGRP | S_IWOTH | S_IROTH); /* Redirect messages from glib functions to our handler */ #ifdef HAVE_G_LOG_SET_DEFAULT_HANDLER glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); #endif /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */ /* Who do we log as */ crm_identity(entity, argc, argv); qb_facility = qb_log_facility2int("local0"); qb_log_init(crm_system_name, qb_facility, LOG_ERR); crm_log_level = LOG_CRIT; /* Nuke any syslog activity until its asked for */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); /* Set format strings */ qb_log_tags_stringify_fn_set(crm_quark_to_string); for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { set_format_string(lpc, crm_system_name); } } } gboolean crm_log_init(const char *entity, uint8_t level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet) { const char *syslog_priority = NULL; const char *logfile = daemon_option("logfile"); const char *facility = daemon_option("logfacility"); const char *f_copy = facility; crm_is_daemon = daemon; crm_log_preinit(entity, argc, argv); if(level > crm_log_level) { crm_log_level = level; } /* Should we log to syslog */ if (facility == NULL) { if(crm_is_daemon) { facility = "daemon"; } else { facility = "none"; } set_daemon_option("logfacility", facility); } if (safe_str_eq(facility, "none")) { quiet = TRUE; } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(facility)); } if (daemon_option_enabled(crm_system_name, "debug")) { /* Override the default setting */ crm_log_level = LOG_DEBUG; } /* What lower threshold do we have for sending to syslog */ syslog_priority = daemon_option("logpriority"); if(syslog_priority) { int priority = crm_priority2int(syslog_priority); crm_log_priority = priority; qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", priority); } else { qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_NOTICE); } if (!quiet) { /* Nuke any syslog activity */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, 
QB_TRUE); } /* Should we log to stderr */ if (daemon_option_enabled(crm_system_name, "stderr")) { /* Override the default setting */ to_stderr = TRUE; } crm_enable_stderr(to_stderr); /* Should we log to a file */ if (safe_str_eq("none", logfile)) { /* No soup^Hlogs for you! */ } else if(crm_is_daemon) { /* The daemons always get a log file, unless explicitly set to configured 'none' */ crm_add_logfile(logfile); } else if(logfile) { crm_add_logfile(logfile); } if (crm_is_daemon && daemon_option_enabled(crm_system_name, "blackbox")) { crm_enable_blackbox(0); } /* Summary */ crm_trace("Quiet: %d, facility %s", quiet, f_copy); daemon_option("logfile"); daemon_option("logfacility"); crm_update_callsites(); /* Ok, now we can start logging... */ if (quiet == FALSE && crm_is_daemon == FALSE) { crm_log_args(argc, argv); } if (crm_is_daemon) { const char *user = getenv("USER"); if (user != NULL && safe_str_neq(user, "root") && safe_str_neq(user, CRM_DAEMON_USER)) { crm_trace("Not switching to corefile directory for %s", user); crm_is_daemon = FALSE; } } if (crm_is_daemon) { int user = getuid(); const char *base = CRM_CORE_DIR; struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (safe_str_neq(pwent->pw_name, "root") && safe_str_neq(pwent->pw_name, CRM_DAEMON_USER)) { crm_trace("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(base) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to %s", base); } else { - crm_info("Changed active directory to %s/%s", base, pwent->pw_name); + crm_info("Changed active directory to %s", base); #if 0 { char path[512]; snprintf(path, 512, "%s-%d", crm_system_name, getpid()); mkdir(path, 0750); chdir(path); crm_info("Changed active directory to %s/%s/%s", base, pwent->pw_name, path); } #endif } /* Original meanings from signal(7) * * Signal Value Action Comment * SIGTRAP 5 Core Trace/breakpoint trap * SIGUSR1 30,10,16 Term User-defined signal 1 * SIGUSR2 31,12,17 Term User-defined signal 2 * * Our usage is as similar as possible */ mainloop_add_signal(SIGUSR1, crm_enable_blackbox); mainloop_add_signal(SIGUSR2, crm_disable_blackbox); mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); } return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; crm_log_level = level; crm_update_callsites(); crm_trace("New log level: %d", level); return old; } void crm_enable_stderr(int enable) { if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); crm_update_callsites(); } else if (enable == FALSE) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } } void crm_bump_log_level(int argc, char **argv) { static int args = TRUE; int level = crm_log_level; if (args && argc > 1) { crm_log_args(argc, argv); } if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { set_crm_log_level(level + 1); } /* Enable after potentially logging the argstring, not before */ crm_enable_stderr(TRUE); } unsigned int get_crm_log_level(void) { return crm_log_level; } #define ARGS_FMT "Invoked: %s" void crm_log_args(int argc, char **argv) { int lpc = 0; int len = 0; int existing_len = 0; int line = __LINE__; static int logged = 0; char *arg_string = NULL; if (argc == 0 || argv == NULL || logged) { return; } logged = 1; for (; lpc < argc; lpc++) { if (argv[lpc] == NULL) { break; } len = 2 + 
strlen(argv[lpc]); /* +1 space, +1 EOS */ arg_string = realloc_safe(arg_string, len + existing_len); existing_len += sprintf(arg_string + existing_len, "%s ", argv[lpc]); } qb_log_from_external_source(__func__, __FILE__, ARGS_FMT, LOG_NOTICE, line, 0, arg_string); free(arg_string); } const char * pcmk_errorname(int rc) { int error = ABS(rc); switch (error) { case E2BIG: return "E2BIG"; case EACCES: return "EACCES"; case EADDRINUSE: return "EADDRINUSE"; case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; case EAFNOSUPPORT: return "EAFNOSUPPORT"; case EAGAIN: return "EAGAIN"; case EALREADY: return "EALREADY"; case EBADF: return "EBADF"; case EBADMSG: return "EBADMSG"; case EBUSY: return "EBUSY"; case ECANCELED: return "ECANCELED"; case ECHILD: return "ECHILD"; case ECOMM: return "ECOMM"; case ECONNABORTED: return "ECONNABORTED"; case ECONNREFUSED: return "ECONNREFUSED"; case ECONNRESET: return "ECONNRESET"; /* case EDEADLK: return "EDEADLK"; */ case EDESTADDRREQ: return "EDESTADDRREQ"; case EDOM: return "EDOM"; case EDQUOT: return "EDQUOT"; case EEXIST: return "EEXIST"; case EFAULT: return "EFAULT"; case EFBIG: return "EFBIG"; case EHOSTDOWN: return "EHOSTDOWN"; case EHOSTUNREACH: return "EHOSTUNREACH"; case EIDRM: return "EIDRM"; case EILSEQ: return "EILSEQ"; case EINPROGRESS: return "EINPROGRESS"; case EINTR: return "EINTR"; case EINVAL: return "EINVAL"; case EIO: return "EIO"; case EISCONN: return "EISCONN"; case EISDIR: return "EISDIR"; case ELIBACC: return "ELIBACC"; case ELOOP: return "ELOOP"; case EMFILE: return "EMFILE"; case EMLINK: return "EMLINK"; case EMSGSIZE: return "EMSGSIZE"; case EMULTIHOP: return "EMULTIHOP"; case ENAMETOOLONG: return "ENAMETOOLONG"; case ENETDOWN: return "ENETDOWN"; case ENETRESET: return "ENETRESET"; case ENETUNREACH: return "ENETUNREACH"; case ENFILE: return "ENFILE"; case ENOBUFS: return "ENOBUFS"; case ENODATA: return "ENODATA"; case ENODEV: return "ENODEV"; case ENOENT: return "ENOENT"; case ENOEXEC: return "ENOEXEC"; case ENOKEY: return "ENOKEY"; case ENOLCK: return "ENOLCK"; case ENOLINK: return "ENOLINK"; case ENOMEM: return "ENOMEM"; case ENOMSG: return "ENOMSG"; case ENOPROTOOPT: return "ENOPROTOOPT"; case ENOSPC: return "ENOSPC"; case ENOSR: return "ENOSR"; case ENOSTR: return "ENOSTR"; case ENOSYS: return "ENOSYS"; case ENOTBLK: return "ENOTBLK"; case ENOTCONN: return "ENOTCONN"; case ENOTDIR: return "ENOTDIR"; case ENOTEMPTY: return "ENOTEMPTY"; case ENOTSOCK: return "ENOTSOCK"; /* case ENOTSUP: return "ENOTSUP"; */ case ENOTTY: return "ENOTTY"; case ENOTUNIQ: return "ENOTUNIQ"; case ENXIO: return "ENXIO"; case EOPNOTSUPP: return "EOPNOTSUPP"; case EOVERFLOW: return "EOVERFLOW"; case EPERM: return "EPERM"; case EPFNOSUPPORT: return "EPFNOSUPPORT"; case EPIPE: return "EPIPE"; case EPROTO: return "EPROTO"; case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; case EPROTOTYPE: return "EPROTOTYPE"; case ERANGE: return "ERANGE"; case EREMOTE: return "EREMOTE"; case EREMOTEIO: return "EREMOTEIO"; case EROFS: return "EROFS"; case ESHUTDOWN: return "ESHUTDOWN"; case ESPIPE: return "ESPIPE"; case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; case ESRCH: return "ESRCH"; case ESTALE: return "ESTALE"; case ETIME: return "ETIME"; case ETIMEDOUT: return "ETIMEDOUT"; case ETXTBSY: return "ETXTBSY"; case EUNATCH: return "EUNATCH"; case EUSERS: return "EUSERS"; /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ case EXDEV: return "EXDEV"; #ifdef EBADE /* Not available on OSX */ case EBADE: return "EBADE"; case EBADFD: return "EBADFD"; case EBADSLT: return "EBADSLT"; case EDEADLOCK: 
return "EDEADLOCK"; case EBADR: return "EBADR"; case EBADRQC: return "EBADRQC"; case ECHRNG: return "ECHRNG"; #ifdef EISNAM /* Not available on Illumos/Solaris */ case EISNAM: return "EISNAM"; case EKEYEXPIRED: return "EKEYEXPIRED"; case EKEYREJECTED: return "EKEYREJECTED"; case EKEYREVOKED: return "EKEYREVOKED"; #endif case EL2HLT: return "EL2HLT"; case EL2NSYNC: return "EL2NSYNC"; case EL3HLT: return "EL3HLT"; case EL3RST: return "EL3RST"; case ELIBBAD: return "ELIBBAD"; case ELIBMAX: return "ELIBMAX"; case ELIBSCN: return "ELIBSCN"; case ELIBEXEC: return "ELIBEXEC"; #ifdef ENOMEDIUM /* Not available on Illumos/Solaris */ case ENOMEDIUM: return "ENOMEDIUM"; case EMEDIUMTYPE: return "EMEDIUMTYPE"; #endif case ENONET: return "ENONET"; case ENOPKG: return "ENOPKG"; case EREMCHG: return "EREMCHG"; case ERESTART: return "ERESTART"; case ESTRPIPE: return "ESTRPIPE"; #ifdef EUCLEAN /* Not available on Illumos/Solaris */ case EUCLEAN: return "EUCLEAN"; #endif case EXFULL: return "EXFULL"; #endif case pcmk_err_generic: return "pcmk_err_generic"; case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; case pcmk_err_schema_validation: return "pcmk_err_schema_validation"; case pcmk_err_transform_failed: return "pcmk_err_transform_failed"; case pcmk_err_old_data: return "pcmk_err_old_data"; case pcmk_err_diff_failed: return "pcmk_err_diff_failed"; case pcmk_err_diff_resync: return "pcmk_err_diff_resync"; case pcmk_err_cib_modified: return "pcmk_err_cib_modified"; case pcmk_err_cib_backup: return "pcmk_err_cib_backup"; case pcmk_err_cib_save: return "pcmk_err_cib_save"; case pcmk_err_cib_corrupt: return "pcmk_err_cib_corrupt"; } return "Unknown"; } const char * pcmk_strerror(int rc) { int error = abs(rc); if (error == 0) { return "OK"; } else if (error < PCMK_ERROR_OFFSET) { return strerror(error); } switch (error) { case pcmk_err_generic: return "Generic Pacemaker error"; case pcmk_err_no_quorum: return "Operation requires quorum"; case pcmk_err_schema_validation: return "Update does not conform to the configured schema"; case pcmk_err_transform_failed: return "Schema transform failed"; case pcmk_err_old_data: return "Update was older than existing configuration"; case pcmk_err_diff_failed: return "Application of an update diff failed"; case pcmk_err_diff_resync: return "Application of an update diff failed, requesting a full refresh"; case pcmk_err_cib_modified: return "The on-disk configuration was manually modified"; case pcmk_err_cib_backup: return "Could not archive the previous configuration"; case pcmk_err_cib_save: return "Could not save the new configuration to disk"; case pcmk_err_cib_corrupt: return "Could not parse on-disk configuration"; case pcmk_err_schema_unchanged: return "Schema is already the latest available"; /* The following cases will only be hit on systems for which they are non-standard */ /* coverity[dead_error_condition] False positive on non-Linux */ case ENOTUNIQ: return "Name not unique on network"; /* coverity[dead_error_condition] False positive on non-Linux */ case ECOMM: return "Communication error on send"; /* coverity[dead_error_condition] False positive on non-Linux */ case ELIBACC: return "Can not access a needed shared library"; /* coverity[dead_error_condition] False positive on non-Linux */ case EREMOTEIO: return "Remote I/O error"; /* coverity[dead_error_condition] False positive on non-Linux */ case EUNATCH: return "Protocol driver not attached"; /* coverity[dead_error_condition] False positive on non-Linux */ case ENOKEY: return "Required key not available"; 
} crm_err("Unknown error code: %d", rc); return "Unknown error"; } const char * bz2_strerror(int rc) { /* http://www.bzip.org/1.0.3/html/err-handling.html */ switch (rc) { case BZ_OK: case BZ_RUN_OK: case BZ_FLUSH_OK: case BZ_FINISH_OK: case BZ_STREAM_END: return "Ok"; case BZ_CONFIG_ERROR: return "libbz2 has been improperly compiled on your platform"; case BZ_SEQUENCE_ERROR: return "library functions called in the wrong order"; case BZ_PARAM_ERROR: return "parameter is out of range or otherwise incorrect"; case BZ_MEM_ERROR: return "memory allocation failed"; case BZ_DATA_ERROR: return "data integrity error is detected during decompression"; case BZ_DATA_ERROR_MAGIC: return "the compressed stream does not start with the correct magic bytes"; case BZ_IO_ERROR: return "error reading or writing in the compressed file"; case BZ_UNEXPECTED_EOF: return "compressed file finishes before the logical end of stream is detected"; case BZ_OUTBUFF_FULL: return "output data will not fit into the buffer provided"; } return "Unknown error"; } void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output) { const char *next = NULL; const char *offset = NULL; if (output == NULL) { level = LOG_DEBUG; output = "-- empty --"; } next = output; do { offset = next; next = strchrnul(offset, '\n'); do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, (int)(next - offset), offset); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } char * crm_strdup_printf (char const *format, ...) { va_list ap; int len = 0; char *string = NULL; va_start(ap, format); len = vasprintf (&string, format, ap); CRM_ASSERT(len > 0); va_end(ap); return string; } diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c index fceebbac86..25823b96c6 100644 --- a/lib/common/mainloop.c +++ b/lib/common/mainloop.c @@ -1,1247 +1,1249 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include struct mainloop_child_s { pid_t pid; char *desc; unsigned timerid; unsigned watchid; gboolean timeout; void *privatedata; enum mainloop_child_flags flags; /* Called when a process dies */ void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode); }; struct trigger_s { GSource source; gboolean running; gboolean trigger; void *user_data; guint id; }; static gboolean crm_trigger_prepare(GSource * source, gint * timeout) { crm_trigger_t *trig = (crm_trigger_t *) source; /* cluster-glue's FD and IPC related sources make use of * g_source_add_poll() but do not set a timeout in their prepare * functions * * This means mainloop's poll() will block until an event for one * of these sources occurs - any /other/ type of source, such as * this one or g_idle_*, that doesn't use g_source_add_poll() is * S-O-L and won't be processed until something fd-based * happens. * * Luckily the timeout we can set here affects all sources and * puts an upper limit on how long poll() can take. * * So unconditionally set a small-ish timeout, but not so small that * we're in constant motion, which will act as an upper bound on * how long the signal handling might be delayed for. */ *timeout = 500; /* Timeout in ms */ return trig->trigger; } static gboolean crm_trigger_check(GSource * source) { crm_trigger_t *trig = (crm_trigger_t *) source; return trig->trigger; } static gboolean crm_trigger_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { int rc = TRUE; crm_trigger_t *trig = (crm_trigger_t *) source; if (trig->running) { /* Wait until the existing job is complete before starting the next one */ return TRUE; } trig->trigger = FALSE; if (callback) { rc = callback(trig->user_data); if (rc < 0) { crm_trace("Trigger handler %p not yet complete", trig); trig->running = TRUE; rc = TRUE; } } return rc; } static void crm_trigger_finalize(GSource * source) { crm_trace("Trigger %p destroyed", source); } #if 0 struct _GSourceCopy { gpointer callback_data; GSourceCallbackFuncs *callback_funcs; const GSourceFuncs *source_funcs; guint ref_count; GMainContext *context; gint priority; guint flags; guint source_id; GSList *poll_fds; GSource *prev; GSource *next; char *name; void *priv; }; static int g_source_refcount(GSource * source) { /* Duplicating the contents of private header files is a necessary evil */ if (source) { struct _GSourceCopy *evil = (struct _GSourceCopy*)source; return evil->ref_count; } return 0; } #else static int g_source_refcount(GSource * source) { return 0; } #endif static GSourceFuncs crm_trigger_funcs = { crm_trigger_prepare, crm_trigger_check, crm_trigger_dispatch, crm_trigger_finalize, }; static crm_trigger_t * mainloop_setup_trigger(GSource * source, int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { crm_trigger_t *trigger = NULL; trigger = (crm_trigger_t *) source; trigger->id = 0; trigger->trigger = FALSE; trigger->user_data = userdata; if (dispatch) { g_source_set_callback(source, dispatch, trigger, NULL); } g_source_set_priority(source, priority); g_source_set_can_recurse(source, FALSE); crm_trace("Setup %p with ref-count=%u", source, g_source_refcount(source)); trigger->id =
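/* A hedged sketch of how the trigger machinery above is typically driven
 * (job_dispatch and start_async_work are invented for illustration; the
 * -1/0/1 dispatch contract is documented below at mainloop_add_trigger()):
 *
 *     static int
 *     job_dispatch(gpointer user_data)
 *     {
 *         if (start_async_work(user_data)) {
 *             return -1;  // still running; the worker must call
 *                         // mainloop_trigger_complete() when done
 *         }
 *         return 1;       // finished; keep the trigger installed
 *     }
 *
 *     crm_trigger_t *t = mainloop_add_trigger(G_PRIORITY_HIGH, job_dispatch, data);
 *     mainloop_set_trigger(t);  // queue a run; prepare()/check() above then
 *                               // report TRUE and dispatch fires within ~500ms
 */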
g_source_attach(source, NULL); crm_trace("Attached %p with ref-count=%u", source, g_source_refcount(source)); return trigger; } void mainloop_trigger_complete(crm_trigger_t * trig) { crm_trace("Trigger handler %p complete", trig); trig->running = FALSE; } /* If dispatch returns: * -1: Job running but not complete * 0: Remove the trigger from mainloop * 1: Leave the trigger in mainloop */ crm_trigger_t * mainloop_add_trigger(int priority, int (*dispatch) (gpointer user_data), gpointer userdata) { GSource *source = NULL; CRM_ASSERT(sizeof(crm_trigger_t) > sizeof(GSource)); source = g_source_new(&crm_trigger_funcs, sizeof(crm_trigger_t)); CRM_ASSERT(source != NULL); return mainloop_setup_trigger(source, priority, dispatch, userdata); } void mainloop_set_trigger(crm_trigger_t * source) { if(source) { source->trigger = TRUE; } } gboolean mainloop_destroy_trigger(crm_trigger_t * source) { GSource *gs = NULL; if(source == NULL) { return TRUE; } gs = (GSource *)source; if(g_source_refcount(gs) > 2) { crm_info("Trigger %p is still referenced %u times", gs, g_source_refcount(gs)); } g_source_destroy(gs); /* Remove from mainloop, ref_count-- */ g_source_unref(gs); /* The caller no longer carries a reference to source * * At this point the source should be free'd, * unless we're currently processing said * source, in which case mainloop holds an * additional reference and it will be free'd * once our processing completes */ return TRUE; } typedef struct signal_s { crm_trigger_t trigger; /* must be first */ void (*handler) (int sig); int signal; } crm_signal_t; static crm_signal_t *crm_signals[NSIG]; static gboolean crm_signal_dispatch(GSource * source, GSourceFunc callback, gpointer userdata) { crm_signal_t *sig = (crm_signal_t *) source; if(sig->signal != SIGCHLD) { - crm_notice("Invoking handler for signal %d: %s", sig->signal, strsignal(sig->signal)); + crm_notice("Caught '%s' signal "CRM_XS" %d (%s handler)", + strsignal(sig->signal), sig->signal, + (sig->handler? "invoking" : "no")); } sig->trigger.trigger = FALSE; if (sig->handler) { sig->handler(sig->signal); } return TRUE; } static void mainloop_signal_handler(int sig) { if (sig > 0 && sig < NSIG && crm_signals[sig] != NULL) { mainloop_set_trigger((crm_trigger_t *) crm_signals[sig]); } } static GSourceFuncs crm_signal_funcs = { crm_trigger_prepare, crm_trigger_check, crm_signal_dispatch, crm_trigger_finalize, }; gboolean crm_signal(int sig, void (*dispatch) (int sig)) { sigset_t mask; struct sigaction sa; struct sigaction old; if (sigemptyset(&mask) < 0) { crm_perror(LOG_ERR, "Call to sigemptyset failed"); return FALSE; } memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = dispatch; sa.sa_flags = SA_RESTART; sa.sa_mask = mask; if (sigaction(sig, &sa, &old) < 0) { crm_perror(LOG_ERR, "Could not install signal handler for signal %d", sig); return FALSE; } return TRUE; } gboolean mainloop_add_signal(int sig, void (*dispatch) (int sig)) { GSource *source = NULL; int priority = G_PRIORITY_HIGH - 1; if (sig == SIGTERM) { /* TERM is higher priority than other signals, * signals are higher priority than other ipc. 
* Yes, minus: smaller is "higher" */ priority--; } if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signals[sig] != NULL && crm_signals[sig]->handler == dispatch) { crm_trace("Signal handler for %d is already installed", sig); return TRUE; } else if (crm_signals[sig] != NULL) { crm_err("Different signal handler for %d is already installed", sig); return FALSE; } CRM_ASSERT(sizeof(crm_signal_t) > sizeof(GSource)); source = g_source_new(&crm_signal_funcs, sizeof(crm_signal_t)); crm_signals[sig] = (crm_signal_t *) mainloop_setup_trigger(source, priority, NULL, NULL); CRM_ASSERT(crm_signals[sig] != NULL); crm_signals[sig]->handler = dispatch; crm_signals[sig]->signal = sig; if (crm_signal(sig, mainloop_signal_handler) == FALSE) { crm_signal_t *tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return FALSE; } #if 0 /* If we want signals to interrupt mainloop's poll(), instead of waiting for * the timeout, then we should call siginterrupt() below * * For now, just enforce a low timeout */ if (siginterrupt(sig, 1) < 0) { crm_perror(LOG_INFO, "Could not enable system call interruptions for signal %d", sig); } #endif return TRUE; } gboolean mainloop_destroy_signal(int sig) { crm_signal_t *tmp = NULL; if (sig >= NSIG || sig < 0) { crm_err("Signal %d is out of range", sig); return FALSE; } else if (crm_signal(sig, NULL) == FALSE) { crm_perror(LOG_ERR, "Could not uninstall signal handler for signal %d", sig); return FALSE; } else if (crm_signals[sig] == NULL) { return TRUE; } crm_trace("Destroying signal %d", sig); tmp = crm_signals[sig]; crm_signals[sig] = NULL; mainloop_destroy_trigger((crm_trigger_t *) tmp); return TRUE; } static qb_array_t *gio_map = NULL; void mainloop_cleanup(void) { if(gio_map) { qb_array_free(gio_map); } } /* * libqb... 
*/ struct gio_to_qb_poll { int32_t is_used; guint source; int32_t events; void *data; qb_ipcs_dispatch_fn_t fn; enum qb_loop_priority p; }; static gboolean gio_read_socket(GIOChannel * gio, GIOCondition condition, gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; gint fd = g_io_channel_unix_get_fd(gio); crm_trace("%p.%d %d", data, fd, condition); /* if this assert gets hit, then there is a race condition between * when we destroy a fd and when mainloop actually gives it up */ CRM_ASSERT(adaptor->is_used > 0); return (adaptor->fn(fd, condition, adaptor->data) == 0); } static void gio_poll_destroy(gpointer data) { struct gio_to_qb_poll *adaptor = (struct gio_to_qb_poll *)data; adaptor->is_used--; CRM_ASSERT(adaptor->is_used >= 0); if (adaptor->is_used == 0) { crm_trace("Marking adaptor %p unused", adaptor); adaptor->source = 0; } } static int32_t gio_poll_dispatch_update(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn, int32_t add) { struct gio_to_qb_poll *adaptor; GIOChannel *channel; int32_t res = 0; res = qb_array_index(gio_map, fd, (void **)&adaptor); if (res < 0) { crm_err("Array lookup failed for fd=%d: %d", fd, res); return res; } crm_trace("Adding fd=%d to mainloop as adaptor %p", fd, adaptor); if (add && adaptor->source) { crm_err("Adaptor for descriptor %d is still in-use", fd); return -EEXIST; } if (!add && !adaptor->is_used) { crm_err("Adaptor for descriptor %d is not in-use", fd); return -ENOENT; } /* channel is created with ref_count = 1 */ channel = g_io_channel_unix_new(fd); if (!channel) { crm_err("No memory left to add fd=%d", fd); return -ENOMEM; } if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } /* Because unlike the poll() API, glib doesn't tell us about HUPs by default */ evts |= (G_IO_HUP | G_IO_NVAL | G_IO_ERR); adaptor->fn = fn; adaptor->events = evts; adaptor->data = data; adaptor->p = p; adaptor->is_used++; adaptor->source = g_io_add_watch_full(channel, G_PRIORITY_DEFAULT, evts, gio_read_socket, adaptor, gio_poll_destroy); /* Now that mainloop holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new().
* * This means that channel will be free'd by: * g_main_context_dispatch() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after gio_poll_destroy() completes */ g_io_channel_unref(channel); crm_trace("Added to mainloop with gsource id=%d", adaptor->source); if (adaptor->source > 0) { return 0; } return -EINVAL; } static int32_t gio_poll_dispatch_add(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_TRUE); } static int32_t gio_poll_dispatch_mod(enum qb_loop_priority p, int32_t fd, int32_t evts, void *data, qb_ipcs_dispatch_fn_t fn) { return gio_poll_dispatch_update(p, fd, evts, data, fn, QB_FALSE); } static int32_t gio_poll_dispatch_del(int32_t fd) { struct gio_to_qb_poll *adaptor; crm_trace("Looking for fd=%d", fd); if (qb_array_index(gio_map, fd, (void **)&adaptor) == 0) { if (adaptor->source) { g_source_remove(adaptor->source); adaptor->source = 0; } } return 0; } struct qb_ipcs_poll_handlers gio_poll_funcs = { .job_add = NULL, .dispatch_add = gio_poll_dispatch_add, .dispatch_mod = gio_poll_dispatch_mod, .dispatch_del = gio_poll_dispatch_del, }; static enum qb_ipc_type pick_ipc_type(enum qb_ipc_type requested) { const char *env = getenv("PCMK_ipc_type"); if (env && strcmp("shared-mem", env) == 0) { return QB_IPC_SHM; } else if (env && strcmp("socket", env) == 0) { return QB_IPC_SOCKET; } else if (env && strcmp("posix", env) == 0) { return QB_IPC_POSIX_MQ; } else if (env && strcmp("sysv", env) == 0) { return QB_IPC_SYSV_MQ; } else if (requested == QB_IPC_NATIVE) { /* We prefer shared memory because the server never blocks on * send. If part of a message fits into the socket, libqb * needs to block until the remainder can be sent also. * Otherwise the client will wait forever for the remaining * bytes. 
*/ return QB_IPC_SHM; } return requested; } qb_ipcs_service_t * mainloop_add_ipc_server(const char *name, enum qb_ipc_type type, struct qb_ipcs_service_handlers * callbacks) { int rc = 0; qb_ipcs_service_t *server = NULL; if (gio_map == NULL) { gio_map = qb_array_create_2(64, sizeof(struct gio_to_qb_poll), 1); } crm_client_init(); server = qb_ipcs_create(name, 0, pick_ipc_type(type), callbacks); #ifdef HAVE_IPCS_GET_BUFFER_SIZE /* All clients should use at least ipc_buffer_max as their buffer size */ qb_ipcs_enforce_buffer_size(server, crm_ipc_default_buffer_size()); #endif qb_ipcs_poll_handlers_set(server, &gio_poll_funcs); rc = qb_ipcs_run(server); if (rc < 0) { crm_err("Could not start %s IPC server: %s (%d)", name, pcmk_strerror(rc), rc); return NULL; } return server; } void mainloop_del_ipc_server(qb_ipcs_service_t * server) { if (server) { qb_ipcs_destroy(server); } } struct mainloop_io_s { char *name; void *userdata; int fd; guint source; crm_ipc_t *ipc; GIOChannel *channel; int (*dispatch_fn_ipc) (const char *buffer, ssize_t length, gpointer userdata); int (*dispatch_fn_io) (gpointer userdata); void (*destroy_fn) (gpointer userdata); }; static gboolean mainloop_gio_callback(GIOChannel * gio, GIOCondition condition, gpointer data) { gboolean keep = TRUE; mainloop_io_t *client = data; CRM_ASSERT(client->fd == g_io_channel_unix_get_fd(gio)); if (condition & G_IO_IN) { if (client->ipc) { long rc = 0; int max = 10; do { rc = crm_ipc_read(client->ipc); if (rc <= 0) { crm_trace("Message acquisition from %s[%p] failed: %s (%ld)", client->name, client, pcmk_strerror(rc), rc); } else if (client->dispatch_fn_ipc) { const char *buffer = crm_ipc_buffer(client->ipc); crm_trace("New message from %s[%p] = %ld (I/O condition=%d)", client->name, client, rc, condition); if (client->dispatch_fn_ipc(buffer, rc, client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } while (keep && rc > 0 && --max > 0); } else { crm_trace("New message from %s[%p] %u", client->name, client, condition); if (client->dispatch_fn_io) { if (client->dispatch_fn_io(client->userdata) < 0) { crm_trace("Connection to %s no longer required", client->name); keep = FALSE; } } } } if (client->ipc && crm_ipc_connected(client->ipc) == FALSE) { crm_err("Connection to %s[%p] closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if (condition & (G_IO_HUP | G_IO_NVAL | G_IO_ERR)) { crm_trace("The connection %s[%p] has been closed (I/O condition=%d)", client->name, client, condition); keep = FALSE; } else if ((condition & G_IO_IN) == 0) { /* #define GLIB_SYSDEF_POLLIN =1 #define GLIB_SYSDEF_POLLPRI =2 #define GLIB_SYSDEF_POLLOUT =4 #define GLIB_SYSDEF_POLLERR =8 #define GLIB_SYSDEF_POLLHUP =16 #define GLIB_SYSDEF_POLLNVAL =32 typedef enum { G_IO_IN GLIB_SYSDEF_POLLIN, G_IO_OUT GLIB_SYSDEF_POLLOUT, G_IO_PRI GLIB_SYSDEF_POLLPRI, G_IO_ERR GLIB_SYSDEF_POLLERR, G_IO_HUP GLIB_SYSDEF_POLLHUP, G_IO_NVAL GLIB_SYSDEF_POLLNVAL } GIOCondition; A bitwise combination representing a condition to watch for on an event source. G_IO_IN There is data to read. G_IO_OUT Data can be written (without blocking). G_IO_PRI There is urgent data to read. G_IO_ERR Error condition. G_IO_HUP Hung up (the connection has been broken, usually for pipes and sockets). G_IO_NVAL Invalid request. The file descriptor is not open. 
*/ crm_err("Strange condition: %d", condition); } /* keep == FALSE results in mainloop_gio_destroy() being called * just before the source is removed from mainloop */ return keep; } static void mainloop_gio_destroy(gpointer c) { mainloop_io_t *client = c; char *c_name = strdup(client->name); /* client->source is valid but about to be destroyed (ref_count == 0) in gmain.c * client->channel will still have ref_count > 0... should be == 1 */ crm_trace("Destroying client %s[%p]", c_name, c); if (client->ipc) { crm_ipc_close(client->ipc); } if (client->destroy_fn) { void (*destroy_fn) (gpointer userdata) = client->destroy_fn; client->destroy_fn = NULL; destroy_fn(client->userdata); } if (client->ipc) { crm_ipc_t *ipc = client->ipc; client->ipc = NULL; crm_ipc_destroy(ipc); } crm_trace("Destroyed client %s[%p]", c_name, c); free(client->name); client->name = NULL; free(client); free(c_name); } mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks) { mainloop_io_t *client = NULL; crm_ipc_t *conn = crm_ipc_new(name, max_size); if (conn && crm_ipc_connect(conn)) { int32_t fd = crm_ipc_get_fd(conn); client = mainloop_add_fd(name, priority, fd, userdata, NULL); } if (client == NULL) { crm_perror(LOG_TRACE, "Connection to %s failed", name); if (conn) { crm_ipc_close(conn); crm_ipc_destroy(conn); } return NULL; } client->ipc = conn; client->destroy_fn = callbacks->destroy; client->dispatch_fn_ipc = callbacks->dispatch; return client; } void mainloop_del_ipc_client(mainloop_io_t * client) { mainloop_del_fd(client); } crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t * client) { if (client) { return client->ipc; } return NULL; } mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks * callbacks) { mainloop_io_t *client = NULL; if (fd >= 0) { client = calloc(1, sizeof(mainloop_io_t)); if (client == NULL) { return NULL; } client->name = strdup(name); client->userdata = userdata; if (callbacks) { client->destroy_fn = callbacks->destroy; client->dispatch_fn_io = callbacks->dispatch; } client->fd = fd; client->channel = g_io_channel_unix_new(fd); client->source = g_io_add_watch_full(client->channel, priority, (G_IO_IN | G_IO_HUP | G_IO_NVAL | G_IO_ERR), mainloop_gio_callback, client, mainloop_gio_destroy); /* Now that mainloop now holds a reference to channel, * thanks to g_io_add_watch_full(), drop ours from g_io_channel_unix_new(). 
* * This means that channel will be free'd by: * g_main_context_dispatch() or g_source_remove() * -> g_source_destroy_internal() * -> g_source_callback_unref() * shortly after mainloop_gio_destroy() completes */ g_io_channel_unref(client->channel); crm_trace("Added connection %d for %s[%p].%d", client->source, client->name, client, fd); } else { errno = EINVAL; } return client; } void mainloop_del_fd(mainloop_io_t * client) { if (client != NULL) { crm_trace("Removing client %s[%p]", client->name, client); if (client->source) { /* Results in mainloop_gio_destroy() being called just * before the source is removed from mainloop */ g_source_remove(client->source); } } } static GListPtr child_list = NULL; pid_t mainloop_child_pid(mainloop_child_t * child) { return child->pid; } const char * mainloop_child_name(mainloop_child_t * child) { return child->desc; } int mainloop_child_timeout(mainloop_child_t * child) { return child->timeout; } void * mainloop_child_userdata(mainloop_child_t * child) { return child->privatedata; } void mainloop_clear_child_userdata(mainloop_child_t * child) { child->privatedata = NULL; } /* good function name */ static void child_free(mainloop_child_t *child) { if (child->timerid != 0) { crm_trace("Removing timer %d", child->timerid); g_source_remove(child->timerid); child->timerid = 0; } free(child->desc); free(child); } /* terrible function name */ static int child_kill_helper(mainloop_child_t *child) { int rc; if (child->flags & mainloop_leave_pid_group) { crm_debug("Kill pid %d only. leave group intact.", child->pid); rc = kill(child->pid, SIGKILL); } else { crm_debug("Kill pid %d's group", child->pid); rc = kill(-child->pid, SIGKILL); } if (rc < 0) { if (errno != ESRCH) { crm_perror(LOG_ERR, "kill(%d, KILL) failed", child->pid); } return -errno; } return 0; } static gboolean child_timeout_callback(gpointer p) { mainloop_child_t *child = p; int rc = 0; child->timerid = 0; if (child->timeout) { crm_crit("%s process (PID %d) will not die!", child->desc, (int)child->pid); return FALSE; } rc = child_kill_helper(child); if (rc == ESRCH) { /* Nothing left to do. 
pid doesn't exist */ return FALSE; } child->timeout = TRUE; crm_warn("%s process (PID %d) timed out", child->desc, (int)child->pid); child->timerid = g_timeout_add(5000, child_timeout_callback, child); return FALSE; } static gboolean child_waitpid(mainloop_child_t *child, int flags) { int rc = 0; int core = 0; int signo = 0; int status = 0; int exitcode = 0; rc = waitpid(child->pid, &status, flags); if(rc == 0) { crm_perror(LOG_DEBUG, "wait(%d) = %d", child->pid, rc); return FALSE; } else if(rc != child->pid) { signo = SIGCHLD; exitcode = 1; status = 1; crm_perror(LOG_ERR, "Call to waitpid(%d) failed", child->pid); } else { crm_trace("Managed process %d exited: %p", child->pid, child); if (WIFEXITED(status)) { exitcode = WEXITSTATUS(status); crm_trace("Managed process %d (%s) exited with rc=%d", child->pid, child->desc, exitcode); } else if (WIFSIGNALED(status)) { signo = WTERMSIG(status); crm_trace("Managed process %d (%s) exited with signal=%d", child->pid, child->desc, signo); } #ifdef WCOREDUMP if (WCOREDUMP(status)) { core = 1; crm_err("Managed process %d (%s) dumped core", child->pid, child->desc); } #endif } if (child->callback) { child->callback(child, child->pid, core, signo, exitcode); } return TRUE; } static void child_death_dispatch(int signal) { GListPtr iter = child_list; gboolean exited; while(iter) { GListPtr saved = NULL; mainloop_child_t *child = iter->data; exited = child_waitpid(child, WNOHANG); saved = iter; iter = iter->next; if (exited == FALSE) { continue; } crm_trace("Removing process entry %p for %d", child, child->pid); child_list = g_list_remove_link(child_list, saved); g_list_free(saved); child_free(child); } } static gboolean child_signal_init(gpointer p) { crm_trace("Installed SIGCHLD handler"); /* Do NOT use g_child_watch_add() and friends, they rely on pthreads */ mainloop_add_signal(SIGCHLD, child_death_dispatch); /* In case they terminated before the signal handler was installed */ child_death_dispatch(SIGCHLD); return FALSE; } int mainloop_child_kill(pid_t pid) { GListPtr iter; mainloop_child_t *child = NULL; mainloop_child_t *match = NULL; /* SIGKILL cannot be blocked, so we can * call waitpid() without the WNOHANG flag. */ int waitflags = 0, rc = 0; for (iter = child_list; iter != NULL && match == NULL; iter = iter->next) { child = iter->data; if (pid == child->pid) { match = child; } } if (match == NULL) { return FALSE; } rc = child_kill_helper(match); if(rc == -ESRCH) { /* It's gone, but hasn't shown up in waitpid() yet * * Wait until we get SIGCHLD and let child_death_dispatch() * clean it up as normal (so we get the correct return * code/status) * * The blocking alternative would be to call: * child_waitpid(match, 0); */ crm_trace("Waiting for child %d to be reaped by child_death_dispatch()", match->pid); return TRUE; } else if(rc != 0) { /* If the kill failed for some other reason, set the WNOHANG flag since we * can't be certain what happened. */
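/* Typical use of the child-tracking API defined around here, as a hedged
 * sketch (on_child_exit and the fork details are illustrative only):
 *
 *     static void
 *     on_child_exit(mainloop_child_t *p, pid_t pid, int core, int signo, int exitcode)
 *     {
 *         crm_info("Child %d exited: rc=%d signal=%d core=%d", pid, exitcode, signo, core);
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0) {
 *         execlp("true", "true", NULL);
 *         _exit(1);
 *     }
 *     // Reaped via the SIGCHLD handler; SIGKILL escalation after 30s:
 *     mainloop_child_add(pid, 30000, "example-child", NULL, on_child_exit);
 */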
waitflags = WNOHANG; } if (child_waitpid(match, waitflags) == FALSE) { /* not much we can do if this occurs */ return FALSE; } child_list = g_list_remove(child_list, match); child_free(match); return TRUE; } /* Create/Log a new tracked process * To track a process group, use -pid */ void mainloop_child_add_with_flags(pid_t pid, int timeout, const char *desc, void *privatedata, enum mainloop_child_flags flags, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { static bool need_init = TRUE; mainloop_child_t *child = g_new(mainloop_child_t, 1); child->pid = pid; child->timerid = 0; child->timeout = FALSE; child->privatedata = privatedata; child->callback = callback; child->flags = flags; if(desc) { child->desc = strdup(desc); } if (timeout) { child->timerid = g_timeout_add(timeout, child_timeout_callback, child); } child_list = g_list_append(child_list, child); if(need_init) { need_init = FALSE; /* SIGCHLD processing has to be invoked from mainloop. * We do not want it to be possible to both add a child pid * to mainloop, and have the pid's exit callback invoked within * the same callstack. */ g_timeout_add(1, child_signal_init, NULL); } } void mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata, void (*callback) (mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode)) { mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback); } struct mainloop_timer_s { guint id; guint period_ms; bool repeat; char *name; GSourceFunc cb; void *userdata; }; struct mainloop_timer_s mainloop; static gboolean mainloop_timer_cb(gpointer user_data) { int id = 0; bool repeat = FALSE; struct mainloop_timer_s *t = user_data; CRM_ASSERT(t != NULL); id = t->id; t->id = 0; /* Ensure it's unset during callbacks so that * mainloop_timer_running() works as expected */ if(t->cb) { crm_trace("Invoking callbacks for timer %s", t->name); repeat = t->repeat; if(t->cb(t->userdata) == FALSE) { crm_trace("Timer %s complete", t->name); repeat = FALSE; } } if(repeat) { /* Restore if repeating */ t->id = id; } return repeat; } bool mainloop_timer_running(mainloop_timer_t *t) { if(t && t->id != 0) { return TRUE; } return FALSE; } void mainloop_timer_start(mainloop_timer_t *t) { mainloop_timer_stop(t); if(t && t->period_ms > 0) { crm_trace("Starting timer %s", t->name); t->id = g_timeout_add(t->period_ms, mainloop_timer_cb, t); } } void mainloop_timer_stop(mainloop_timer_t *t) { if(t && t->id != 0) { crm_trace("Stopping timer %s", t->name); g_source_remove(t->id); t->id = 0; } } guint mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms) { guint last = 0; if(t) { last = t->period_ms; t->period_ms = period_ms; } if(t && t->id != 0 && last != t->period_ms) { mainloop_timer_start(t); } return last; } mainloop_timer_t * mainloop_timer_add(const char *name, guint period_ms, bool repeat, GSourceFunc cb, void *userdata) { mainloop_timer_t *t = calloc(1, sizeof(mainloop_timer_t)); if(t) { if(name) { t->name = crm_strdup_printf("%s-%u-%d", name, period_ms, repeat); } else { t->name = crm_strdup_printf("%p-%u-%d", t, period_ms, repeat); } t->id = 0; t->period_ms = period_ms; t->repeat = repeat; t->cb = cb; t->userdata = userdata; crm_trace("Created timer %s with %p %p", t->name, userdata, t->userdata); } return t; } void mainloop_timer_del(mainloop_timer_t *t) { if(t) { crm_trace("Destroying timer %s", t->name); mainloop_timer_stop(t); free(t->name); free(t); } } diff --git a/lib/pengine/status.c b/lib/pengine/status.c index
b9869874d9..8dda5d4071 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,311 +1,311 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #define MEMCHECK_STAGE_0 0 #define check_and_exit(stage) cleanup_calculations(data_set); \ crm_mem_stats(NULL); \ crm_err("Exiting: stage %d", stage); \ crm_exit(pcmk_err_generic); /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t * data_set) { xmlNode *config = get_xpath_object("//"XML_CIB_TAG_CRMCONFIG, data_set->input, LOG_TRACE); xmlNode *cib_nodes = get_xpath_object("//"XML_CIB_TAG_NODES, data_set->input, LOG_TRACE); xmlNode *cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); xmlNode *cib_status = get_xpath_object("//"XML_CIB_TAG_STATUS, data_set->input, LOG_TRACE); xmlNode *cib_tags = get_xpath_object("//"XML_CIB_TAG_TAGS, data_set->input, LOG_TRACE); const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); crm_trace("Beginning unpack"); pe_dataset = data_set; /* reset remaining global variables */ data_set->failed = create_xml_node(NULL, "failed-ops"); if (data_set->input == NULL) { return FALSE; } if (data_set->now == NULL) { data_set->now = crm_time_new(NULL); } if (data_set->dc_uuid == NULL && data_set->input != NULL && crm_element_value(data_set->input, XML_ATTR_DC_UUID) != NULL) { /* this should always be present */ data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); } clear_bit(data_set->flags, pe_flag_have_quorum); if (crm_is_true(value)) { set_bit(data_set->flags, pe_flag_have_quorum); } data_set->op_defaults = get_xpath_object("//"XML_CIB_TAG_OPCONFIG, data_set->input, LOG_TRACE); data_set->rsc_defaults = get_xpath_object("//"XML_CIB_TAG_RSCCONFIG, data_set->input, LOG_TRACE); unpack_config(config, data_set); if (is_not_set(data_set->flags, pe_flag_quick_location) && is_not_set(data_set->flags, pe_flag_have_quorum) && data_set->no_quorum_policy != no_quorum_ignore) { - crm_notice("We do not have quorum - fencing and resource management disabled"); + crm_warn("Fencing and resource management disabled due to lack of quorum"); } unpack_nodes(cib_nodes, data_set); if(is_not_set(data_set->flags, pe_flag_quick_location)) { unpack_remote_nodes(cib_resources, data_set); } unpack_resources(cib_resources, data_set); unpack_tags(cib_tags, data_set); if(is_not_set(data_set->flags, 
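/* A hedged sketch of the usual calling sequence for this API (cib_xml
 * stands in for a CIB document obtained elsewhere; it is consumed here
 * and freed by cleanup_calculations()):
 *
 *     pe_working_set_t data_set;
 *
 *     set_working_set_defaults(&data_set);
 *     data_set.input = cib_xml;
 *     if (cluster_status(&data_set)) {
 *         // data_set.nodes and data_set.resources are now populated
 *     }
 *     cleanup_calculations(&data_set);
 */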
pe_flag_quick_location)) { unpack_status(cib_status, data_set); } set_bit(data_set->flags, pe_flag_have_status); return TRUE; } static void pe_free_resources(GListPtr resources) { resource_t *rsc = NULL; GListPtr iterator = resources; while (iterator != NULL) { rsc = (resource_t *) iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GListPtr actions) { GListPtr iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GListPtr nodes) { GListPtr iterator = nodes; while (iterator != NULL) { node_t *node = (node_t *) iterator->data; struct node_shared_s *details = node->details; iterator = iterator->next; crm_trace("deleting node"); print_node("delete", node, FALSE); if (details != NULL) { crm_trace("%s is being deleted", details->uname); if (details->attrs != NULL) { g_hash_table_destroy(details->attrs); } if (details->utilization != NULL) { g_hash_table_destroy(details->utilization); } if (details->digest_cache != NULL) { g_hash_table_destroy(details->digest_cache); } g_list_free(details->running_rsc); g_list_free(details->allocated_rsc); free(details); } free(node); } if (nodes != NULL) { g_list_free(nodes); } } void cleanup_calculations(pe_working_set_t * data_set) { pe_dataset = NULL; if (data_set == NULL) { return; } clear_bit(data_set->flags, pe_flag_have_status); if (data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } if (data_set->singletons != NULL) { g_hash_table_destroy(data_set->singletons); } if (data_set->tickets) { g_hash_table_destroy(data_set->tickets); } if (data_set->template_rsc_sets) { g_hash_table_destroy(data_set->template_rsc_sets); } if (data_set->tags) { g_hash_table_destroy(data_set->tags); } free(data_set->dc_uuid); crm_trace("deleting resources"); pe_free_resources(data_set->resources); crm_trace("deleting actions"); pe_free_actions(data_set->actions); crm_trace("deleting nodes"); pe_free_nodes(data_set->nodes); free_xml(data_set->graph); crm_time_free(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); set_working_set_defaults(data_set); CRM_CHECK(data_set->ordering_constraints == NULL,; ); CRM_CHECK(data_set->placement_constraints == NULL,; ); } void set_working_set_defaults(pe_working_set_t * data_set) { pe_dataset = data_set; memset(data_set, 0, sizeof(pe_working_set_t)); data_set->dc_uuid = NULL; data_set->order_id = 1; data_set->action_id = 1; data_set->no_quorum_policy = no_quorum_freeze; data_set->flags = 0x0ULL; set_bit(data_set->flags, pe_flag_stop_rsc_orphans); set_bit(data_set->flags, pe_flag_symmetric_cluster); set_bit(data_set->flags, pe_flag_is_managed_default); set_bit(data_set->flags, pe_flag_stop_action_orphans); } resource_t * pe_find_resource(GListPtr rsc_list, const char *id) { GListPtr rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { resource_t *parent = rIter->data; resource_t *match = parent->fns->find_rsc(parent, id, NULL, pe_find_renamed | pe_find_current); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } node_t * pe_find_node_any(GListPtr nodes, const char *id, const char *uname) { node_t *match = pe_find_node_id(nodes, id); if (match) { return match; } crm_trace("Looking up %s via its uname instead", uname); return pe_find_node(nodes, uname); } node_t * pe_find_node_id(GListPtr nodes, const char *id) { GListPtr
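/* The lookup helpers here in practice, as a hedged sketch (the ID and
 * uname are invented):
 *
 *     node_t *n = pe_find_node_any(data_set->nodes, "3", "node3.example.com");
 *     resource_t *r = pe_find_resource(data_set->resources, "vip");
 *
 * pe_find_node_any() prefers the stable node ID and only falls back to
 * searching by uname when the ID lookup fails.
 */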
gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->id, id)) { return node; } } /* error */ return NULL; } node_t * pe_find_node(GListPtr nodes, const char *uname) { GListPtr gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; if (node && safe_str_eq(node->details->uname, uname)) { return node; } } /* error */ return NULL; } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index bf9b53fc4e..f392fcdd34 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,3427 +1,3427 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); #define set_config_flag(data_set, option, flag) do { \ const char *tmp = pe_pref(data_set->config_hash, option); \ if(tmp) { \ if(crm_is_true(tmp)) { \ set_bit(data_set->flags, flag); \ } else { \ clear_bit(data_set->flags, flag); \ } \ } \ } while(0) gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response *failed, pe_working_set_t * data_set); static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node); static gboolean is_dangling_container_remote_node(node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (is_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) { return TRUE; } return FALSE; } void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason) { CRM_CHECK(node, return); /* fence remote nodes living in a container by marking the container as failed. */ if (is_container_remote_node(node)) { resource_t *rsc = node->details->remote_rsc->container; if (is_set(rsc->flags, pe_rsc_failed) == FALSE) { crm_warn("Remote node %s will be fenced by recovering container resource %s %s", node->details->uname, rsc->id, reason); /* node->details->unclean = TRUE; */ node->details->remote_requires_reset = TRUE; set_bit(rsc->flags, pe_rsc_failed); } } else if (is_dangling_container_remote_node(node)) { crm_info("Fencing remote node %s has already occurred, container no longer exists.
cleaning up dangling connection resource: %s", node->details->uname, reason); set_bit(node->details->remote_rsc->flags, pe_rsc_failed); } else if (is_baremetal_remote_node(node)) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; node->details->remote_requires_reset = TRUE; } else if (node->details->unclean == FALSE) { if(pe_can_fence(data_set, node)) { crm_warn("Node %s will be fenced %s", node->details->uname, reason); } else { crm_warn("Node %s is unclean %s", node->details->uname, reason); } node->details->unclean = TRUE; } else { crm_trace("Huh? %s %s", node->details->uname, reason); } } gboolean unpack_config(xmlNode * config, pe_working_set_t * data_set) { const char *value = NULL; GHashTable *config_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); xmlXPathObjectPtr xpathObj = NULL; if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='provides' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//nvpair[@name='requires' and @value='unfencing']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #ifdef REDHAT_COMPAT_6 if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) { xpathObj = xpath_search(data_set->input, "//primitive[@type='fence_scsi']"); if(xpathObj && numXpathResults(xpathObj) > 0) { set_bit(data_set->flags, pe_flag_enable_unfencing); } freeXpathObject(xpathObj); } #endif data_set->config_hash = config_hash; unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, data_set->now); verify_pe_options(data_set->config_hash); set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes); if(is_not_set(data_set->flags, pe_flag_startup_probes)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_notice("Relying on watchdog integration for fencing"); set_bit(data_set->flags, pe_flag_have_stonith_resource); } value = pe_pref(data_set->config_hash, "stonith-timeout"); data_set->stonith_timeout = crm_get_msec(value); crm_debug("STONITH timeout: %d", data_set->stonith_timeout); set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled); crm_debug("STONITH of failed nodes is %s", is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action"); crm_trace("STONITH will %s nodes", data_set->stonith_action); set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing); crm_debug("Concurrent fencing is %s", is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled"); set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything); crm_debug("Stop all active resources: %s", is_set(data_set->flags, pe_flag_stop_everything) ? 
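/* For reference, set_config_flag() (defined near the top of this file)
 * expands one option lookup per flag; e.g. set_config_flag(data_set,
 * "stonith-enabled", pe_flag_stonith_enabled) is equivalent to:
 *
 *     const char *tmp = pe_pref(data_set->config_hash, "stonith-enabled");
 *     if (tmp) {
 *         if (crm_is_true(tmp)) {
 *             set_bit(data_set->flags, pe_flag_stonith_enabled);
 *         } else {
 *             clear_bit(data_set->flags, pe_flag_stonith_enabled);
 *         }
 *     }
 */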
"true" : "false"); set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster); if (is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(data_set->config_hash, "default-resource-stickiness"); data_set->default_resource_stickiness = char2score(value); crm_debug("Default stickiness: %d", data_set->default_resource_stickiness); value = pe_pref(data_set->config_hash, "no-quorum-policy"); if (safe_str_eq(value, "ignore")) { data_set->no_quorum_policy = no_quorum_ignore; } else if (safe_str_eq(value, "freeze")) { data_set->no_quorum_policy = no_quorum_freeze; } else if (safe_str_eq(value, "suicide")) { gboolean do_panic = FALSE; crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err ("Setting no-quorum-policy=suicide makes no sense if stonith-enabled=false"); } if (do_panic && is_set(data_set->flags, pe_flag_stonith_enabled)) { data_set->no_quorum_policy = no_quorum_suicide; } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && do_panic == FALSE) { crm_notice("Resetting no-quorum-policy to 'stop': The cluster has never had quorum"); data_set->no_quorum_policy = no_quorum_stop; } } else { data_set->no_quorum_policy = no_quorum_stop; } switch (data_set->no_quorum_policy) { case no_quorum_freeze: crm_debug("On loss of CCM Quorum: Freeze resources"); break; case no_quorum_stop: crm_debug("On loss of CCM Quorum: Stop ALL resources"); break; case no_quorum_suicide: crm_notice("On loss of CCM Quorum: Fence all remaining nodes"); break; case no_quorum_ignore: crm_notice("On loss of CCM Quorum: Ignore"); break; } set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans); crm_trace("Orphan resources are %s", is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans); crm_trace("Orphan resource actions are %s", is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored"); set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop); crm_trace("Stopped resources are removed from the status section: %s", is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false"); set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode); crm_trace("Maintenance mode: %s", is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false"); if (is_set(data_set->flags, pe_flag_maintenance_mode)) { clear_bit(data_set->flags, pe_flag_is_managed_default); } else { set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default); } crm_trace("By default resources are %smanaged", is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not "); set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal); crm_trace("Start failures are %s", is_set(data_set->flags, pe_flag_start_failure_fatal) ? 
"always fatal" : "handled by failcount"); node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red")); node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green")); node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow")); crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s", pe_pref(data_set->config_hash, "node-health-red"), pe_pref(data_set->config_hash, "node-health-yellow"), pe_pref(data_set->config_hash, "node-health-green")); data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy"); crm_trace("Placement strategy: %s", data_set->placement_strategy); return TRUE; } static void destroy_digest_cache(gpointer ptr) { op_digest_cache_t *data = ptr; free_xml(data->params_all); free_xml(data->params_secure); free_xml(data->params_restart); free(data->digest_all_calc); free(data->digest_restart_calc); free(data->digest_secure_calc); free(data); } static node_t * create_node(const char *id, const char *uname, const char *type, const char *score, pe_working_set_t * data_set) { node_t *new_node = NULL; if (pe_find_node(data_set->nodes, uname) != NULL) { crm_config_warn("Detected multiple node entries with uname=%s" " - this is rarely intended", uname); } new_node = calloc(1, sizeof(node_t)); if (new_node == NULL) { return NULL; } new_node->weight = char2score(score); new_node->fixed = FALSE; new_node->details = calloc(1, sizeof(struct node_shared_s)); if (new_node->details == NULL) { free(new_node); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->type = node_ping; if (safe_str_eq(type, "remote")) { new_node->details->type = node_remote; set_bit(data_set->flags, pe_flag_have_remote_nodes); } else if (type == NULL || safe_str_eq(type, "member") || safe_str_eq(type, NORMALNODE)) { new_node->details->type = node_member; } new_node->details->attrs = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); if (is_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup("#kind"), strdup("cluster")); } new_node->details->utilization = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_digest_cache); data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, GHashTable **rsc_name_check) { xmlNode *xml_rsc = NULL; xmlNode *xml_tmp = NULL; xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; char *tmp_id = NULL; for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) { if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) { continue; } for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) { const char *value = crm_element_value(attr, 
XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) { remote_name = value; } else if (safe_str_eq(name, "remote-addr")) { remote_server = value; } else if (safe_str_eq(name, "remote-port")) { remote_port = value; } else if (safe_str_eq(name, "remote-connect-timeout")) { connect_timeout = value; } else if (safe_str_eq(name, "remote-allow-migrate")) { remote_allow_migrate=value; } } } if (remote_name == NULL) { return NULL; } if (*rsc_name_check == NULL) { *rsc_name_check = g_hash_table_new(crm_str_hash, g_str_equal); for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) { const char *id = ID(xml_rsc); /* avoiding heap allocation here because we know the duration of this hashtable allows us to */ g_hash_table_insert(*rsc_name_check, (char *) id, (char *) id); } } if (g_hash_table_lookup(*rsc_name_check, remote_name)) { crm_err("Naming conflict with remote-node=%s. remote-nodes can not have the same name as a resource.", remote_name); return NULL; } xml_rsc = create_xml_node(parent, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, remote_name); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, "ocf"); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, "pacemaker"); crm_xml_add(xml_rsc, XML_ATTR_TYPE, "remote"); xml_tmp = create_xml_node(xml_rsc, XML_TAG_META_SETS); tmp_id = crm_concat(remote_name, XML_TAG_META_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_CONTAINER); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, container_id); free(tmp_id); attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-internal", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_RSC_ATTR_INTERNAL_RSC); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, "true"); free(tmp_id); if (remote_allow_migrate) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "meta-attributes-container", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_ALLOW_MIGRATE); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_allow_migrate); free(tmp_id); } xml_tmp = create_xml_node(xml_rsc, "operations"); attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "monitor-interval-30s", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, "30s"); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "30s"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "monitor"); free(tmp_id); if (connect_timeout) { attr = create_xml_node(xml_tmp, XML_ATTR_OP); tmp_id = crm_concat(remote_name, "start-interval-0", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_ATTR_TIMEOUT, connect_timeout); crm_xml_add(attr, XML_LRM_ATTR_INTERVAL, "0"); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "start"); free(tmp_id); } if (remote_port || remote_server) { xml_tmp = create_xml_node(xml_rsc, XML_TAG_ATTR_SETS); tmp_id = crm_concat(remote_name, XML_TAG_ATTR_SETS, '_'); crm_xml_add(xml_tmp, XML_ATTR_ID, tmp_id); free(tmp_id); if (remote_server) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-addr", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, 
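/* Roughly what expand_remote_rsc_meta() injects for a container resource
 * carrying remote-node="guest1" and remote-addr="192.0.2.10" (a hedged,
 * abbreviated sketch; generated nvpair IDs omitted):
 *
 *     <primitive id="guest1" class="ocf" provider="pacemaker" type="remote">
 *       <meta_attributes id="guest1_meta_attributes">
 *         <nvpair name="[XML_RSC_ATTR_CONTAINER]" value="[container id]"/>
 *       </meta_attributes>
 *       <operations>
 *         <op name="monitor" interval="30s" timeout="30s"/>
 *         <op name="start" interval="0" timeout="60s"/>
 *       </operations>
 *       <instance_attributes id="guest1_instance_attributes">
 *         <nvpair name="addr" value="192.0.2.10"/>
 *       </instance_attributes>
 *     </primitive>
 */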
XML_NVPAIR_ATTR_NAME, "addr"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_server); free(tmp_id); } if (remote_port) { attr = create_xml_node(xml_tmp, XML_CIB_TAG_NVPAIR); tmp_id = crm_concat(remote_name, "instance-attributes-port", '_'); crm_xml_add(attr, XML_ATTR_ID, tmp_id); crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, "port"); crm_xml_add(attr, XML_NVPAIR_ATTR_VALUE, remote_port); free(tmp_id); } } return remote_name; } static void handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node) { static const char *blind_faith = NULL; static gboolean unseen_are_unclean = TRUE; if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) { /* ignore fencing remote-nodes that don't have a conneciton resource associated * with them. This happens when remote-node entries get left in the nodes section * after the connection resource is removed */ return; } blind_faith = pe_pref(data_set->config_hash, "startup-fencing"); if (crm_is_true(blind_faith) == FALSE) { unseen_are_unclean = FALSE; crm_warn("Blind faith: not fencing unseen nodes"); } if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE || unseen_are_unclean == FALSE) { /* blind faith... */ new_node->details->unclean = FALSE; } else { /* all nodes are unclean until we've seen their * status entry */ new_node->details->unclean = TRUE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { crm_config_err("Must specify id tag in "); continue; } new_node = create_node(id, uname, type, score, data_set); if (new_node == NULL) { return FALSE; } /* if(data_set->have_quorum == FALSE */ /* && data_set->no_quorum_policy == no_quorum_stop) { */ /* /\* start shutting resources down *\/ */ /* new_node->weight = -INFINITY; */ /* } */ handle_startup_fencing(data_set, new_node); add_node_attrs(xml_obj, new_node, FALSE, data_set); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL, new_node->details->utilization, NULL, FALSE, data_set->now); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) { crm_info("Creating a fake local node"); create_node(data_set->localhost, data_set->localhost, NULL, 0, data_set); } return TRUE; } static void setup_container(resource_t * rsc, pe_working_set_t * data_set) { const char *container_id = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; setup_container(child_rsc, data_set); } return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && safe_str_neq(container_id, rsc->id)) { resource_t *container = 
pe_find_resource(data_set->resources, container_id); if (container) { rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GHashTable *rsc_name_check = NULL; /* generate remote nodes from resource config before unpacking resources */ for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { const char *new_node_id = NULL; /* first check if this is a bare metal remote node. Bare metal remote nodes * are defined as a resource primitive only. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } /* Now check for guest remote nodes. * guest remote nodes are defined within a resource primitive. * Example1: a vm resource might be configured as a remote node. * Example2: a vm resource might be configured within a group to be a remote node. * Note: right now we only support guest remote nodes as a standalone primitive * or a primitive within a group. No cloned primitives can be a guest remote node * right now */ if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) { /* expands a metadata defined remote resource into the xml config * as an actual rsc primitive to be unpacked later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, &rsc_name_check); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } continue; } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) { xmlNode *xml_obj2 = NULL; /* search through a group to see if any of the primitives contain a remote node. */ for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, &rsc_name_check); if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) { crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); create_node(new_node_id, new_node_id, "remote", NULL, data_set); } } } } if (rsc_name_check) { g_hash_table_destroy(rsc_name_check); } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the PE calculations.
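* (Once linked, determine_remote_online_status() derives the remote node's online state from the role of this connection resource.)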
*/ static void link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc) { node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (is_set(data_set->flags, pe_flag_quick_location)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } print_resource(LOG_DEBUG_3, "Linking remote-node connection resource, ", new_rsc, FALSE); remote_node = pe_find_node(data_set->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return;); remote_node->details->remote_rsc = new_rsc; /* If this is a baremetal remote-node (no container resource * associated with it) then we need to handle startup fencing the same way * as cluster nodes. */ if (new_rsc->container == NULL) { handle_startup_fencing(data_set, remote_node); } else { /* At this point we know whether the remote node is a container or baremetal * remote node; update the #kind attribute if a container is involved */ g_hash_table_replace(remote_node->details->attrs, strdup("#kind"), strdup("container")); } } static void destroy_tag(gpointer data) { tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } gboolean unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; GListPtr gIter = NULL; data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { resource_t *new_rsc = NULL; if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) { const char *template_id = ID(xml_obj); if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets, template_id, NULL, NULL) == FALSE) { /* Record the template's ID anyway, so that we know the template exists. */ g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL); } continue; } crm_trace("Beginning unpack... <%s id=%s...
>", crm_element_name(xml_obj), ID(xml_obj)); if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) { data_set->resources = g_list_append(data_set->resources, new_rsc); if (xml_contains_remote_node(xml_obj)) { new_rsc->is_remote_node = TRUE; } print_resource(LOG_DEBUG_3, "Added ", new_rsc, FALSE); } else { crm_config_err("Failed unpacking %s %s", crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID)); if (new_rsc != NULL && new_rsc->fns != NULL) { new_rsc->fns->free(new_rsc); } } } for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; setup_container(rsc, data_set); link_rsc2remotenode(data_set, rsc); } data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority); if (is_set(data_set->flags, pe_flag_quick_location)) { /* Ignore */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { crm_config_err("Resource start-up disabled since no STONITH resources have been defined"); crm_config_err("Either configure some or disable STONITH with the stonith-enabled option"); crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set) { xmlNode *xml_tag = NULL; data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag); for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) { continue; } if (tag_id == NULL) { crm_config_err("Failed unpacking %s: %s should be specified", crm_element_name(xml_tag), XML_ATTR_ID); continue; } for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) { continue; } if (obj_ref == NULL) { crm_config_err("Failed unpacking %s for tag %s: %s should be specified", crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID); continue; } if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (ticket_id == NULL || strlen(ticket_id) == 0) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_ticket, prop_name); if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", 
ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set) { xmlNode *xml_obj = NULL; for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) { if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) { continue; } unpack_ticket_state(xml_obj, data_set); } return TRUE; } /* Compatibility with the deprecated ticket state section: * "/cib/status/tickets/instance_attributes" */ static void get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data) { const char *long_key = key; char *state_key = NULL; const char *granted_prefix = "granted-ticket-"; const char *last_granted_prefix = "last-granted-"; static int granted_prefix_strlen = 0; static int last_granted_prefix_strlen = 0; const char *ticket_id = NULL; const char *is_granted = NULL; const char *last_granted = NULL; const char *sep = NULL; ticket_t *ticket = NULL; pe_working_set_t *data_set = user_data; if (granted_prefix_strlen == 0) { granted_prefix_strlen = strlen(granted_prefix); } if (last_granted_prefix_strlen == 0) { last_granted_prefix_strlen = strlen(last_granted_prefix); } if (strstr(long_key, granted_prefix) == long_key) { ticket_id = long_key + granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("granted"); is_granted = value; } } else if (strstr(long_key, last_granted_prefix) == long_key) { ticket_id = long_key + last_granted_prefix_strlen; if (strlen(ticket_id)) { state_key = strdup("last-granted"); last_granted = value; } } else if ((sep = strrchr(long_key, '-'))) { ticket_id = sep + 1; state_key = strndup(long_key, strlen(long_key) - strlen(sep)); } if (ticket_id == NULL || strlen(ticket_id) == 0) { free(state_key); return; } if (state_key == NULL || strlen(state_key) == 0) { free(state_key); return; } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, data_set); if (ticket == NULL) { free(state_key); return; } } g_hash_table_replace(ticket->state, state_key, strdup(value)); if (is_granted) { if (crm_is_true(is_granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } } else if (last_granted) { ticket->last_granted = crm_parse_int(last_granted, 0); } } /* remove nodes that are down, stopping */ /* create +ve rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? 
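* (unpack_status() below walks /cib/status, handling the tickets section and each node_state entry; remote-node state is deferred to unpack_remote_status().)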
*/ gboolean unpack_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; crm_trace("Beginning unpack"); if (data_set->tickets == NULL) { data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket); } for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) { xmlNode *xml_tickets = state; GHashTable *state_hash = NULL; /* Compatibility with the deprecated ticket state section: * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */ state_hash = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL, state_hash, NULL, TRUE, data_set->now); g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set); if (state_hash) { g_hash_table_destroy(state_hash); } /* Unpack the new "/cib/status/tickets/ticket_state"s */ unpack_tickets_state(xml_tickets, data_set); } if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) { xmlNode *attrs = NULL; const char *resource_discovery_enabled = NULL; id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (uname == NULL) { /* error */ continue; } else if (this_node == NULL) { crm_config_warn("Node %s in status section no longer exists", uname); continue; } else if (is_remote_node(this_node)) { /* online state for remote nodes is determined by the * rsc state after all the unpacking is done. 
we do however * need to mark whether or not the node has been fenced as this plays * a role during unpacking cluster node resource state */ this_node->details->remote_was_fenced = crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0"); continue; } crm_trace("Processing node id=%s, uname=%s", id, uname); /* Mark the node as provisionally clean * - at least we have seen it in the current cluster's lifetime */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } crm_trace("determining node state"); determine_online_status(state, this_node, data_set); if (this_node->details->online && data_set->no_quorum_policy == no_quorum_suicide) { /* Everything else should flow from this automatically * At least until the PE becomes able to migrate off healthy resources */ pe_fence_node(data_set, this_node, "because the cluster does not have quorum"); } } } /* Now that we know all node states, we can safely handle migration ops */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if (this_node == NULL) { crm_info("Node %s is unknown", id); continue; } else if (is_remote_node(this_node)) { /* online status of remote node can not be determined until all other * resource status is unpacked. 
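* (see unpack_remote_status() below, which runs once the lrm entries for the cluster nodes have been unpacked)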
*/ continue; } else if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { crm_trace("Processing lrm resource entries on healthy node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } } /* now that the rest of the cluster's status is determined, * calculate the status of the remote nodes */ unpack_remote_status(status, data_set); return TRUE; } gboolean unpack_remote_status(xmlNode * status, pe_working_set_t * data_set) { const char *id = NULL; const char *uname = NULL; const char *shutdown = NULL; GListPtr gIter = NULL; xmlNode *state = NULL; xmlNode *lrm_rsc = NULL; node_t *this_node = NULL; if (is_set(data_set->flags, pe_flag_have_remote_nodes) == FALSE) { crm_trace("no remote nodes to unpack"); return TRUE; } /* get online status */ for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { this_node = gIter->data; if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } determine_remote_online_status(data_set, this_node); } /* process attributes */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { const char *resource_discovery_enabled = NULL; xmlNode *attrs = NULL; if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing remote node id=%s, uname=%s", id, uname); if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, data_set); shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { resource_t *rsc = this_node->details->remote_rsc; crm_info("Node %s is shutting down", this_node->details->uname); this_node->details->shutdown = TRUE; rsc->next_role = RSC_ROLE_STOPPED; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "standby"))) { crm_info("Node %s is in standby-mode", this_node->details->uname); this_node->details->standby = TRUE; } if (crm_is_true(g_hash_table_lookup(this_node->details->attrs, "maintenance"))) { crm_info("Node %s is in maintenance-mode", this_node->details->uname); this_node->details->maintenance = TRUE; } resource_discovery_enabled = g_hash_table_lookup(this_node->details->attrs, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.", XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname); } else { /* if we're here, this is either a baremetal node with fencing enabled, * or a container node, where fencing does not matter: container nodes are * 'fenced' by recovering the container resource, regardless of whether * fencing is enabled.
*/ crm_info("Node %s has resource discovery disabled", this_node->details->uname); this_node->details->rsc_discovery_enabled = FALSE; } } } /* process node rsc status */ for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) { if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) { continue; } id = crm_element_value(state, XML_ATTR_ID); uname = crm_element_value(state, XML_ATTR_UNAME); this_node = pe_find_node_any(data_set->nodes, id, uname); if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) { continue; } crm_trace("Processing lrm resource entries on healthy remote node: %s", this_node->details->uname); lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); unpack_lrm_resources(this_node, lrm_rsc, data_set); } return TRUE; } static gboolean determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (!crm_is_true(in_cluster)) { crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster)); } else if (safe_str_eq(is_peer, ONLINESTATUS)) { if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { online = TRUE; } else { crm_debug("Node is not ready to run resources: %s", join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster)); crm_trace("\tis_peer=%s, join=%s, expected=%s", crm_str(is_peer), crm_str(join), crm_str(exp_state)); } else { /* mark it unclean */ - pe_fence_node(data_set, this_node, "unexpectedly down"); + pe_fence_node(data_set, this_node, "because node is unexpectedly down"); crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s", crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state)); } return online; } static gboolean determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state, node_t * this_node) { gboolean online = FALSE; gboolean do_terminate = FALSE; const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE); const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER); const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER); const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); const char *terminate = g_hash_table_lookup(this_node->details->attrs, "terminate"); /* - XML_NODE_IN_CLUSTER ::= true|false - XML_NODE_IS_PEER ::= true|false|online|offline - XML_NODE_JOIN_STATE ::= member|down|pending|banned - XML_NODE_EXPECTED ::= member|down */ if (crm_is_true(terminate)) { do_terminate = TRUE; } else if (terminate != NULL && strlen(terminate) > 0) { /* could be a time() value */ char t = terminate[0]; if (t != '0' && isdigit(t)) { do_terminate = TRUE; } } crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate); online = crm_is_true(in_cluster); if (safe_str_eq(is_peer, ONLINESTATUS)) { is_peer = XML_BOOLEAN_YES; } if (exp_state == NULL) { exp_state = CRMD_JOINSTATE_DOWN; } if (this_node->details->shutdown) { crm_debug("%s is shutting down", this_node->details->uname); /* Slightly 
different criteria since we can't shut down a dead peer */ online = crm_is_true(is_peer); } else if (in_cluster == NULL) { pe_fence_node(data_set, this_node, "because the peer has not been seen by the cluster"); } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) { pe_fence_node(data_set, this_node, "because it failed the pacemaker membership criteria"); } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) { if (crm_is_true(in_cluster) || crm_is_true(is_peer)) { crm_info("- Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", this_node->details->uname); } } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN) && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) { crm_info("Node %s was just shot", this_node->details->uname); online = FALSE; } else if (crm_is_true(in_cluster) == FALSE) { pe_fence_node(data_set, this_node, "because the node is no longer part of the cluster"); } else if (crm_is_true(is_peer) == FALSE) { pe_fence_node(data_set, this_node, "because our peer process is no longer available"); /* Everything is running at this point, now check join state */ } else if (do_terminate) { pe_fence_node(data_set, this_node, "because termination was requested"); } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) { crm_info("Node %s is active", this_node->details->uname); } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING) || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) { crm_info("Node %s is not ready to run resources", this_node->details->uname); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(data_set, this_node, "because the peer was in an unknown state"); crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d", this_node->details->uname, crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown); } return online; } static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node) { resource_t *rsc = this_node->details->remote_rsc; resource_t *container = NULL; if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; CRM_ASSERT(rsc != NULL); /* If the resource is currently started, mark it online. */ if (rsc->role == RSC_ROLE_STARTED) { crm_trace("Remote node %s is set to ONLINE. role == started", this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) { crm_trace("Remote node %s shutdown. transition from start to stop role", this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && is_set(container->flags, pe_rsc_failed)) { crm_trace("Remote node %s is set to UNCLEAN. rsc failed.", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if(is_set(rsc->flags, pe_rsc_failed)) { crm_trace("Remote node %s is set to OFFLINE. rsc failed.", this_node->details->id); this_node->details->online = FALSE; } else if (rsc->role == RSC_ROLE_STOPPED || (container && container->role == RSC_ROLE_STOPPED)) { crm_trace("Remote node %s is set to OFFLINE. 
node is stopped.", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); return this_node->details->online; } gboolean determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set) { gboolean online = FALSE; const char *shutdown = NULL; const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED); if (this_node == NULL) { crm_config_err("No node to check"); return online; } this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; shutdown = g_hash_table_lookup(this_node->details->attrs, XML_CIB_ATTR_SHUTDOWN); if (shutdown != NULL && safe_str_neq("0", shutdown)) { this_node->details->shutdown = TRUE; } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { online = determine_online_status_no_fencing(data_set, node_state, this_node); } else { online = determine_online_status_fencing(data_set, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* dont run resources here */ this_node->fixed = TRUE; this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("Node %s is not a pacemaker node", this_node->details->uname); } else if (this_node->details->unclean) { pe_proc_warn("Node %s is unclean", this_node->details->uname); } else if (this_node->details->online) { crm_info("Node %s is %s", this_node->details->uname, this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? 
"maintenance" : "online"); } else { crm_trace("Node %s is offline", this_node->details->uname); } return online; } char * clone_strip(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); lpc = strlen(last_rsc_id); while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: crm_err("Empty string: %s", last_rsc_id); return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 1); memcpy(zero, last_rsc_id, lpc); zero[lpc] = 0; return zero; default: goto done; } } done: zero = strdup(last_rsc_id); return zero; } char * clone_zero(const char *last_rsc_id) { int lpc = 0; char *zero = NULL; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { lpc = strlen(last_rsc_id); } while (--lpc > 0) { switch (last_rsc_id[lpc]) { case 0: return NULL; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; return zero; default: goto done; } } done: lpc = strlen(last_rsc_id); zero = calloc(1, lpc + 3); memcpy(zero, last_rsc_id, lpc); zero[lpc] = ':'; zero[lpc + 1] = '0'; zero[lpc + 2] = 0; crm_trace("%s -> %s", last_rsc_id, zero); return zero; } static resource_t * create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set) { resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); rsc->is_remote_node = TRUE; node = pe_find_node(data_set->nodes, rsc_id); if (node == NULL) { node = create_node(rsc_id, rsc_id, "remote", NULL, data_set); } link_rsc2remotenode(data_set, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. 
*/ crm_trace("Detected orphaned container filler %s", rsc_id); set_bit(rsc->flags, pe_rsc_orphan_container_filler); } set_bit(rsc->flags, pe_rsc_orphan); data_set->resources = g_list_append(data_set->resources, rsc); return rsc; } extern resource_t *create_child_clone(resource_t * rsc, int sub_id, pe_working_set_t * data_set); static resource_t * find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent, const char *rsc_id) { GListPtr rIter = NULL; resource_t *rsc = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(parent->variant == pe_clone || parent->variant == pe_master); CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique)); /* Find an instance active (or partially active for grouped clones) on the specified node */ pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr nIter = NULL; GListPtr locations = NULL; resource_t *child = rIter->data; child->fns->location(child, &locations, TRUE); if (locations == NULL) { pe_rsc_trace(child, "Resource %s, skip inactive", child->id); continue; } for (nIter = locations; nIter && rsc == NULL; nIter = nIter->next) { node_t *childnode = nIter->data; if (childnode->details == node->details) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); if(rsc) { pe_rsc_trace(rsc, "Resource %s, active", rsc->id); } } /* Keep this block, it means we'll do the right thing if * anyone toggles the unique flag to 'off' */ if (rsc && rsc->running_on) { crm_notice("/Anonymous/ clone %s is already running on %s", parent->id, node->details->uname); skip_inactive = TRUE; rsc = NULL; } } g_list_free(locations); } /* Find an inactive instance */ if (skip_inactive == FALSE) { pe_rsc_trace(parent, "Looking for %s anywhere", rsc_id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GListPtr locations = NULL; resource_t *child = rIter->data; if (is_set(child->flags, pe_rsc_block)) { pe_rsc_trace(child, "Skip: blocked in stopped state"); continue; } child->fns->location(child, &locations, TRUE); if (locations == NULL) { /* ->find_rsc() because we might be a cloned group */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone); pe_rsc_trace(parent, "Resource %s, empty slot", rsc->id); } g_list_free(locations); } } if (rsc == NULL) { /* Create an extra orphan */ resource_t *top = create_child_clone(parent, -1, data_set); /* ->find_rsc() because we might be a cloned group */ rsc = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone); CRM_ASSERT(rsc != NULL); pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, node->details->uname); } if (safe_str_neq(rsc_id, rsc->id)) { pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, node->details->uname, rsc->id, is_set(rsc->flags, pe_rsc_orphan) ? 
" (ORPHAN)" : ""); } return rsc; } static resource_t * unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id, xmlNode * rsc_entry) { resource_t *rsc = NULL; resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(data_set->resources, rsc_id); /* no match */ if (rsc == NULL) { /* Even when clone-max=0, we still create a single :0 orphan to match against */ char *tmp = clone_zero(rsc_id); resource_t *clone0 = pe_find_resource(data_set->resources, tmp); if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) { rsc = clone0; } else { crm_trace("%s is not known as %s either", rsc_id, tmp); } parent = uber_parent(clone0); free(tmp); crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan"); } else if (rsc->variant > pe_native) { crm_trace("%s is no longer a primitve resource, the lrm_resource entry is obsolete", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (parent && parent->variant > pe_group) { if (is_not_set(parent->flags, pe_rsc_unique)) { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(data_set, node, parent, base); CRM_ASSERT(rsc != NULL); free(base); } if (rsc && safe_str_neq(rsc_id, rsc->id)) { free(rsc->clone_name); rsc->clone_name = strdup(rsc_id); } } return rsc; } static resource_t * process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set) { resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname); rsc = create_fake_resource(rsc_id, rsc_entry, data_set); if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } else { print_resource(LOG_DEBUG_3, "Added orphan", rsc, FALSE); CRM_CHECK(rsc != NULL, return NULL); resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set); } return rsc; } static void process_rsc_state(resource_t * rsc, node_t * node, enum action_fail_response on_fail, xmlNode * migrate_op, pe_working_set_t * data_set) { node_t *tmpnode = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail)); /* process current state */ if (rsc->role != RSC_ROLE_UNKNOWN) { resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { node_t *n = node_copy(node); pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name, n->details->uname); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (is_set(iter->flags, pe_rsc_unique)) { break; } iter = iter->parent; } } if (rsc->role > RSC_ROLE_STOPPED && node->details->online == FALSE && is_set(rsc->flags, pe_rsc_managed)) { char *reason = NULL; gboolean should_fence = FALSE; /* if this is a remote_node living in a container, fence the container * by recovering it. Mark the resource as unmanaged. Once the container * and remote connenction are re-established, the status section will * get reset in the crmd freeing up this resource to run again once we * are sure we know the resources state. 
*/ if (is_container_remote_node(node)) { set_bit(rsc->flags, pe_rsc_failed); should_fence = TRUE; } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) { if (is_baremetal_remote_node(node) && node->details->remote_rsc && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) { /* setting unseen = true means that fencing of the remote node will * only occur if the connection resource is not going to start somewhere. * This allows connection resources on a failed cluster-node to move to * another node without requiring the baremetal remote nodes to be fenced * as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("because %s is active there. Fencing will be revoked if remote-node connection can be re-established on another cluster-node.", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("because %s is thought to be active there", rsc->id); } pe_fence_node(data_set, node, reason); } free(reason); } if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = action_fail_ignore; } switch (on_fail) { case action_fail_ignore: /* nothing to do */ break; case action_fail_fence: /* treat it as if it is still running * but also mark the node as unclean */ pe_fence_node(data_set, node, "because of resource failure(s)"); break; case action_fail_standby: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case action_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); break; case action_fail_migrate: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set); break; case action_fail_stop: rsc->next_role = RSC_ROLE_STOPPED; break; case action_fail_recover: if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { set_bit(rsc->flags, pe_rsc_failed); stop_action(rsc, node, FALSE); } break; case action_fail_restart_container: set_bit(rsc->flags, pe_rsc_failed); if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { stop_action(rsc, node, FALSE); } break; case action_fail_reset_remote: set_bit(rsc->flags, pe_rsc_failed); if (is_set(data_set->flags, pe_flag_stonith_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); } if (tmpnode && is_baremetal_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The connection resource for this baremetal remote-node failed in a way * that should result in fencing the remote-node. */ pe_fence_node(data_set, tmpnode, "because of connection failure(s)"); } } /* require the stop action regardless of whether fencing is occurring or not. */ if (rsc->role > RSC_ROLE_STOPPED) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_interval) { rsc->next_role = RSC_ROLE_STOPPED; } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless of whether we're going to attempt to * reconnect to the remote-node in this transition or not.
*/ if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(data_set->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) { if (is_set(rsc->flags, pe_rsc_orphan)) { if (is_set(rsc->flags, pe_rsc_managed)) { crm_config_warn("Detected active orphan %s running on %s", rsc->id, node->details->uname); } else { crm_config_warn("Cluster configured not to stop active orphans." " %s must be stopped manually on %s", rsc->id, node->details->uname); } } native_add_running(rsc, node, data_set); if (on_fail != action_fail_ignore) { set_bit(rsc->flags, pe_rsc_failed); } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { char *key = stop_key(rsc); GListPtr possible_matches = find_actions(rsc->actions, key, node); GListPtr gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { action_t *stop = (action_t *) gIter->data; stop->flags |= pe_action_optional; } g_list_free(possible_matches); free(key); } } /* create active recurring operations as optional */ static void process_recurring(node_t * node, resource_t * rsc, int start_index, int stop_index, GListPtr sorted_op_list, pe_working_set_t * data_set) { int counter = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; int interval = 0; char *key = NULL; const char *id = ID(rsc_op); const char *interval_s = NULL; counter++; if (node->details->online == FALSE) { pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname); continue; } else if (counter < start_index) { pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter); continue; } interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL); interval = crm_parse_int(interval_s, "0"); if (interval == 0) { pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(status, "-1")) { pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = generate_op_key(rsc->id, task, interval); pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname); custom_action(rsc, key, task, node, TRUE, TRUE, data_set); } } void calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_master_start = -1; const char *task = NULL; const char *status = NULL; GListPtr gIter = sorted_op_list; *stop_index = -1; *start_index = -1; for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = 
crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (safe_str_eq(task, CRMD_ACTION_STOP) && safe_str_eq(status, "0")) { *stop_index = counter; } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) { implied_monitor_start = counter; } } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) { implied_master_start = counter; } } if (*start_index == -1) { if (implied_master_start != -1) { *start_index = implied_master_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } static resource_t * unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set) { GListPtr gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = RSC_ROLE_UNKNOWN; const char *task = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); resource_t *rsc = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; xmlNode *migrate_op = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = FALSE; enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN; crm_trace("[%s] Processing %s on %s", crm_element_name(rsc_entry), rsc_id, node->details->uname); /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } /* find the resource */ rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry); if (rsc == NULL) { rsc = process_orphan_resource(rsc_entry, node, data_set); } CRM_ASSERT(rsc != NULL); /* process operations */ saved_role = rsc->role; on_fail = action_fail_ignore; rsc->role = RSC_ROLE_UNKNOWN; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { migrate_op = rsc_op; } unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail, migrate_op, data_set); if (get_target_role(rsc, &req_role)) { if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) { pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); rsc->next_role = req_role; } else if (req_role > rsc->next_role) { pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; for (rsc_entry = 
__xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { resource_t *rsc; resource_t *container; const char *rsc_id; const char *container_id; if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(data_set->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL || is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE || rsc->container != NULL) { continue; } pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } gboolean unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set) { xmlNode *rsc_entry = NULL; gboolean found_orphaned_container_filler = FALSE; GListPtr unexpected_containers = NULL; GListPtr gIter = NULL; resource_t *remote = NULL; CRM_CHECK(node != NULL, return FALSE); crm_trace("Unpacking resources on %s", node->details->uname); for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = __xml_next_element(rsc_entry)) { if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) { resource_t *rsc; rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set); if (!rsc) { continue; } if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) { found_orphaned_container_filler = TRUE; } if (is_set(rsc->flags, pe_rsc_unexpectedly_running)) { remote = rsc_contains_remote_node(data_set, rsc); if (remote) { unexpected_containers = g_list_append(unexpected_containers, remote); } } } } /* If a container resource is unexpectedly up... and the remote-node * connection resource for that container is not up, the entire container * must be recovered. */ for (gIter = unexpected_containers; gIter != NULL; gIter = gIter->next) { remote = (resource_t *) gIter->data; if (remote->role != RSC_ROLE_STARTED) { crm_warn("Recovering container resource %s. Resource is unexpectedly running and involves a remote-node.", remote->container->id); set_bit(remote->container->flags, pe_rsc_failed); } } /* now that all the resource state has been unpacked for this node * we have to go back and map any orphaned container fillers to their * container resource */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(lrm_rsc_list, data_set); } g_list_free(unexpected_containers); return TRUE; } static void set_active(resource_t * rsc) { resource_t *top = uber_parent(rsc); if (top && top->variant == pe_master) { rsc->role = RSC_ROLE_SLAVE; } else { rsc->role = RSC_ROLE_STARTED; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { node_t *node = value; int *score = user_data; node->weight = *score; } #define STATUS_PATH_MAX 1024 static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, pe_working_set_t * data_set) { int offset = 0; char xpath[STATUS_PATH_MAX]; offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node); offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']", resource); /* Need to check against transition_magic too? 
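* (for illustration, with hypothetical names, find_lrm_op("myrsc", "stop", "node1", NULL, data_set) builds the xpath //node_state[@uname='node1']//lrm_resource[@id='myrsc']/lrm_rsc_op[@operation='stop'])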
*/ if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op, source); } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op, source); } else { offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op); } CRM_LOG_ASSERT(offset > 0); return get_xpath_object(xpath, data_set->input, LOG_DEBUG); } static void unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { /* * The normal sequence is (now): migrate_to(Src) -> migrate_from(Tgt) -> stop(Src) * * So if a migrate_to is followed by a stop, then we dont need to care what * happended on the target node * * Without the stop, we need to look for a successful migrate_from. * This would also imply we're no longer running on the source * * Without the stop, and without a migrate_from op we make sure the resource * gets stopped on both source and target (assuming the target is up) * */ int stop_id = 0; int task_id = 0; xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id, NULL, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); if (stop_op == NULL || stop_id < task_id) { int from_rc = 0, from_status = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); node_t *source = pe_find_node(data_set->nodes, migrate_source); xmlNode *migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (migrate_from) { crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d", ID(migrate_from), migrate_target, from_status, from_rc); } if (migrate_from && from_rc == PCMK_OCF_OK && from_status == PCMK_LRM_OP_DONE) { pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op), migrate_source); /* all good * just need to arrange for the stop action to get sent * but _without_ affecting the target somehow */ rsc->role = RSC_ROLE_STOPPED; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } else if (migrate_from) { /* Failed */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); } } else { /* Pending or complete but erased */ if (target && target->details->online) { pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target, target->details->online); native_add_running(rsc, target, data_set); if (source && source->details->online) { /* If we make it here we have a partial migration. The migrate_to * has completed but the migrate_from on the target has not. Hold on * to the target and source on the resource. 
Later on if we detect that * the resource is still going to run on that target, we may continue * the migration */ rsc->partial_migration_target = target; rsc->partial_migration_source = source; } } else { /* Consider it failed here - forces a restart, prevents migration */ set_bit(rsc->flags, pe_rsc_failed); clear_bit(rsc->flags, pe_rsc_allow_migrate); } } } } static void unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set) { const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); CRM_ASSERT(rsc); if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? */ if (stop_op == NULL || stop_id < migrate_id) { node_t *source = pe_find_node(data_set->nodes, migrate_source); if (source && source->details->online) { native_add_running(rsc, source, data_set); } } } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { int stop_id = 0; int migrate_id = 0; const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set); xmlNode *migrate_op = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source, data_set); if (stop_op) { crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id); } if (migrate_op) { crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id); } /* Get our state right */ rsc->role = RSC_ROLE_STARTED; /* can be master? 
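* (if the target has no stop newer than the migration op, the resource may still be active there, so it is marked as running on the target)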
*/ if (stop_op == NULL || stop_id < migrate_id) { node_t *target = pe_find_node(data_set->nodes, migrate_target); pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op, migrate_id); if (target && target->details->online) { native_add_running(rsc, target, data_set); } } else if (migrate_op == NULL) { /* Make sure it gets cleaned up, the stop may pre-date the migrate_from */ rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node); } } } static void record_failed_op(xmlNode *op, node_t* node, pe_working_set_t * data_set) { xmlNode *xIter = NULL; const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY); if ((node->details->shutdown) && (node->details->online == FALSE)) { return; } for (xIter = data_set->failed->children; xIter; xIter = xIter->next) { const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) { crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname); return; } } crm_trace("Adding entry %s on %s", op_key, node->details->uname); crm_xml_add(op, XML_ATTR_UNAME, node->details->uname); add_node_copy(data_set->failed, op); } static const char *get_op_key(xmlNode *xml_op) { const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if(key == NULL) { key = ID(xml_op); } return key; } static void unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; bool is_probe = FALSE; action_t *action = NULL; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); CRM_ASSERT(rsc); *last_failure = xml_op; crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; pe_rsc_trace(rsc, "is a probe: %s", key); } if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) { crm_warn("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); record_failed_op(xml_op, node, data_set); } else { crm_trace("Processing failed op %s for %s on %s: %s (%d)", task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc), rc); } action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) || (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) || (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) || (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) { pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail), fail2text(action->on_fail), action->uuid, key); *on_fail = action->on_fail; } if (safe_str_eq(task, CRMD_ACTION_STOP)) { resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set); } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) { unpack_rsc_migration_failure(rsc, node, xml_op, data_set); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* * staying in role=master ends up putting the PE/TE into a loop * setting role=slave is not 
dangerous because no master will be * promoted until the failed resource has been fully stopped */ rsc->next_role = RSC_ROLE_STOPPED; if (action->on_fail == action_fail_block) { rsc->role = RSC_ROLE_MASTER; } else { crm_warn("Forcing %s to stop after a failed demote action", rsc->id); rsc->role = RSC_ROLE_SLAVE; } } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) { crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname); resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set); } if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) { /* leave stopped */ pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id); rsc->role = RSC_ROLE_STOPPED; } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "Setting %s active", rsc->id); set_active(rsc); } pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s", rsc->id, role2text(rsc->role), node->details->unclean ? "true" : "false", fail2text(action->on_fail), role2text(action->fail_role)); if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) { rsc->next_role = action->fail_role; } if (action->fail_role == RSC_ROLE_STOPPED) { int score = -INFINITY; resource_t *fail_rsc = rsc; if (fail_rsc->parent) { resource_t *parent = uber_parent(fail_rsc); if ((parent->variant == pe_clone || parent->variant == pe_master) && is_not_set(parent->flags, pe_rsc_unique)) { /* for clone and master resources, if a child fails on an operation * with on-fail = stop, all the resources fail. Do this by preventing * the parent from coming up again. */ fail_rsc = parent; } } crm_warn("Making sure %s doesn't come up again", fail_rsc->id); /* make sure it doesn't come up again */ g_hash_table_destroy(fail_rsc->allowed_nodes); fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } pe_free_action(action); } static int determine_op_status( resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int interval = 0; int result = PCMK_LRM_OP_DONE; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); bool is_probe = FALSE; CRM_ASSERT(rsc); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { is_probe = TRUE; } if (target_rc >= 0 && target_rc != rc) { result = PCMK_LRM_OP_ERROR; pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); } /* we could clean this up significantly except for old LRMs and CRMs that * didn't include target_rc and liked to remap status */ switch (rc) { case PCMK_OCF_OK: if (is_probe && target_rc == 7) { result = PCMK_LRM_OP_DONE; set_bit(rsc->flags, pe_rsc_unexpectedly_running); pe_rsc_info(rsc, "Operation %s found resource %s active on %s", task, rsc->id, node->details->uname); /* legacy code for pre-0.6.5 operations */ } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) { /* catch status ops that return 0 instead of 8 while they * are supposed to be in master mode */ result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) { result = PCMK_LRM_OP_DONE; rsc->role = RSC_ROLE_STOPPED; /* clear any previous failure 
actions */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } else if (safe_str_neq(task, CRMD_ACTION_STOP)) { result = PCMK_LRM_OP_ERROR; } break; case PCMK_OCF_RUNNING_MASTER: if (is_probe) { result = PCMK_LRM_OP_DONE; pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s", task, rsc->id, node->details->uname); } else if (target_rc == rc) { /* nothing to do */ } else if (target_rc >= 0) { result = PCMK_LRM_OP_ERROR; /* legacy code for pre-0.6.5 operations */ } else if (safe_str_neq(task, CRMD_ACTION_STATUS) || rsc->role != RSC_ROLE_MASTER) { result = PCMK_LRM_OP_ERROR; if (rsc->role != RSC_ROLE_MASTER) { crm_err("%s reported %s in master mode on %s", key, rsc->id, node->details->uname); } } rsc->role = RSC_ROLE_MASTER; break; case PCMK_OCF_DEGRADED_MASTER: case PCMK_OCF_FAILED_MASTER: rsc->role = RSC_ROLE_MASTER; result = PCMK_LRM_OP_ERROR; break; case PCMK_OCF_NOT_CONFIGURED: result = PCMK_LRM_OP_ERROR_FATAL; break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: case PCMK_OCF_UNIMPLEMENT_FEATURE: if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) { result = PCMK_LRM_OP_NOTSUPPORTED; break; } else if(pe_can_fence(data_set, node) == FALSE && safe_str_eq(task, CRMD_ACTION_STOP)) { /* If a stop fails and we can't fence, there's nothing else we can do */ pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)", rsc->id, task, services_ocf_exitcode_str(rc), rc); clear_bit(rsc->flags, pe_rsc_managed); set_bit(rsc->flags, pe_rsc_block); } result = PCMK_LRM_OP_ERROR_HARD; break; default: if (result == PCMK_LRM_OP_DONE) { crm_info("Treating %s (rc=%d) on %s as an ERROR", key, rc, node->details->uname); result = PCMK_LRM_OP_ERROR; } } return result; } static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set) { bool expired = FALSE; time_t last_failure = 0; int clear_failcount = 0; int interval = 0; int failure_timeout = rsc->failure_timeout; const char *key = get_op_key(xml_op); const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); /* clearing recurring monitor operation failures automatically * needs to be carefully considered */ if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") && safe_str_neq(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL), "0")) { /* TODO, in the future we should consider not clearing recurring monitor * op failures unless the last action for a resource was a "stop" action. * Otherwise it is possible that clearing the monitor failure will result * in the resource being in a nondeterministic state. * * For now we handle this potentially nondeterministic condition for remote * node connection resources by not clearing a recurring monitor op failure * until after the node has been fenced. */ if (is_set(data_set->flags, pe_flag_stonith_enabled) && (rsc->remote_reconnect_interval)) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); if (remote_node && remote_node->details->remote_was_fenced == 0) { if (strstr(ID(xml_op), "last_failure")) { crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id); } /* disabling failure timeout for this operation because we believe * fencing of the remote node should occur first.
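* (Illustrative example, editorial: for a hypothetical remote connection resource with remote_reconnect_interval set, a failed recurring monitor keeps its failcount while remote_was_fenced is still 0; only after fencing does the failure-timeout handling below get a chance to expire it.)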
*/ failure_timeout = 0; } } } if (failure_timeout > 0) { int last_run = 0; if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) { time_t now = get_effective_time(data_set); if (now > (last_run + failure_timeout)) { expired = TRUE; } } } if (expired) { if (failure_timeout > 0) { int fc = get_failcount_full(node, rsc, &last_failure, FALSE, xml_op, data_set); if(fc) { if (get_failcount_full(node, rsc, &last_failure, TRUE, xml_op, data_set) == 0) { clear_failcount = 1; crm_notice("Clearing expired failcount for %s on %s", rsc->id, node->details->uname); } else { expired = FALSE; } } else if (rsc->remote_reconnect_interval && strstr(ID(xml_op), "last_failure")) { /* always clear last failure when reconnect interval is set */ clear_failcount = 1; } } } else if (strstr(ID(xml_op), "last_failure") && ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) { op_digest_cache_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set); if (digest_data->rc == RSC_DIGEST_UNKNOWN) { crm_trace("rsc op %s/%s on node %s does not have an op digest to compare against", rsc->id, key, node->details->id); } else if (digest_data->rc != RSC_DIGEST_MATCH) { clear_failcount = 1; crm_info("Clearing failcount for %s on %s, %s failed and now resource parameters have changed.", task, rsc->id, node->details->uname); } } if (clear_failcount) { action_t *clear_op = NULL; clear_op = custom_action(rsc, crm_concat(rsc->id, CRM_OP_CLEAR_FAILCOUNT, '_'), CRM_OP_CLEAR_FAILCOUNT, node, FALSE, TRUE, data_set); add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE); } crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) { switch(rc) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: /* Don't expire probes that return these values */ expired = FALSE; break; } } return expired; } int get_target_rc(xmlNode *xml_op) { int dummy = 0; int target_rc = 0; char *dummy_string = NULL; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc); free(dummy_string); return target_rc; } static enum action_fail_response get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set) { int result = action_fail_recover; action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set); result = action->on_fail; pe_free_action(action); return result; } static void update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc, xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { gboolean clear_past_failure = FALSE; CRM_ASSERT(rsc); CRM_ASSERT(xml_op); if (rc == PCMK_OCF_NOT_RUNNING) { clear_past_failure = TRUE; } else if (rc == PCMK_OCF_NOT_INSTALLED) { rsc->role = RSC_ROLE_STOPPED; } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) { if (last_failure) { const char *op_key = get_op_key(xml_op); const char *last_failure_key = get_op_key(last_failure); if (safe_str_eq(op_key, last_failure_key)) { clear_past_failure = TRUE; } } if (rsc->role < RSC_ROLE_STARTED) { set_active(rsc); } } else if (safe_str_eq(task, CRMD_ACTION_START)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_STOP)) { rsc->role = RSC_ROLE_STOPPED;
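/* (editorial note: a completed stop both moves the role to Stopped and, via the clear_past_failure handling further below, lets a prior soft failure be forgotten) */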
clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) { /* Demote from Master does not clear an error */ rsc->role = RSC_ROLE_SLAVE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) { rsc->role = RSC_ROLE_STARTED; clear_past_failure = TRUE; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) { unpack_rsc_migration(rsc, node, xml_op, data_set); } else if (rsc->role < RSC_ROLE_STARTED) { pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname); set_active(rsc); } /* clear any previous failure actions */ if (clear_past_failure) { switch (*on_fail) { case action_fail_stop: case action_fail_fence: case action_fail_migrate: case action_fail_standby: pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop", rsc->id, fail2text(*on_fail)); break; case action_fail_block: case action_fail_ignore: case action_fail_recover: case action_fail_restart_container: *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; break; case action_fail_reset_remote: if (rsc->remote_reconnect_interval == 0) { /* when reconnect delay is not in use, the connection is allowed * to start again after the remote node is fenced and completely * stopped. Otherwise, with reconnect delay we wait for the failure * to be cleared entirely before a reconnect can be attempted. */ *on_fail = action_fail_ignore; rsc->next_role = RSC_ROLE_UNKNOWN; } break; } } } gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set) { int task_id = 0; const char *key = NULL; const char *task = NULL; const char *task_key = NULL; int rc = 0; int status = PCMK_LRM_OP_PENDING-1; int target_rc = get_target_rc(xml_op); int interval = 0; gboolean expired = FALSE; resource_t *parent = rsc; enum action_fail_response failure_strategy = action_fail_recover; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(node != NULL, return FALSE); CRM_CHECK(xml_op != NULL, return FALSE); task_key = get_op_key(xml_op); task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id); crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status); crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval); CRM_CHECK(task != NULL, return FALSE); CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE); CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE); if (safe_str_eq(task, CRMD_ACTION_NOTIFY)) { /* safe to ignore these */ return TRUE; } if (is_not_set(rsc->flags, pe_rsc_unique)) { parent = uber_parent(rsc); } pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)", task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role)); if (node->details->unclean) { pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribue", node->details->uname, rsc->id); } if (status == PCMK_LRM_OP_ERROR) { /* Older versions set this if rc != 0 but its up to us to decide */ status = PCMK_LRM_OP_DONE; } if(status != PCMK_LRM_OP_NOT_INSTALLED) { expired = check_operation_expiry(rsc, node, rc, xml_op, data_set); } /* Degraded results are informational only, re-map them to their error-free equivalents */ if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_OK; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK); record_failed_op(xml_op, node, data_set); } } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) { rc = PCMK_OCF_RUNNING_MASTER; /* Add them to the failed list to highlight them for the user */ if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) { crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER); record_failed_op(xml_op, node, data_set); } } if (expired && target_rc != rc) { const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC); pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)", key, node->details->uname, services_ocf_exitcode_str(rc), rc, services_ocf_exitcode_str(target_rc), target_rc); if(interval == 0) { crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); goto done; } else if(node->details->online && node->details->unclean == FALSE) { crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s", task_key, rc, magic, node->details->uname); /* This is SO horrible, but we don't have access to CancelXmlOp() yet */ crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); goto done; } } if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) { status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set); } pe_rsc_trace(rsc, "Handling status: %d", status); switch (status) { case PCMK_LRM_OP_CANCELLED: /* do nothing?? */ pe_err("Don't know what to do for cancelled ops yet"); break; case PCMK_LRM_OP_PENDING: if (safe_str_eq(task, CRMD_ACTION_START)) { set_bit(rsc->flags, pe_rsc_start_pending); set_active(rsc); } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) { rsc->role = RSC_ROLE_MASTER; } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) { /* If a pending migrate_to action is out on a unclean node, * we have to force the stop action on the target. */ const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); node_t *target = pe_find_node(data_set->nodes, migrate_target); if (target) { stop_action(rsc, target, FALSE); } } if (rsc->pending_task == NULL) { if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) { /* Pending probes are not printed, even if pending * operations are requested. If someone ever requests that * behavior, uncomment this and the corresponding part of * native.c:native_pending_task(). 
*/ /*rsc->pending_task = strdup("probe");*/ } else { rsc->pending_task = strdup(task); } } break; case PCMK_LRM_OP_DONE: pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname); update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set); break; case PCMK_LRM_OP_NOT_INSTALLED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if (failure_strategy == action_fail_ignore) { crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: " "Resource agent doesn't exist", task_key, status, rc, node->details->uname); /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */ *on_fail = action_fail_migrate; } resource_location(parent, node, -INFINITY, "hard-error", data_set); unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); break; case PCMK_LRM_OP_ERROR: case PCMK_LRM_OP_ERROR_HARD: case PCMK_LRM_OP_ERROR_FATAL: case PCMK_LRM_OP_TIMEOUT: case PCMK_LRM_OP_NOTSUPPORTED: failure_strategy = get_action_on_fail(rsc, task_key, task, data_set); if ((failure_strategy == action_fail_ignore) || (failure_strategy == action_fail_restart_container && safe_str_eq(task, CRMD_ACTION_STOP))) { crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded", task_key, rc, node->details->uname); update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); set_bit(rsc->flags, pe_rsc_failure_ignored); record_failed_op(xml_op, node, data_set); if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set); if(status == PCMK_LRM_OP_ERROR_HARD) { do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE, "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)", parent->id, node->details->uname, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, node, -INFINITY, "hard-error", data_set); } else if(status == PCMK_LRM_OP_ERROR_FATAL) { crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)", parent->id, task, services_ocf_exitcode_str(rc), rc); resource_location(parent, NULL, -INFINITY, "fatal-error", data_set); } } break; } done: pe_rsc_trace(rsc, "Resource %s after %s: role=%s", rsc->id, task, role2text(rsc->role)); return TRUE; } gboolean add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set) { const char *cluster_name = NULL; g_hash_table_insert(node->details->attrs, strdup("#uname"), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_ID), strdup(node->details->id)); if (safe_str_eq(node->details->id, data_set->dc_uuid)) { data_set->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup("#" XML_ATTR_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name"); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup("#cluster-name"), strdup(cluster_name)); } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, node->details->attrs, NULL, overwrite, data_set->now); if (g_hash_table_lookup(node->details->attrs, "#site-name") == NULL) { const char *site_name = g_hash_table_lookup(node->details->attrs, "site-name"); if 
(site_name) { /* Prefix '#' to the key */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup("#site-name"), strdup(cluster_name)); } } return TRUE; } static GListPtr extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GListPtr gIter = NULL; GListPtr op_list = NULL; GListPtr sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) { if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GListPtr find_operations(const char *rsc, const char *node, gboolean active_filter, pe_working_set_t * data_set) { GListPtr output = NULL; GListPtr intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE); node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = __xml_first_child(status); node_state != NULL; node_state = __xml_next_element(node_state)) { if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && safe_str_neq(uname, node)) { continue; } this_node = pe_find_node(data_set->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (is_remote_node(this_node)) { determine_remote_online_status(data_set, this_node); } else { determine_online_status(node_state, this_node, data_set); } if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) { /* offline nodes run no resources... 
* unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL; lrm_rsc = __xml_next_element(lrm_rsc)) { if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && safe_str_neq(rsc_id, rsc)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 6d7917c3ae..d7421d152a 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2151 +1,2151 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include pe_working_set_t *pe_dataset = NULL; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * data_set); static xmlNode *find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled); bool pe_can_fence(pe_working_set_t * data_set, node_t *node) { if(is_not_set(data_set->flags, pe_flag_stonith_enabled)) { return FALSE; /* Turned off */ } else if (is_not_set(data_set->flags, pe_flag_have_stonith_resource)) { return FALSE; /* No devices */ } else if (is_set(data_set->flags, pe_flag_have_quorum)) { return TRUE; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return TRUE; } else if(node == NULL) { return FALSE; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return TRUE; } crm_trace("Cannot fence %s", node->details->uname); return FALSE; } node_t * node_copy(const node_t *this_node) { node_t *new_node = NULL; CRM_CHECK(this_node != NULL, return NULL); new_node = calloc(1, sizeof(node_t)); CRM_ASSERT(new_node != NULL); crm_trace("Copying %p (%s) to %p", this_node, this_node->details->uname, new_node); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GListPtr list, gboolean merge_scores) { GHashTable *result = hash; node_t *other_node = NULL; GListPtr gIter = list; GHashTableIter iter; 
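/* Hypothetical walk-through, editorial: given hash = { A:5, B:3 } and list = [ B:7, C:2 ], the loop below drops A to -INFINITY (no counterpart in list) and, when merge_scores is set, folds B to merge_weights(3, 7); the second loop then copies C into the hash with weight -INFINITY, so nodes present in only one collection end up banned. */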
node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = merge_weights(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { node_t *new_node = node_copy(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } GHashTable * node_hash_from_list(GListPtr list) { GListPtr gIter = list; GHashTable *result = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, g_hash_destroy_str); for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *n = node_copy(node); g_hash_table_insert(result, (gpointer) n->details->id, n); } return result; } GListPtr node_list_dup(GListPtr list1, gboolean reset, gboolean filter) { GListPtr result = NULL; GListPtr gIter = list1; for (; gIter != NULL; gIter = gIter->next) { node_t *new_node = NULL; node_t *this_node = (node_t *) gIter->data; if (filter && this_node->weight < 0) { continue; } new_node = node_copy(this_node); if (reset) { new_node->weight = 0; } if (new_node != NULL) { result = g_list_prepend(result, new_node); } } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { const node_t *node_a = a; const node_t *node_b = b; return strcmp(node_a->details->uname, node_b->details->uname); } void dump_node_scores_worker(int level, const char *file, const char *function, int line, resource_t * rsc, const char *comment, GHashTable * nodes) { GHashTable *hash = nodes; GHashTableIter iter; node_t *node = NULL; if (rsc) { hash = rsc->allowed_nodes; } if (rsc && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't show the allocation scores for orphans */ return; } if (level == 0) { char score[128]; int len = sizeof(score); /* For now we want this in sorted order to keep the regression tests happy */ GListPtr gIter = NULL; GListPtr list = g_hash_table_get_values(hash); list = g_list_sort(list, sort_node_uname); gIter = list; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { printf("%s: %s allocation score on %s: %s\n", comment, rsc->id, node->details->uname, score); } else { printf("%s: %s = %s\n", comment, node->details->uname, score); } } g_list_free(list); } else if (hash) { char score[128]; int len = sizeof(score); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { /* This function is called a whole lot, use stack allocated score */ score2char_stack(node->weight, score, len); if (rsc) { do_crm_log_alias(LOG_TRACE, file, function, line, "%s: %s allocation score on %s: %s", comment, rsc->id, node->details->uname, score); } else { do_crm_log_alias(LOG_TRACE, file, function, line + 1, "%s: %s = %s", comment, node->details->uname, score); } } } if (rsc && rsc->children) { GListPtr gIter = NULL; gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child = (resource_t *) gIter->data; dump_node_scores_worker(level, file, function, line, child, comment, nodes); } } } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; 
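/* (editorial example: with a hypothetical buffer "node1 capacity:" and the pair key="cpu", value="4", the reallocation below yields "node1 capacity: cpu=4") */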
int len = 0; char *new_text = NULL; len = strlen(*dump_text) + strlen(" ") + strlen(key) + strlen("=") + strlen(value) + 1; new_text = calloc(1, len); sprintf(new_text, "%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } void dump_node_capacity(int level, const char *comment, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(node->details->uname) + strlen(" capacity:") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node) { int len = 0; char *dump_text = NULL; len = strlen(comment) + strlen(": ") + strlen(rsc->id) + strlen(" utilization on ") + strlen(node->details->uname) + strlen(":") + 1; dump_text = calloc(1, len); sprintf(dump_text, "%s: %s utilization on %s:", comment, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); if (level == 0) { fprintf(stdout, "%s\n", dump_text); } else { crm_trace("%s", dump_text); } free(dump_text); } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const resource_t *resource1 = (const resource_t *)a; const resource_t *resource2 = (const resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } action_t * custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node, gboolean optional, gboolean save_action, pe_working_set_t * data_set) { action_t *action = NULL; GListPtr possible_matches = NULL; CRM_CHECK(key != NULL, return NULL); CRM_CHECK(task != NULL, free(key); return NULL); if (save_action && rsc != NULL) { possible_matches = find_actions(rsc->actions, key, on_node); } else if(save_action) { #if 0 action = g_hash_table_lookup(data_set->singletons, key); #else /* More expensive but takes 'node' into account */ possible_matches = find_actions(data_set->actions, key, on_node); #endif } if(data_set->singletons == NULL) { data_set->singletons = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); } if (possible_matches != NULL) { if (g_list_length(possible_matches) > 1) { pe_warn("Action %s for %s on %s exists %d times", task, rsc ? rsc->id : "", on_node ? on_node->details->uname : "", g_list_length(possible_matches)); } action = g_list_nth_data(possible_matches, 0); pe_rsc_trace(rsc, "Found existing action (%d) %s for %s on %s", action->id, task, rsc ? rsc->id : "", on_node ? on_node->details->uname : ""); g_list_free(possible_matches); } if (action == NULL) { if (save_action) { pe_rsc_trace(rsc, "Creating%s action %d: %s for %s on %s %d", optional ? "" : " mandatory", data_set->action_id, key, rsc ? rsc->id : "", on_node ? 
on_node->details->uname : "", optional); } action = calloc(1, sizeof(action_t)); if (save_action) { action->id = data_set->action_id++; } else { action->id = 0; } action->rsc = rsc; CRM_ASSERT(task != NULL); action->task = strdup(task); if (on_node) { action->node = node_copy(on_node); } action->uuid = strdup(key); pe_set_action_bit(action, pe_action_runnable); if (optional) { pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); } else { pe_clear_action_bit(action, pe_action_optional); pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); } /* Implied by calloc()... action->actions_before = NULL; action->actions_after = NULL; action->pseudo = FALSE; action->dumped = FALSE; action->processed = FALSE; action->seen_count = 0; */ action->extra = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); action->meta = g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); if (save_action) { data_set->actions = g_list_prepend(data_set->actions, action); if(rsc == NULL) { g_hash_table_insert(data_set->singletons, action->uuid, action); } } if (rsc != NULL) { action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); unpack_operation(action, action->op_entry, rsc->container, data_set); if (save_action) { rsc->actions = g_list_prepend(rsc->actions, action); } } if (save_action) { pe_rsc_trace(rsc, "Action %d created", action->id); } } if (optional == FALSE) { pe_rsc_trace(rsc, "Unset optional on %s", action->uuid); pe_clear_action_bit(action, pe_action_optional); } if (rsc != NULL) { enum action_tasks a_task = text2task(action->task); int warn_level = LOG_TRACE; if (save_action) { warn_level = LOG_WARNING; } if (is_set(action->flags, pe_action_have_node_attrs) == FALSE && action->node != NULL && action->op_entry != NULL) { pe_set_action_bit(action, pe_action_have_node_attrs); unpack_instance_attributes(data_set->input, action->op_entry, XML_TAG_ATTR_SETS, action->node->details->attrs, action->extra, NULL, FALSE, data_set->now); } if (is_set(action->flags, pe_action_pseudo)) { /* leave untouched */ } else if (action->node == NULL) { pe_rsc_trace(rsc, "Unset runnable on %s", action->uuid); pe_clear_action_bit(action, pe_action_runnable); } else if (is_not_set(rsc->flags, pe_rsc_managed) && g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL) == NULL) { crm_debug("Action %s (unmanaged)", action->uuid); pe_rsc_trace(rsc, "Set optional on %s", action->uuid); pe_set_action_bit(action, pe_action_optional); /* action->runnable = FALSE; */ } else if (action->node->details->online == FALSE) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (offline)", action->uuid, action->node->details->uname); if (is_set(action->rsc->flags, pe_rsc_managed) && save_action && a_task == stop_rsc) { - pe_fence_node(data_set, action->node, "Node is unclean"); + pe_fence_node(data_set, action->node, "because node is unclean"); } } else if (action->node->details->pending) { pe_clear_action_bit(action, pe_action_runnable); do_crm_log(warn_level, "Action %s on %s is unrunnable (pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_rsc_trace(rsc, "Action %s does not require anything", action->uuid); pe_set_action_bit(action, pe_action_runnable); #if 0 /* * No point checking this * - if we don't have quorum we can't stonith anyway */ } else if (action->needs == rsc_req_stonith) { crm_trace("Action %s requires only stonith", action->uuid); action->runnable =
TRUE; #endif } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_stop) { pe_clear_action_bit(action, pe_action_runnable); crm_debug("%s\t%s (cancelled : quorum)", action->node->details->uname, action->uuid); } else if (is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { pe_rsc_trace(rsc, "Check resource is already active: %s %s %s %s", rsc->id, action->uuid, role2text(rsc->next_role), role2text(rsc->role)); if (rsc->fns->active(rsc, TRUE) == FALSE || rsc->next_role > rsc->role) { pe_clear_action_bit(action, pe_action_runnable); pe_rsc_debug(rsc, "%s\t%s (cancelled : quorum freeze)", action->node->details->uname, action->uuid); } } else { pe_rsc_trace(rsc, "Action %s is runnable", action->uuid); pe_set_action_bit(action, pe_action_runnable); } if (save_action) { switch (a_task) { case stop_rsc: set_bit(rsc->flags, pe_rsc_stopping); break; case start_rsc: clear_bit(rsc->flags, pe_rsc_starting); if (is_set(action->flags, pe_action_runnable)) { set_bit(rsc->flags, pe_rsc_starting); } break; default: break; } } } free(key); return action; } static const char * unpack_operation_on_fail(action_t * action) { const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (safe_str_eq(action->task, CRMD_ACTION_STOP) && safe_str_eq(value, "standby")) { crm_config_err("on-fail=standby is not allowed for stop actions: %s", action->rsc->id); return NULL; } else if (safe_str_eq(action->task, CRMD_ACTION_DEMOTE) && !value) { /* demote on_fail defaults to master monitor value if present */ xmlNode *operation = NULL; const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval = NULL; const char *enabled = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = __xml_first_child(action->rsc->ops_xml); operation && !value; operation = __xml_next_element(operation)) { if (!crm_str_eq((const char *)operation->name, "op", TRUE)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); enabled = crm_element_value(operation, "enabled"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; } else if (enabled && !crm_is_true(enabled)) { continue; } else if (safe_str_neq(name, "monitor") || safe_str_neq(role, "Master")) { continue; } else if (crm_get_interval(interval) <= 0) { continue; } value = on_fail; } } return value; } static xmlNode * find_min_interval_mon(resource_t * rsc, gboolean include_disabled) { int number = 0; int min_interval = -1; const char *name = NULL; const char *value = NULL; const char *interval = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } if (safe_str_neq(name, RSC_STATUS)) { continue; } number = crm_get_interval(interval); if (number < 0) { continue; } if (min_interval < 0 || number < min_interval) { min_interval = number; op = operation; } } } return op; } void unpack_operation(action_t * action, xmlNode * xml_obj, resource_t * container, pe_working_set_t * 
data_set) { int value_i = 0; unsigned long long interval = 0; unsigned long long start_delay = 0; char *value_ms = NULL; const char *value = NULL; const char *field = NULL; CRM_CHECK(action->rsc != NULL, return); unpack_instance_attributes(data_set->input, data_set->op_defaults, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); if (xml_obj) { xmlAttrPtr xIter = NULL; for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, NULL, action->meta, NULL, FALSE, data_set->now); unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL, action->meta, NULL, FALSE, data_set->now); g_hash_table_remove(action->meta, "id"); field = XML_LRM_ATTR_INTERVAL; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { interval = crm_get_interval(value); if (interval > 0) { value_ms = crm_itoa(interval); g_hash_table_replace(action->meta, strdup(field), value_ms); } else { g_hash_table_remove(action->meta, field); } } /* Begin compatibility code */ value = g_hash_table_lookup(action->meta, "requires"); if (safe_str_neq(action->task, RSC_START) && safe_str_neq(action->task, RSC_PROMOTE)) { action->needs = rsc_req_nothing; value = "nothing (not start/promote)"; } else if (safe_str_eq(value, "nothing")) { action->needs = rsc_req_nothing; } else if (safe_str_eq(value, "quorum")) { action->needs = rsc_req_quorum; } else if (safe_str_eq(value, "unfencing")) { action->needs = rsc_req_stonith; set_bit(action->rsc->flags, pe_rsc_needs_unfencing); if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires (un)fencing but fencing is disabled", action->rsc->id); } } else if (is_set(data_set->flags, pe_flag_stonith_enabled) && safe_str_eq(value, "fencing")) { action->needs = rsc_req_stonith; if (is_not_set(data_set->flags, pe_flag_stonith_enabled)) { crm_notice("%s requires fencing but fencing is disabled", action->rsc->id); } /* End compatibility code */ } else if (is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing (resource)"; } else if (is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum (resource)"; } else { action->needs = rsc_req_nothing; value = "nothing (resource)"; } pe_rsc_trace(action->rsc, "\tAction %s requires: %s", action->task, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (safe_str_eq(value, "block")) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); } else if (safe_str_eq(value, "fence")) { action->on_fail = action_fail_fence; value = "node fencing"; if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) { crm_config_err("Specifying on_fail=fence and" " stonith-enabled=false makes no sense"); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (safe_str_eq(value, "standby")) { action->on_fail = action_fail_standby; value = "node standby"; } else if (safe_str_eq(value, "ignore") || safe_str_eq(value, "nothing")) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (safe_str_eq(value, "migrate")) { action->on_fail = action_fail_migrate; value = "force migration"; } else if
(safe_str_eq(value, "stop")) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (safe_str_eq(value, "restart")) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (safe_str_eq(value, "restart-container")) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* for baremetal remote nodes, ensure that any failure that results in * dropping an active connection to a remote node results in fencing of * the remote node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (value == NULL && is_rsc_baremetal_remote_node(action->rsc, data_set) && !(safe_str_eq(action->task, CRMD_ACTION_STATUS) && interval == 0) && (safe_str_neq(action->task, CRMD_ACTION_START))) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence baremetal remote node (default)"; } else { value = "recover baremetal remote node connection (default)"; } if (action->rsc->remote_reconnect_interval) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } else if (value == NULL && safe_str_eq(action->task, CRMD_ACTION_STOP)) { if (is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "\t%s failure handling: %s", action->task, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (safe_str_eq(action->task, CRMD_ACTION_PROMOTE)) { action->fail_role = RSC_ROLE_SLAVE; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "\t%s failure results in: %s", action->task, role2text(action->fail_role)); field = XML_OP_ATTR_START_DELAY; value = g_hash_table_lookup(action->meta, field); if (value != NULL) { value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } start_delay = value_i; value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } else if (interval > 0 && g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN)) { crm_time_t *origin = NULL; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); origin = crm_time_new(value); if (origin == NULL) { crm_config_err("Operation %s contained an invalid " XML_OP_ATTR_ORIGIN ": %s", ID(xml_obj), value); } else { crm_time_t *delay = NULL; int rc = crm_time_compare(origin, data_set->now); long long delay_s = 0; int interval_s = (interval / 1000); crm_trace("Origin: %s, interval: %d", value, interval_s); /* If 'origin' is in the future, find the most recent "multiple" that occurred 
in the past */ while(rc > 0) { crm_time_add_seconds(origin, -interval_s); rc = crm_time_compare(origin, data_set->now); } /* Now find the first "multiple" that occurs after 'now' */ while (rc < 0) { crm_time_add_seconds(origin, interval_s); rc = crm_time_compare(origin, data_set->now); } delay = crm_time_calculate_duration(origin, data_set->now); crm_time_log(LOG_TRACE, "origin", origin, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "now", data_set->now, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); crm_time_log(LOG_TRACE, "delay", delay, crm_time_log_duration); delay_s = crm_time_get_seconds(delay); CRM_CHECK(delay_s >= 0, delay_s = 0); start_delay = delay_s * 1000; crm_info("Calculated a start delay of %llds for %s", delay_s, ID(xml_obj)); g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_itoa(start_delay)); crm_time_free(origin); crm_time_free(delay); } } field = XML_ATTR_TIMEOUT; value = g_hash_table_lookup(action->meta, field); if (value == NULL && xml_obj == NULL && safe_str_eq(action->task, RSC_STATUS) && interval == 0) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); pe_rsc_trace(action->rsc, "\t%s uses the timeout value '%s' from the minimum interval monitor", action->uuid, value); } } if (value == NULL) { value = pe_pref(data_set->config_hash, "default-action-timeout"); } value_i = crm_get_msec(value); if (value_i < 0) { value_i = 0; } value_ms = crm_itoa(value_i); g_hash_table_replace(action->meta, strdup(field), value_ms); } static xmlNode * find_rsc_op_entry_helper(resource_t * rsc, const char *key, gboolean include_disabled) { unsigned long long number = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { name = crm_element_value(operation, "name"); interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); value = crm_element_value(operation, "enabled"); if (!include_disabled && value && crm_is_true(value) == FALSE) { continue; } number = crm_get_interval(interval); match_key = generate_op_key(rsc->id, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = generate_op_key(rsc->clone_name, name, number); if (safe_str_eq(key, match_key)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = generate_op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = generate_op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } void print_node(const char *pre_text, node_t * node, gboolean details) { if (node == NULL) { crm_trace("%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? 
"" : ": "); return; } CRM_ASSERT(node->details); crm_trace("%s%s%sNode %s: (weight=%d, fixed=%s)", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": ", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GListPtr gIter = node->details->running_rsc; crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { resource_t *rsc = (resource_t *) gIter->data; print_resource(LOG_DEBUG_4, "\t\t", rsc, FALSE); } } } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details) { long options = pe_print_log; if (rsc == NULL) { do_crm_log(log_level - 1, "%s%s: ", pre_text == NULL ? "" : pre_text, pre_text == NULL ? "" : ": "); return; } if (details) { options |= pe_print_details; } rsc->fns->print(rsc, pre_text, options, &log_level); } void pe_free_action(action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* action_warpper_t* */ g_list_free_full(action->actions_after, free); /* action_warpper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->task); free(action->uuid); free(action->node); free(action); } GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node) { const char *value = NULL; GListPtr result = NULL; GListPtr gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL); if (value == NULL) { /* skip */ } else if (safe_str_eq(value, "0")) { /* skip */ } else if (safe_str_eq(CRMD_ACTION_CANCEL, action->task)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; break; default: break; } } return task; } action_t * find_first_action(GListPtr input, const char *uuid, const char *task, node_t * on_node) { GListPtr gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (uuid != NULL && safe_str_neq(uuid, action->uuid)) { continue; } else if (task != NULL && safe_str_neq(task, action->task)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == 
action->node->details) { return action; } } return NULL; } GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { continue; } else if (on_node == NULL) { result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ crm_trace("While looking for %s action on %s, " "found an unallocated one. Assigning" " it to the requested node...", key, on_node->details->uname); action->node = node_copy(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { result = g_list_prepend(result, action); } } return result; } GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node) { GListPtr gIter = input; GListPtr result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("Matching %s against %s", key, action->uuid); if (safe_str_neq(key, action->uuid)) { crm_trace("Key mismatch: %s vs. %s", key, action->uuid); continue; } else if (on_node == NULL || action->node == NULL) { crm_trace("on_node=%p, action->node=%p", on_node, action->node); continue; } else if (safe_str_eq(on_node->details->id, action->node->details->id)) { result = g_list_prepend(result, action); } else { crm_trace("Node mismatch: %s vs. %s", on_node->details->id, action->node->details->id); } } return result; } static void resource_node_score(resource_t * rsc, node_t * node, int score, const char *tag) { node_t *match = NULL; if (rsc->children) { GListPtr gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = node_copy(node); match->weight = merge_weights(score, node->weight); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = merge_weights(match->weight, score); } void resource_location(resource_t * rsc, node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GListPtr gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; resource_node_score(rsc, node, score, tag); } } else { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { resource_node_score(rsc, node, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const
xmlNode *xml_b = b;
const char *a_xml_id = crm_element_value_const(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value_const(xml_b, XML_ATTR_ID);

if (safe_str_eq(a_xml_id, b_xml_id)) {
    /* We have duplicate lrm_rsc_op entries in the status
     * section, which is unlikely to be a good thing
     * - we can handle it easily enough, but we need to get
     * to the bottom of why it's happening. */
    pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
    sort_return(0, "duplicate");
}

crm_element_value_const_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_const_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);

if (a_call_id == -1 && b_call_id == -1) {
    /* Both are pending ops, so it doesn't matter since
     * stops are never pending */
    sort_return(0, "pending");

} else if (a_call_id >= 0 && a_call_id < b_call_id) {
    sort_return(-1, "call id");

} else if (b_call_id >= 0 && a_call_id > b_call_id) {
    sort_return(1, "call id");

} else if (b_call_id >= 0 && a_call_id == b_call_id) {
    /* The op and last_failed_op are the same;
     * order on last-rc-change */
    int last_a = -1;
    int last_b = -1;

    crm_element_value_const_int(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
    crm_element_value_const_int(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);

    crm_trace("rc-change: %d vs %d", last_a, last_b);
    if (last_a >= 0 && last_a < last_b) {
        sort_return(-1, "rc-change");
    } else if (last_b >= 0 && last_a > last_b) {
        sort_return(1, "rc-change");
    }
    sort_return(0, "rc-change");

} else {
    /* One of the inputs is a pending operation.
     * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */
    int a_id = -1;
    int b_id = -1;
    int dummy = -1;

    const char *a_magic = crm_element_value_const(xml_a, XML_ATTR_TRANSITION_MAGIC);
    const char *b_magic = crm_element_value_const(xml_b, XML_ATTR_TRANSITION_MAGIC);

    CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
    if(!decode_transition_magic(a_magic, &a_uuid, &a_id, &dummy, &dummy, &dummy, &dummy)) {
        sort_return(0, "bad magic a");
    }
    if(!decode_transition_magic(b_magic, &b_uuid, &b_id, &dummy, &dummy, &dummy, &dummy)) {
        sort_return(0, "bad magic b");
    }
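    /* Illustrative aside (an assumption inferred from the argument list of
     * decode_transition_magic(), not stated in this file): a transition
     * "magic" string packs the op status/rc together with the transition
     * key, i.e. something shaped like
     *     "status:rc;action-id:transition-id:target-rc:uuid"
     * Only the graph UUID and the transition ID matter for the comparison
     * below; the remaining fields are discarded into 'dummy'. */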
    /* Try to determine the relative age of the operation...
     * some pending operations (i.e. a start) may have been superseded
     * by a subsequent stop.
     *
     * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */
    if (safe_str_neq(a_uuid, b_uuid) || a_id == b_id) {
        /* Some of the logic in here may be redundant...
         *
         * If the UUID from the TE doesn't match, then one better be a
         * pending operation. Pending operations don't survive between
         * elections and joins because we query the LRM directly. */
        if (b_call_id == -1) {
            sort_return(-1, "transition + call");

        } else if (a_call_id == -1) {
            sort_return(1, "transition + call");
        }

    } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
        sort_return(-1, "transition");

    } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
        sort_return(1, "transition");
    }
}

/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}

time_t
get_effective_time(pe_working_set_t * data_set)
{
    if(data_set) {
        if (data_set->now == NULL) {
            crm_trace("Recording a new 'now'");
            data_set->now = crm_time_new(NULL);
        }
        return crm_time_get_seconds_since_epoch(data_set->now);
    }

    crm_trace("Defaulting to 'now'");
    return time(NULL);
}

struct fail_search {
    resource_t *rsc;
    pe_working_set_t * data_set;

    int count;
    long long last;
    char *key;
};

static void
get_failcount_by_prefix(gpointer key_p, gpointer value, gpointer user_data)
{
    struct fail_search *search = user_data;
    const char *attr_id = key_p;
    const char *match = strstr(attr_id, search->key);
    resource_t *parent = NULL;

    if (match == NULL) {
        return;
    }

    /* We only increment the failcounts here if the rsc that matches our
     * prefix has the same uber parent as the rsc we're calculating the
     * failcounts for. This prevents false positive matches where
     * unrelated resources may have similar prefixes in their names.
     *
     * search->rsc is already set to be the uber parent. */
    parent = uber_parent(pe_find_resource(search->data_set->resources, match));
    if (parent == NULL || parent != search->rsc) {
        return;
    }

    if (strstr(attr_id, "last-failure-") == attr_id) {
        search->last = crm_int_helper(value, NULL);

    } else if (strstr(attr_id, "fail-count-") == attr_id) {
        search->count += char2score(value);
    }
}

int
get_failcount(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set)
{
    return get_failcount_full(node, rsc, last_failure, TRUE, NULL, data_set);
}

static gboolean
is_matched_failure(const char * rsc_id, xmlNode * conf_op_xml, xmlNode * lrm_op_xml)
{
    gboolean matched = FALSE;
    const char *conf_op_name = NULL;
    int conf_op_interval = 0;
    const char *lrm_op_task = NULL;
    int lrm_op_interval = 0;
    const char *lrm_op_id = NULL;
    char *last_failure_key = NULL;

    if (rsc_id == NULL || conf_op_xml == NULL || lrm_op_xml == NULL) {
        return FALSE;
    }

    conf_op_name = crm_element_value(conf_op_xml, "name");
    conf_op_interval = crm_get_msec(crm_element_value(conf_op_xml, "interval"));
    lrm_op_task = crm_element_value(lrm_op_xml, XML_LRM_ATTR_TASK);
    crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_INTERVAL, &lrm_op_interval);

    if (safe_str_eq(conf_op_name, lrm_op_task) == FALSE
        || conf_op_interval != lrm_op_interval) {
        return FALSE;
    }

    lrm_op_id = ID(lrm_op_xml);
    last_failure_key = generate_op_key(rsc_id, "last_failure", 0);

    if (safe_str_eq(last_failure_key, lrm_op_id)) {
        matched = TRUE;

    } else {
        char *expected_op_key = generate_op_key(rsc_id, conf_op_name, conf_op_interval);

        if (safe_str_eq(expected_op_key, lrm_op_id)) {
            int rc = 0;
            int target_rc = get_target_rc(lrm_op_xml);

            crm_element_value_int(lrm_op_xml, XML_LRM_ATTR_RC, &rc);
            if (rc != target_rc) {
                matched = TRUE;
            }
        }
        free(expected_op_key);
    }

    free(last_failure_key);
    return matched;
}

static gboolean
block_failure(node_t * node, resource_t * rsc, xmlNode * xml_op, pe_working_set_t * data_set)
{
    char *xml_name = clone_strip(rsc->id);
    char *xpath = crm_strdup_printf("//primitive[@id='%s']//op[@on-fail='block']", xml_name);
    xmlXPathObject *xpathObj = xpath_search(rsc->xml,
xpath); gboolean should_block = FALSE; free(xpath); if (xpathObj) { int max = numXpathResults(xpathObj); int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *pref = getXpathResult(xpathObj, lpc); if (xml_op) { should_block = is_matched_failure(xml_name, pref, xml_op); if (should_block) { break; } } else { const char *conf_op_name = NULL; int conf_op_interval = 0; char *lrm_op_xpath = NULL; xmlXPathObject *lrm_op_xpathObj = NULL; conf_op_name = crm_element_value(pref, "name"); conf_op_interval = crm_get_msec(crm_element_value(pref, "interval")); lrm_op_xpath = crm_strdup_printf("//node_state[@uname='%s']" "//lrm_resource[@id='%s']" "/lrm_rsc_op[@operation='%s'][@interval='%d']", node->details->uname, xml_name, conf_op_name, conf_op_interval); lrm_op_xpathObj = xpath_search(data_set->input, lrm_op_xpath); free(lrm_op_xpath); if (lrm_op_xpathObj) { int max2 = numXpathResults(lrm_op_xpathObj); int lpc2 = 0; for (lpc2 = 0; lpc2 < max2; lpc2++) { xmlNode *lrm_op_xml = getXpathResult(lrm_op_xpathObj, lpc2); should_block = is_matched_failure(xml_name, pref, lrm_op_xml); if (should_block) { break; } } } freeXpathObject(lrm_op_xpathObj); if (should_block) { break; } } } } free(xml_name); freeXpathObject(xpathObj); return should_block; } int get_failcount_full(node_t * node, resource_t * rsc, time_t *last_failure, bool effective, xmlNode * xml_op, pe_working_set_t * data_set) { char *key = NULL; const char *value = NULL; struct fail_search search = { rsc, data_set, 0, 0, NULL }; /* Optimize the "normal" case */ key = crm_concat("fail-count", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.count = char2score(value); crm_trace("%s = %s", key, value); free(key); if (value) { key = crm_concat("last-failure", rsc->clone_name ? rsc->clone_name : rsc->id, '-'); value = g_hash_table_lookup(node->details->attrs, key); search.last = crm_int_helper(value, NULL); free(key); /* This block is still relevant once we omit anonymous instance numbers * because stopped clones won't have clone_name set */ } else if (is_not_set(rsc->flags, pe_rsc_unique)) { search.rsc = uber_parent(rsc); search.key = clone_strip(rsc->id); g_hash_table_foreach(node->details->attrs, get_failcount_by_prefix, &search); free(search.key); search.key = NULL; } if (search.count != 0 && search.last != 0 && last_failure) { *last_failure = search.last; } if(search.count && rsc->failure_timeout) { /* Never time-out if blocking failures are configured */ if (block_failure(node, rsc, xml_op, data_set)) { pe_warn("Setting %s.failure-timeout=%d conflicts with on-fail=block: ignoring timeout", rsc->id, rsc->failure_timeout); rsc->failure_timeout = 0; #if 0 /* A good idea? 
*/ } else if (rsc->container == NULL && is_not_set(data_set->flags, pe_flag_stonith_enabled)) { /* In this case, stop.on-fail defaults to block in unpack_operation() */ rsc->failure_timeout = 0; #endif } } if (effective && search.count != 0 && search.last != 0 && rsc->failure_timeout) { if (search.last > 0) { time_t now = get_effective_time(data_set); if (now > (search.last + rsc->failure_timeout)) { crm_debug("Failcount for %s on %s has expired (limit was %ds)", search.rsc->id, node->details->uname, rsc->failure_timeout); search.count = 0; } } } if (search.count != 0) { char *score = score2char(search.count); crm_info("%s has failed %s times on %s", search.rsc->id, score, node->details->uname); free(score); } return search.count; } /* If it's a resource container, get its failcount plus all the failcounts of the resources within it */ int get_failcount_all(node_t * node, resource_t * rsc, time_t *last_failure, pe_working_set_t * data_set) { int failcount_all = 0; failcount_all = get_failcount(node, rsc, last_failure, data_set); if (rsc->fillers) { GListPtr gIter = NULL; for (gIter = rsc->fillers; gIter != NULL; gIter = gIter->next) { resource_t *filler = (resource_t *) gIter->data; time_t filler_last_failure = 0; failcount_all += get_failcount(node, filler, &filler_last_failure, data_set); if (last_failure && filler_last_failure > *last_failure) { *last_failure = filler_last_failure; } } if (failcount_all != 0) { char *score = score2char(failcount_all); crm_info("Container %s and the resources within it have failed %s times on %s", rsc->id, score, node->details->uname); free(score); } } return failcount_all; } gboolean get_target_role(resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (value == NULL || safe_str_eq("started", value) || safe_str_eq("default", value)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { crm_config_err("%s: Unknown value for %s: %s", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (uber_parent(rsc)->variant == pe_master) { if (local_role > RSC_ROLE_SLAVE) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { crm_config_err("%s is not part of a master/slave resource, a %s of '%s' makes no sense", rsc->id, XML_RSC_ATTR_TARGET_ROLE, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order) { GListPtr gIter = NULL; action_wrapper_t *wrapper = NULL; GListPtr list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Ordering Action %s before %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... 
it's happened */
CRM_ASSERT(lh_action != rh_action);

/* Filter dups, otherwise update_action_states() has too much work to do */
gIter = lh_action->actions_after;
for (; gIter != NULL; gIter = gIter->next) {
    action_wrapper_t *after = (action_wrapper_t *) gIter->data;

    if (after->action == rh_action && (after->type & order)) {
        return FALSE;
    }
}

wrapper = calloc(1, sizeof(action_wrapper_t));
wrapper->action = rh_action;
wrapper->type = order;
list = lh_action->actions_after;
list = g_list_prepend(list, wrapper);
lh_action->actions_after = list;

wrapper = NULL;
/* order |= pe_order_implies_then; */
/* order ^= pe_order_implies_then; */

wrapper = calloc(1, sizeof(action_wrapper_t));
wrapper->action = lh_action;
wrapper->type = order;
list = rh_action->actions_before;
list = g_list_prepend(list, wrapper);
rh_action->actions_before = list;
return TRUE;
}

action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
    action_t *op = NULL;

    if(data_set->singletons) {
        op = g_hash_table_lookup(data_set->singletons, name);
    }
    if (op == NULL) {
        op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
        set_bit(op->flags, pe_action_pseudo);
        set_bit(op->flags, pe_action_runnable);
    }
    return op;
}

void
destroy_ticket(gpointer data)
{
    ticket_t *ticket = data;

    if (ticket->state) {
        g_hash_table_destroy(ticket->state);
    }
    free(ticket->id);
    free(ticket);
}

ticket_t *
ticket_new(const char *ticket_id, pe_working_set_t * data_set)
{
    ticket_t *ticket = NULL;

    if (ticket_id == NULL || strlen(ticket_id) == 0) {
        return NULL;
    }

    if (data_set->tickets == NULL) {
        data_set->tickets =
            g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
    }

    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {

        ticket = calloc(1, sizeof(ticket_t));
        if (ticket == NULL) {
            crm_err("Cannot allocate ticket '%s'", ticket_id);
            return NULL;
        }

        crm_trace("Creating ticket entry for %s", ticket_id);

        ticket->id = strdup(ticket_id);
        ticket->granted = FALSE;
        ticket->last_granted = -1;
        ticket->standby = FALSE;
        ticket->state = g_hash_table_new_full(crm_str_hash, g_str_equal,
                                              g_hash_destroy_str, g_hash_destroy_str);

        g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket);
    }

    return ticket;
}

static void
filter_parameters(xmlNode * param_set, const char *param_string, bool need_present)
{
    int len = 0;
    char *name = NULL;
    char *match = NULL;

    if (param_set == NULL) {
        return;
    }

    if (param_set) {
        xmlAttrPtr xIter = param_set->properties;

        while (xIter) {
            const char *prop_name = (const char *)xIter->name;

            xIter = xIter->next;
            name = NULL;
            len = strlen(prop_name) + 3;

            name = malloc(len);
            if(name) {
                sprintf(name, " %s ", prop_name);
                name[len - 1] = 0;
                match = strstr(param_string, name);
            }

            if (need_present && match == NULL) {
                crm_trace("%s not found in %s", prop_name, param_string);
                xml_remove_prop(param_set, prop_name);

            } else if (need_present == FALSE && match) {
                crm_trace("%s found in %s", prop_name, param_string);
                xml_remove_prop(param_set, prop_name);
            }

            free(name);
        }
    }
}

op_digest_cache_t *
rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
                      pe_working_set_t * data_set)
{
    op_digest_cache_t *data = NULL;

    GHashTable *local_rsc_params = NULL;
    action_t *action = NULL;
    char *key = NULL;

    int interval = 0;
    const char *op_id = ID(xml_op);
    const char *interval_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL);
    const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
    const char *digest_all;
    const char *digest_restart;
    const char *secure_list;
    const char
*restart_list; const char *op_version; data = g_hash_table_lookup(node->details->digest_cache, op_id); if (data) { return data; } data = calloc(1, sizeof(op_digest_cache_t)); digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST); digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST); secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE); restart_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART); op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION); /* key is freed in custom_action */ interval = crm_parse_int(interval_s, "0"); key = generate_op_key(rsc->id, task, interval); action = custom_action(rsc, key, task, node, TRUE, FALSE, data_set); key = NULL; local_rsc_params = g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, g_hash_destroy_str); get_rsc_attributes(local_rsc_params, rsc, node, data_set); data->params_all = create_xml_node(NULL, XML_TAG_PARAMS); g_hash_table_foreach(local_rsc_params, hash2field, data->params_all); g_hash_table_foreach(action->extra, hash2field, data->params_all); g_hash_table_foreach(rsc->parameters, hash2field, data->params_all); g_hash_table_foreach(action->meta, hash2metafield, data->params_all); filter_action_parameters(data->params_all, op_version); data->digest_all_calc = calculate_operation_digest(data->params_all, op_version); if (secure_list && is_set(data_set->flags, pe_flag_sanitized)) { data->params_secure = copy_xml(data->params_all); if (secure_list) { filter_parameters(data->params_secure, secure_list, FALSE); } data->digest_secure_calc = calculate_operation_digest(data->params_secure, op_version); } if (digest_restart) { data->params_restart = copy_xml(data->params_all); if (restart_list) { filter_parameters(data->params_restart, restart_list, TRUE); } data->digest_restart_calc = calculate_operation_digest(data->params_restart, op_version); } data->rc = RSC_DIGEST_MATCH; if (digest_restart && strcmp(data->digest_restart_calc, digest_restart) != 0) { data->rc = RSC_DIGEST_RESTART; } else if (digest_all == NULL) { /* it is unknown what the previous op digest was */ data->rc = RSC_DIGEST_UNKNOWN; } else if (strcmp(digest_all, data->digest_all_calc) != 0) { data->rc = RSC_DIGEST_ALL; } g_hash_table_insert(node->details->digest_cache, strdup(op_id), data); g_hash_table_destroy(local_rsc_params); pe_free_action(action); return data; } const char *rsc_printable_id(resource_t *rsc) { if (is_not_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void clear_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; clear_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; clear_bit_recursive(child_rsc, flag); } } void set_bit_recursive(resource_t * rsc, unsigned long long flag) { GListPtr gIter = rsc->children; set_bit(rsc->flags, flag); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; set_bit_recursive(child_rsc, flag); } } action_t * pe_fence_op(node_t * node, const char *op, bool optional, pe_working_set_t * data_set) { char *key = NULL; action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); if(data_set->singletons) { stonith_op = g_hash_table_lookup(data_set->singletons, key); } if(stonith_op == NULL) { stonith_op = custom_action(NULL, key, CRM_OP_FENCE, node, optional, TRUE, data_set); 
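        /* Note (an inference, not stated here): because custom_action() is
         * given a NULL resource, the new fence op is presumably registered
         * in data_set->singletons -- the same table consulted by the
         * g_hash_table_lookup() above -- so later calls with an identical
         * key reuse this action. The meta attributes below carry the
         * fencing target and requested action for the executor. */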
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
} else {
    free(key);
}

if(optional == FALSE) {
    crm_trace("%s is no longer optional", stonith_op->uuid);
    pe_clear_action_bit(stonith_op, pe_action_optional);
}

return stonith_op;
}

void
trigger_unfencing(
    resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set)
{
    if(is_not_set(data_set->flags, pe_flag_enable_unfencing)) {
        /* No resources require it */
        return;

    } else if (rsc != NULL && is_not_set(rsc->flags, pe_rsc_fence_device)) {
        /* Wasn't a stonith device */
        return;

    } else if(node
              && node->details->online
              && node->details->unclean == FALSE
              && node->details->shutdown == FALSE) {
        action_t *unfence = pe_fence_op(node, "on", FALSE, data_set);

        crm_notice("Unfencing %s: %s", node->details->uname, reason);
        if(dependency) {
            order_actions(unfence, dependency, pe_order_optional);
        }

    } else if(rsc) {
        GHashTableIter iter;

        g_hash_table_iter_init(&iter, rsc->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
            if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) {
                trigger_unfencing(rsc, node, reason, dependency, data_set);
            }
        }
    }
}

gboolean
add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref)
{
    tag_t *tag = NULL;
    GListPtr gIter = NULL;
    gboolean is_existing = FALSE;

    CRM_CHECK(tags && tag_name && obj_ref, return FALSE);

    tag = g_hash_table_lookup(tags, tag_name);
    if (tag == NULL) {
        tag = calloc(1, sizeof(tag_t));
        if (tag == NULL) {
            return FALSE;
        }
        tag->id = strdup(tag_name);
        tag->refs = NULL;
        g_hash_table_insert(tags, strdup(tag_name), tag);
    }

    for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) {
        const char *existing_ref = (const char *) gIter->data;

        if (crm_str_eq(existing_ref, obj_ref, TRUE)){
            is_existing = TRUE;
            break;
        }
    }

    if (is_existing == FALSE) {
        tag->refs = g_list_append(tag->refs, strdup(obj_ref));
        crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref);
    }

    return TRUE;
}
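/* Hypothetical usage sketch (the tag and resource names are invented for
 * illustration):
 *     add_tag_ref(data_set->tags, "my-tag", "rsc1");
 *     add_tag_ref(data_set->tags, "my-tag", "rsc1");   <- second call is a no-op
 * Duplicate refs are filtered above, so callers can register the same
 * object against a tag repeatedly without growing the list. */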
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index eb5f8aa44b..d24f986466 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -1,699 +1,707 @@
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Copyright (C) 2012 Andrew Beekhof
 */

#include
#include
#include
#include
+#include
#include
#include
#include
#include

#define BUS_NAME "org.freedesktop.systemd1"
#define BUS_PATH "/org/freedesktop/systemd1"
#define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties"

/* /usr/share/dbus-1/interfaces/org.freedesktop.systemd1.Manager.xml */

gboolean systemd_unit_exec_with_unit(svc_action_t * op, const char *unit);

struct unit_info {
    const char *id;
    const char *description;
    const char *load_state;
    const char *active_state;
    const char *sub_state;
    const char *following;
    const char *unit_path;
    uint32_t job_id;
    const char *job_type;
    const char *job_path;
};

struct pcmk_dbus_data {
    char *name;
    char *unit;
    DBusError error;
    svc_action_t *op;
    void (*callback)(DBusMessage *reply, svc_action_t *op);
};

static DBusMessage *
systemd_new_method(const char *iface, const char *method)
{
    crm_trace("Calling: %s on %s", method, iface);
    return dbus_message_new_method_call(BUS_NAME, // target for the method call
                                        BUS_PATH, // object to call on
                                        iface,    // interface to call on
                                        method);  // method name
}

static DBusConnection* systemd_proxy = NULL;

static gboolean
systemd_init(void)
{
    static int need_init = 1;
    /* http://dbus.freedesktop.org/doc/api/html/group__DBusConnection.html */

    if (systemd_proxy
        && dbus_connection_get_is_connected(systemd_proxy) == FALSE) {
        crm_warn("Connection to System DBus is closed. Reconnecting...");
        pcmk_dbus_disconnect(systemd_proxy);
        systemd_proxy = NULL;
        need_init = 1;
    }

    if (need_init) {
        need_init = 0;
        systemd_proxy = pcmk_dbus_connect();
    }
    if (systemd_proxy == NULL) {
        return FALSE;
    }
    return TRUE;
}

void
systemd_cleanup(void)
{
    if (systemd_proxy) {
        pcmk_dbus_disconnect(systemd_proxy);
        systemd_proxy = NULL;
    }
}

static char *
systemd_service_name(const char *name)
{
    if (name == NULL) {
        return NULL;

    } else if (strstr(name, ".service")) {
        return strdup(name);
    }

    return crm_strdup_printf("%s.service", name);
}

static void
systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
{
    DBusError error;
    DBusMessage *reply = NULL;
    unsigned int reload_count = GPOINTER_TO_UINT(user_data);

    dbus_error_init(&error);
    if(pending) {
        reply = dbus_pending_call_steal_reply(pending);
    }

    if(pcmk_dbus_find_error("Reload", pending, reply, &error)) {
        crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);

    } else {
        crm_trace("Reload %d complete", reload_count);
    }

    if(pending) {
        dbus_pending_call_unref(pending);
    }
    if(reply) {
        dbus_message_unref(reply);
    }
}

static bool
systemd_daemon_reload(int timeout)
{
    static unsigned int reload_count = 0;
    const char *method = "Reload";
    DBusMessage *msg = systemd_new_method(BUS_NAME".Manager", method);

    reload_count++;
    CRM_ASSERT(msg != NULL);
    pcmk_dbus_send(msg, systemd_proxy, systemd_daemon_reload_complete,
                   GUINT_TO_POINTER(reload_count), timeout);
    dbus_message_unref(msg);

    return TRUE;
}

static bool
systemd_mask_error(svc_action_t *op, const char *error)
{
    crm_trace("Could not issue %s for %s: %s", op->action, op->rsc, error);
    if(strstr(error, "org.freedesktop.systemd1.InvalidName")
       || strstr(error, "org.freedesktop.systemd1.LoadFailed")
       || strstr(error, "org.freedesktop.systemd1.NoSuchUnit")) {

        if (safe_str_eq(op->action, "stop")) {
            crm_trace("Masking %s failure for %s: unknown services are stopped",
                      op->action, op->rsc);
            op->rc = PCMK_OCF_OK;
            return TRUE;

        } else {
            crm_trace("Mapping %s failure for %s: unknown services are not installed",
                      op->action, op->rsc);
            op->rc = PCMK_OCF_NOT_INSTALLED;
            op->status = PCMK_LRM_OP_NOT_INSTALLED;
            return FALSE;
        }
    }

    return FALSE;
}
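/* Design note: masking these D-Bus errors means a "stop" of a unit that
 * systemd does not know about is reported as success (an unknown service
 * is, by definition, stopped), while any other action on an unknown unit
 * is surfaced to the caller as "not installed" rather than as a generic
 * failure. */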
static const char *
systemd_loadunit_result(DBusMessage *reply, svc_action_t * op)
{
    const char *path = NULL;
    DBusError error;

    if(pcmk_dbus_find_error("LoadUnit", (void*)&path, reply, &error)) {
        if(op && !systemd_mask_error(op, error.name)) {
            crm_err("Could not find unit %s for %s: LoadUnit error '%s'",
                    op->agent, op->id, error.name);
        }

    } else if(pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) {
        dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID);
    }

    if(op) {
        if (path) {
            systemd_unit_exec_with_unit(op, path);

        } else if (op->synchronous == FALSE) {
            operation_finalize(op);
        }
    }

    return path;
}

static void
systemd_loadunit_cb(DBusPendingCall *pending, void *user_data)
{
    DBusMessage *reply = NULL;
    svc_action_t * op = user_data;

    if(pending) {
        reply = dbus_pending_call_steal_reply(pending);
    }

    crm_trace("Got result: %p for %p / %p for %s", reply, pending, op->opaque->pending, op->id);

    CRM_LOG_ASSERT(pending == op->opaque->pending);
    services_set_op_pending(op, NULL);

    systemd_loadunit_result(reply, user_data);

    if(reply) {
        dbus_message_unref(reply);
    }
}

static char *
systemd_unit_by_name(const gchar * arg_name, svc_action_t *op)
{
    DBusMessage *msg;
    DBusMessage *reply = NULL;
    DBusPendingCall* pending = NULL;
    char *name = NULL;

    /* Equivalent to GetUnit if it's already loaded */
    if (systemd_init() == FALSE) {
        return NULL;
    }

    msg = systemd_new_method(BUS_NAME".Manager", "LoadUnit");
    CRM_ASSERT(msg != NULL);

    name = systemd_service_name(arg_name);
    CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID));
    free(name);

    if(op == NULL || op->synchronous) {
        const char *unit = NULL;
        char *munit = NULL;
        DBusError error;

        dbus_error_init(&error);
        reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error,
                                    op? op->timeout : DBUS_TIMEOUT_USE_DEFAULT);
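        /* Synchronous callers (e.g. systemd_unit_exists(), which passes
         * op == NULL) block in the pcmk_dbus_send_recv() above; the
         * asynchronous path below instead hands reply processing to
         * systemd_loadunit_cb() and records the pending call on the op. */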
        dbus_message_unref(msg);

        unit = systemd_loadunit_result(reply, op);
        if(unit) {
            munit = strdup(unit);
        }
        if(reply) {
            dbus_message_unref(reply);
        }
        return munit;
    }

    pending = pcmk_dbus_send(msg, systemd_proxy, systemd_loadunit_cb, op, op->timeout);
    if(pending) {
        services_set_op_pending(op, pending);
    }

    dbus_message_unref(msg);
    return NULL;
}

GList *
systemd_unit_listall(void)
{
    int lpc = 0;
    GList *units = NULL;
    DBusMessageIter args;
    DBusMessageIter unit;
    DBusMessageIter elem;
    DBusMessage *msg = NULL;
    DBusMessage *reply = NULL;
    const char *method = "ListUnits";
    DBusError error;

    if (systemd_init() == FALSE) {
        return NULL;
    }

    /* " \n" \ " \n" \ " \n" \ */

    dbus_error_init(&error);
    msg = systemd_new_method(BUS_NAME".Manager", method);
    CRM_ASSERT(msg != NULL);

    reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error, DBUS_TIMEOUT_USE_DEFAULT);
    dbus_message_unref(msg);

    if(error.name) {
        crm_err("Call to %s failed: %s", method, error.name);
        return NULL;

    } else if (reply == NULL) {
        crm_err("Call to %s failed: Message has no reply", method);
        return NULL;

    } else if (!dbus_message_iter_init(reply, &args)) {
        crm_err("Call to %s failed: Message has no arguments", method);
        dbus_message_unref(reply);
        return NULL;
    }

    if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) {
        crm_err("Call to %s failed: Message has invalid arguments", method);
        dbus_message_unref(reply);
        return NULL;
    }

    dbus_message_iter_recurse(&args, &unit);
    while (dbus_message_iter_get_arg_type (&unit) != DBUS_TYPE_INVALID) {
        DBusBasicValue value;

        if(!pcmk_dbus_type_check(reply, &unit, DBUS_TYPE_STRUCT, __FUNCTION__, __LINE__)) {
            continue;
        }

        dbus_message_iter_recurse(&unit, &elem);
        if(!pcmk_dbus_type_check(reply, &elem, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) {
            continue;
        }

        dbus_message_iter_get_basic(&elem, &value);
        crm_trace("Got: %s", value.str);
        if(value.str) {
            char *match = strstr(value.str, ".service");

            if (match) {
                lpc++;
                match[0] = 0;
                units = g_list_append(units, strdup(value.str));
            }
        }
        dbus_message_iter_next (&unit);
    }

    dbus_message_unref(reply);
    crm_trace("Found %d systemd services", lpc);
    return units;
}

gboolean
systemd_unit_exists(const char *name)
{
    char *unit = NULL;

    /* Note: Makes a blocking dbus call
     * Used by resources_find_service_class() when resource class=service */
    unit = systemd_unit_by_name(name, NULL);
    if(unit) {
        free(unit);
        return TRUE;
    }
    return FALSE;
}

static char *
systemd_unit_metadata(const char *name, int timeout)
{
    char *meta = NULL;
    char *desc = NULL;
    char *path = systemd_unit_by_name(name, NULL);

    if (path) {
        /* TODO: Worth making a blocking call for? Probably not. Possibly if cached.
*/ desc = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, path, BUS_NAME ".Unit", "Description", NULL, NULL, NULL, timeout); } else { desc = crm_strdup_printf("Systemd unit file for %s", name); } meta = crm_strdup_printf("\n" "\n" "\n" " 1.0\n" " \n" " %s\n" " \n" " systemd unit file for %s\n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" " \n" "\n", name, desc, name); free(desc); free(path); return meta; } static void systemd_exec_result(DBusMessage *reply, svc_action_t *op) { DBusError error; if(pcmk_dbus_find_error(op->action, (void*)&error, reply, &error)) { /* ignore "already started" or "not running" errors */ if (!systemd_mask_error(op, error.name)) { crm_err("Could not issue %s for %s: %s", op->action, op->rsc, error.message); } } else { if(!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, __FUNCTION__, __LINE__)) { crm_warn("Call to %s passed but return type was unexpected", op->action); op->rc = PCMK_OCF_OK; } else { const char *path = NULL; dbus_message_get_args (reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, DBUS_TYPE_INVALID); crm_info("Call to %s passed: %s", op->action, path); op->rc = PCMK_OCF_OK; } } operation_finalize(op); } static void systemd_async_dispatch(DBusPendingCall *pending, void *user_data) { DBusError error; DBusMessage *reply = NULL; svc_action_t *op = user_data; dbus_error_init(&error); if(pending) { reply = dbus_pending_call_steal_reply(pending); } crm_trace("Got result: %p for %p for %s, %s", reply, pending, op->rsc, op->action); CRM_LOG_ASSERT(pending == op->opaque->pending); services_set_op_pending(op, NULL); systemd_exec_result(reply, op); if(reply) { dbus_message_unref(reply); } } #define SYSTEMD_OVERRIDE_ROOT "/run/systemd/system/" static void systemd_unit_check(const char *name, const char *state, void *userdata) { svc_action_t * op = userdata; crm_trace("Resource %s has %s='%s'", op->rsc, name, state); if(state == NULL) { op->rc = PCMK_OCF_NOT_RUNNING; } else if (g_strcmp0(state, "active") == 0) { op->rc = PCMK_OCF_OK; } else if (g_strcmp0(state, "activating") == 0) { op->rc = PCMK_OCF_PENDING; } else if (g_strcmp0(state, "deactivating") == 0) { op->rc = PCMK_OCF_PENDING; } else { op->rc = PCMK_OCF_NOT_RUNNING; } if (op->synchronous == FALSE) { services_set_op_pending(op, NULL); operation_finalize(op); } } gboolean systemd_unit_exec_with_unit(svc_action_t * op, const char *unit) { const char *method = op->action; DBusMessage *msg = NULL; DBusMessage *reply = NULL; CRM_ASSERT(unit); if (safe_str_eq(op->action, "monitor") || safe_str_eq(method, "status")) { DBusPendingCall *pending = NULL; char *state; state = pcmk_dbus_get_property(systemd_proxy, BUS_NAME, unit, BUS_NAME ".Unit", "ActiveState", op->synchronous?NULL:systemd_unit_check, op, op->synchronous?NULL:&pending, op->timeout); if (op->synchronous) { systemd_unit_check("ActiveState", state, op); free(state); return op->rc == PCMK_OCF_OK; } else if (pending) { services_set_op_pending(op, pending); return TRUE; } else { return operation_finalize(op); } } else if (g_strcmp0(method, "start") == 0) { FILE *file_strm = NULL; char *override_dir = crm_strdup_printf("%s/%s.service.d", SYSTEMD_OVERRIDE_ROOT, op->agent); char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent); + mode_t orig_umask; method = "StartUnit"; crm_build_path(override_dir, 0755); + /* Ensure the override file is world-readable. This is not strictly + * necessary, but it avoids a systemd warning in the logs. 
+ */ + orig_umask = umask(S_IWGRP | S_IWOTH); file_strm = fopen(override_file, "w"); + umask(orig_umask); + if (file_strm != NULL) { /* TODO: Insert the start timeout in too */ char *override = crm_strdup_printf( "[Unit]\n" "Description=Cluster Controlled %s\n" "Before=pacemaker.service\n" "\n" "[Service]\n" "Restart=no\n", op->agent); int rc = fprintf(file_strm, "%s\n", override); free(override); if (rc < 0) { crm_perror(LOG_ERR, "Cannot write to systemd override file %s", override_file); } } else { crm_err("Cannot open systemd override file %s for writing", override_file); } if (file_strm != NULL) { fflush(file_strm); fclose(file_strm); } systemd_daemon_reload(op->timeout); free(override_file); free(override_dir); } else if (g_strcmp0(method, "stop") == 0) { char *override_file = crm_strdup_printf("%s/%s.service.d/50-pacemaker.conf", SYSTEMD_OVERRIDE_ROOT, op->agent); method = "StopUnit"; unlink(override_file); free(override_file); systemd_daemon_reload(op->timeout); } else if (g_strcmp0(method, "restart") == 0) { method = "RestartUnit"; } else { op->rc = PCMK_OCF_UNIMPLEMENT_FEATURE; goto cleanup; } crm_debug("Calling %s for %s: %s", method, op->rsc, unit); msg = systemd_new_method(BUS_NAME".Manager", method); CRM_ASSERT(msg != NULL); /* (ss) */ { const char *replace_s = "replace"; char *name = systemd_service_name(op->agent); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); free(name); } if (op->synchronous == FALSE) { DBusPendingCall* pending = pcmk_dbus_send(msg, systemd_proxy, systemd_async_dispatch, op, op->timeout); dbus_message_unref(msg); if(pending) { services_set_op_pending(op, pending); return TRUE; } else { return operation_finalize(op); } } else { DBusError error; reply = pcmk_dbus_send_recv(msg, systemd_proxy, &error, op->timeout); dbus_message_unref(msg); systemd_exec_result(reply, op); if(reply) { dbus_message_unref(reply); } return FALSE; } cleanup: if (op->synchronous == FALSE) { return operation_finalize(op); } return op->rc == PCMK_OCF_OK; } static gboolean systemd_timeout_callback(gpointer p) { svc_action_t * op = p; op->opaque->timerid = 0; crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc); operation_finalize(op); return FALSE; } /* For an asynchronous 'op', returns FALSE if 'op' should be free'd by the caller */ /* For a synchronous 'op', returns FALSE if 'op' fails */ gboolean systemd_unit_exec(svc_action_t * op) { char *unit = NULL; CRM_ASSERT(op); CRM_ASSERT(systemd_init()); op->rc = PCMK_OCF_UNKNOWN_ERROR; crm_debug("Performing %ssynchronous %s op on systemd unit %s named '%s'", op->synchronous ? 
"" : "a", op->action, op->agent, op->rsc); if (safe_str_eq(op->action, "meta-data")) { /* TODO: See if we can teach the lrmd not to make these calls synchronously */ op->stdout_data = systemd_unit_metadata(op->agent, op->timeout); op->rc = PCMK_OCF_OK; if (op->synchronous == FALSE) { return operation_finalize(op); } return TRUE; } unit = systemd_unit_by_name(op->agent, op); free(unit); if (op->synchronous == FALSE) { if (op->opaque->pending) { op->opaque->timerid = g_timeout_add(op->timeout + 5000, systemd_timeout_callback, op); services_add_inflight_op(op); return TRUE; } else { return operation_finalize(op); } } return op->rc == PCMK_OCF_OK; } diff --git a/mcp/corosync.c b/mcp/corosync.c index 24f3ba2753..975b93c410 100644 --- a/mcp/corosync.c +++ b/mcp/corosync.c @@ -1,480 +1,483 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include /* for calls to stat() */ #include /* For basename() and dirname() */ #include #include /* For getpwname() */ #include #include #include #if HAVE_CONFDB # include #endif #include #include #if SUPPORT_CMAN # include #endif #if HAVE_CMAP # include #endif enum cluster_type_e stack = pcmk_cluster_unknown; static corosync_cfg_handle_t cfg_handle; /* =::=::=::= CFG - Shutdown stuff =::=::=::= */ static void cfg_shutdown_callback(corosync_cfg_handle_t h, corosync_cfg_shutdown_flags_t flags) { crm_info("Corosync wants to shut down: %s", (flags == COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE) ? "immediate" : (flags == COROSYNC_CFG_SHUTDOWN_FLAG_REGARDLESS) ? 
"forced" : "optional"); /* Never allow corosync to shut down while we're running */ corosync_cfg_replyto_shutdown(h, COROSYNC_CFG_SHUTDOWN_FLAG_NO); } static corosync_cfg_callbacks_t cfg_callbacks = { .corosync_cfg_shutdown_callback = cfg_shutdown_callback, }; static int pcmk_cfg_dispatch(gpointer user_data) { corosync_cfg_handle_t *handle = (corosync_cfg_handle_t *) user_data; cs_error_t rc = corosync_cfg_dispatch(*handle, CS_DISPATCH_ALL); if (rc != CS_OK) { return -1; } return 0; } static void cfg_connection_destroy(gpointer user_data) { crm_err("Connection destroyed"); cfg_handle = 0; pcmk_shutdown(SIGTERM); } gboolean cluster_disconnect_cfg(void) { if (cfg_handle) { corosync_cfg_finalize(cfg_handle); cfg_handle = 0; } pcmk_shutdown(SIGTERM); return TRUE; } #define cs_repeat(counter, max, code) do { \ code; \ if(rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) { \ counter++; \ crm_debug("Retrying operation after %ds", counter); \ sleep(counter); \ } else { \ break; \ } \ } while(counter < max) gboolean cluster_connect_cfg(uint32_t * nodeid) { cs_error_t rc; int fd = 0, retries = 0; static struct mainloop_fd_callbacks cfg_fd_callbacks = { .dispatch = pcmk_cfg_dispatch, .destroy = cfg_connection_destroy, }; cs_repeat(retries, 30, rc = corosync_cfg_initialize(&cfg_handle, &cfg_callbacks)); if (rc != CS_OK) { crm_err("corosync cfg init error %d", rc); return FALSE; } rc = corosync_cfg_fd_get(cfg_handle, &fd); if (rc != CS_OK) { crm_err("corosync cfg fd_get error %d", rc); goto bail; } retries = 0; cs_repeat(retries, 30, rc = corosync_cfg_local_get(cfg_handle, nodeid)); if (rc != CS_OK) { crm_err("corosync cfg local_get error %d", rc); goto bail; } crm_debug("Our nodeid: %d", *nodeid); mainloop_add_fd("corosync-cfg", G_PRIORITY_DEFAULT, fd, &cfg_handle, &cfg_fd_callbacks); return TRUE; bail: corosync_cfg_finalize(cfg_handle); return FALSE; } /* =::=::=::= Configuration =::=::=::= */ #if HAVE_CONFDB static int get_config_opt(confdb_handle_t config, hdb_handle_t object_handle, const char *key, char **value, const char *fallback) { size_t len = 0; char *env_key = NULL; const char *env_value = NULL; char buffer[256]; if (*value) { free(*value); *value = NULL; } if (object_handle > 0) { if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) { *value = strdup(buffer); } } if (*value) { crm_info("Found '%s' for option: %s", *value, key); return 0; } env_key = crm_concat("HA", key, '_'); env_value = getenv(env_key); free(env_key); if (*value) { crm_info("Found '%s' in ENV for option: %s", *value, key); *value = strdup(env_value); return 0; } if (fallback) { crm_info("Defaulting to '%s' for option: %s", fallback, key); *value = strdup(fallback); } else { crm_info("No default for option: %s", key); } return -1; } static confdb_handle_t config_find_init(confdb_handle_t config) { cs_error_t rc = CS_OK; confdb_handle_t local_handle = OBJECT_PARENT_HANDLE; rc = confdb_object_find_start(config, local_handle); if (rc == CS_OK) { return local_handle; } else { crm_err("Couldn't create search context: %d", rc); } return 0; } static hdb_handle_t config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle) { cs_error_t rc = CS_OK; hdb_handle_t local_handle = 0; if (top_handle == 0) { crm_err("Couldn't search for %s: no valid context", name); return 0; } crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle); rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle); if (rc != CS_OK) { crm_info("No additional configuration 
supplied for: %s", name); local_handle = 0; } else { crm_info("Processing additional %s options...", name); } return local_handle; } #else static int get_config_opt(uint64_t unused, cmap_handle_t object_handle, const char *key, char **value, const char *fallback) { int rc = 0, retries = 0; cs_repeat(retries, 5, rc = cmap_get_string(object_handle, key, value)); if (rc != CS_OK) { crm_trace("Search for %s failed %d, defaulting to %s", key, rc, fallback); if (fallback) { *value = strdup(fallback); } else { *value = NULL; } } crm_trace("%s: %s", key, *value); return rc; } #endif #if HAVE_CONFDB # define KEY_PREFIX "" #elif HAVE_CMAP # define KEY_PREFIX "logging." #endif gboolean mcp_read_config(void) { int rc = CS_OK; int retries = 0; const char *const_value = NULL; #if HAVE_CONFDB char *value = NULL; confdb_handle_t config = 0; confdb_handle_t top_handle = 0; hdb_handle_t local_handle; static confdb_callbacks_t callbacks = { }; do { rc = confdb_initialize(&config, &callbacks); if (rc != CS_OK) { retries++; printf("confdb connection setup failed: %s. Retrying in %ds\n", ais_error2text(rc), retries); crm_info("confdb connection setup failed: %s. Retrying in %ds", ais_error2text(rc), retries); sleep(retries); } else { break; } } while (retries < 5); #elif HAVE_CMAP cmap_handle_t local_handle; uint64_t config = 0; /* There can be only one (possibility if confdb isn't around) */ do { rc = cmap_initialize(&local_handle); if (rc != CS_OK) { retries++; printf("cmap connection setup failed: %s. Retrying in %ds\n", cs_strerror(rc), retries); crm_info("cmap connection setup failed: %s. Retrying in %ds", cs_strerror(rc), retries); sleep(retries); } else { break; } } while (retries < 5); #endif if (rc != CS_OK) { printf("Could not connect to Cluster Configuration Database API, error %d\n", rc); crm_warn("Could not connect to Cluster Configuration Database API, error %d", rc); return FALSE; } stack = get_cluster_type(); crm_info("Reading configure for stack: %s", name_for_cluster_type(stack)); /* =::=::= Should we be here =::=::= */ if (stack == pcmk_cluster_corosync) { set_daemon_option("cluster_type", "corosync"); set_daemon_option("quorum_type", "corosync"); #if HAVE_CONFDB } else if (stack == pcmk_cluster_cman) { set_daemon_option("cluster_type", "cman"); set_daemon_option("quorum_type", "cman"); enable_crmd_as_root(TRUE); } else if (stack == pcmk_cluster_classic_ais) { set_daemon_option("cluster_type", "openais"); set_daemon_option("quorum_type", "pcmk"); /* Look for a service block to indicate our plugin is loaded */ top_handle = config_find_init(config); local_handle = config_find_next(config, "service", top_handle); while (local_handle) { get_config_opt(config, local_handle, "name", &value, NULL); if (safe_str_eq("pacemaker", value)) { get_config_opt(config, local_handle, "ver", &value, "0"); if (safe_str_eq(value, "1")) { get_config_opt(config, local_handle, "use_logd", &value, "no"); set_daemon_option("use_logd", value); set_daemon_option("LOGD", value); get_config_opt(config, local_handle, "use_mgmtd", &value, "no"); enable_mgmtd(crm_is_true(value)); } else { crm_err("We can only start Pacemaker from init if using version 1" " of the Pacemaker plugin for Corosync. 
Terminating."); crm_exit(DAEMON_RESPAWN_STOP); } break; } local_handle = config_find_next(config, "service", top_handle); } free(value); #endif } else { crm_err("Unsupported stack type: %s", name_for_cluster_type(stack)); return FALSE; } #if HAVE_CONFDB top_handle = config_find_init(config); local_handle = config_find_next(config, "logging", top_handle); #endif /* =::=::= Logging =::=::= */ if (daemon_option("debug")) { /* Syslog logging is already setup by crm_log_init() */ } else { /* Check corosync */ char *debug_enabled = NULL; get_config_opt(config, local_handle, KEY_PREFIX "debug", &debug_enabled, "off"); if (crm_is_true(debug_enabled)) { set_daemon_option("debug", "1"); if (get_crm_log_level() < LOG_DEBUG) { set_crm_log_level(LOG_DEBUG); } } else { set_daemon_option("debug", "0"); } free(debug_enabled); } const_value = daemon_option("debugfile"); if (daemon_option("logfile")) { /* File logging is already setup by crm_log_init() */ } else if(const_value) { /* From when we cared what options heartbeat used */ set_daemon_option("logfile", const_value); crm_add_logfile(const_value); } else { /* Check corosync */ char *logfile = NULL; char *logfile_enabled = NULL; get_config_opt(config, local_handle, KEY_PREFIX "to_logfile", &logfile_enabled, "on"); get_config_opt(config, local_handle, KEY_PREFIX "logfile", &logfile, "/var/log/pacemaker.log"); if (crm_is_true(logfile_enabled) == FALSE) { crm_trace("File logging disabled in corosync"); } else if (crm_add_logfile(logfile)) { set_daemon_option("logfile", logfile); } else { crm_err("Couldn't create logfile: %s", logfile); set_daemon_option("logfile", "none"); } free(logfile); free(logfile_enabled); } if (daemon_option("logfacility")) { /* Syslog logging is already setup by crm_log_init() */ } else { /* Check corosync */ char *syslog_enabled = NULL; char *syslog_facility = NULL; get_config_opt(config, local_handle, KEY_PREFIX "to_syslog", &syslog_enabled, "on"); get_config_opt(config, local_handle, KEY_PREFIX "syslog_facility", &syslog_facility, "daemon"); if (crm_is_true(syslog_enabled) == FALSE) { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); set_daemon_option("logfacility", "none"); } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(syslog_facility)); qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); set_daemon_option("logfacility", syslog_facility); } free(syslog_enabled); free(syslog_facility); } const_value = daemon_option("logfacility"); if (const_value) { /* cluster-glue module needs HA_LOGFACILITY */ setenv("HA_LOGFACILITY", const_value, 1); } #if HAVE_CONFDB confdb_finalize(config); #elif HAVE_CMAP if(local_handle){ gid_t gid = 0; if (crm_user_lookup(CRM_DAEMON_USER, NULL, &gid) < 0) { - crm_warn("No group found for user %s", CRM_DAEMON_USER); + crm_warn("Could not authorize group with corosync " CRM_XS + " No group found for user %s", CRM_DAEMON_USER); } else { char key[PATH_MAX]; snprintf(key, PATH_MAX, "uidgid.gid.%u", gid); rc = cmap_set_uint8(local_handle, key, 1); - crm_notice("Configured corosync to accept connections from group %u: %s (%d)", - gid, ais_error2text(rc), rc); + if (rc != CS_OK) { + crm_warn("Could not authorize group with corosync "CRM_XS + " group=%u rc=%d (%s)", gid, rc, ais_error2text(rc)); + } } } cmap_finalize(local_handle); #endif return TRUE; } diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c index f7f1d462b3..952beb83f1 100644 --- a/mcp/pacemaker.c +++ b/mcp/pacemaker.c @@ -1,1148 +1,1151 @@ /* * Copyright (C) 2010 Andrew Beekhof * * This program is free 
software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean pcmk_quorate = FALSE; gboolean fatal_error = FALSE; GMainLoop *mainloop = NULL; #define PCMK_PROCESS_CHECK_INTERVAL 5 const char *local_name = NULL; uint32_t local_nodeid = 0; crm_trigger_t *shutdown_trigger = NULL; const char *pid_file = "/var/run/pacemaker.pid"; typedef struct pcmk_child_s { int pid; long flag; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; gboolean active_before_startup; } pcmk_child_t; /* Index into the array below */ #define pcmk_child_crmd 4 #define pcmk_child_mgmtd 8 /* *INDENT-OFF* */ static pcmk_child_t pcmk_children[] = { { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, { 0, crm_proc_plugin, 0, 0, FALSE, "ais", NULL, NULL }, { 0, crm_proc_lrmd, 3, 0, TRUE, "lrmd", NULL, CRM_DAEMON_DIR"/lrmd" }, { 0, crm_proc_cib, 1, 0, TRUE, "cib", CRM_DAEMON_USER, CRM_DAEMON_DIR"/cib" }, { 0, crm_proc_crmd, 6, 0, TRUE, "crmd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/crmd" }, { 0, crm_proc_attrd, 4, 0, TRUE, "attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR"/attrd" }, { 0, crm_proc_stonithd, 0, 0, TRUE, "stonithd", NULL, NULL }, { 0, crm_proc_pe, 5, 0, TRUE, "pengine", CRM_DAEMON_USER, CRM_DAEMON_DIR"/pengine" }, { 0, crm_proc_mgmtd, 0, 0, TRUE, "mgmtd", NULL, HB_DAEMON_DIR"/mgmtd" }, { 0, crm_proc_stonith_ng, 2, 0, TRUE, "stonith-ng", NULL, CRM_DAEMON_DIR"/stonithd" }, }; /* *INDENT-ON* */ static gboolean start_child(pcmk_child_t * child); static gboolean check_active_before_startup_processes(gpointer user_data); void update_process_clients(crm_client_t *client); void update_process_peers(void); void enable_crmd_as_root(gboolean enable) { if (enable) { pcmk_children[pcmk_child_crmd].uid = NULL; } else { pcmk_children[pcmk_child_crmd].uid = CRM_DAEMON_USER; } } void enable_mgmtd(gboolean enable) { if (enable) { pcmk_children[pcmk_child_mgmtd].start_seq = 7; } else { pcmk_children[pcmk_child_mgmtd].start_seq = 0; } } static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_get_cluster_proc(); for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = FALSE; /* Broadcast the fact that one of our processes died ASAP * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ update_node_processes(local_nodeid, NULL, get_process_list()); child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if 
(shutdown_trigger) { mainloop_set_trigger(shutdown_trigger); update_node_processes(local_nodeid, NULL, get_process_list()); } else if (child->respawn && crm_is_true(getenv("PCMK_fail_fast"))) { crm_err("Rebooting system because of %s", child->name); pcmk_panic(__FUNCTION__); } else if (child->respawn) { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo && signo == SIGKILL) { crm_warn("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else if (signo) { crm_err("The %s process (%d) terminated with signal %d (core=%d)", name, pid, signo, core); } else { switch(exitcode) { case pcmk_ok: crm_info("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; case DAEMON_RESPAWN_STOP: crm_warn("The %s process (%d) can no longer be respawned, shutting the cluster down.", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case pcmk_err_panic: do_crm_log_always(LOG_EMERG, "The %s process (%d) instructed the machine to reset", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_panic(__FUNCTION__); pcmk_shutdown(SIGTERM); break; default: crm_err("The %s process (%d) exited: %s (%d)", name, pid, pcmk_strerror(exitcode), exitcode); break; } } pcmk_process_exit(child); } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } if (child->command == NULL) { crm_debug("Nothing to do for child \"%s\"", child->name); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { - crm_notice("Stopping %s: Sent -%d to process %d", child->name, signal, child->pid); + crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d", + child->name, signal, child->pid); } else { - crm_perror(LOG_ERR, "Stopping %s: Could not send -%d to process %d failed", - child->name, signal, child->pid); + crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d", + child->name, child->pid, signal); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; static gboolean start_child(pcmk_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); enum cluster_type_e stack = get_cluster_type(); child->active_before_startup = FALSE; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if 
(crm_user_lookup(child->uid, &uid, &gid) < 0) {
    crm_err("Invalid user (%s) for %s: not found", child->uid, child->name);
    return FALSE;
}
crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name);
}

child->pid = fork();
CRM_ASSERT(child->pid != -1);

if (child->pid > 0) {
    /* parent */
    mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit);

    crm_info("Forked child %d for process %s%s", child->pid, child->name,
             use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : "");
    update_node_processes(local_nodeid, NULL, get_process_list());
    return TRUE;

} else {
    /* Start a new session */
    (void)setsid();

    /* Set up the two alternate arg arrays */
    opts_vgrind[0] = strdup(VALGRIND_BIN);
    if (use_callgrind) {
        opts_vgrind[1] = strdup("--tool=callgrind");
        opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p");
        opts_vgrind[3] = strdup(child->command);
        opts_vgrind[4] = NULL;
    } else {
        opts_vgrind[1] = strdup(child->command);
        opts_vgrind[2] = NULL;
        opts_vgrind[3] = NULL;
        opts_vgrind[4] = NULL;
    }
    opts_default[0] = strdup(child->command);

    if(gid) {
        if(stack == pcmk_cluster_corosync) {
            /* Drop root privileges completely
             *
             * We can do this because we set uidgid.gid.${gid}=1
             * via CMAP which allows these processes to connect to
             * corosync */
            if (setgid(gid) < 0) {
                crm_perror(LOG_ERR, "Could not set group to %d", gid);
            }

        /* Keep the root group (so we can access corosync), but add
         * the haclient group (so we can access ipc) */
        } else if (initgroups(child->uid, gid) < 0) {
            crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno);
        }
    }

    if (uid && setuid(uid) < 0) {
        crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid);
    }

    /* Close all open file descriptors */
    getrlimit(RLIMIT_NOFILE, &oflimits);
    for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) {
        close(lpc);
    }

    (void)open(devnull, O_RDONLY);  /* Stdin:  fd 0 */
    (void)open(devnull, O_WRONLY);  /* Stdout: fd 1 */
    (void)open(devnull, O_WRONLY);  /* Stderr: fd 2 */

    if (use_valgrind) {
        (void)execvp(VALGRIND_BIN, opts_vgrind);
    } else {
        (void)execvp(child->command, opts_default);
    }
    crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command);
    crm_exit(DAEMON_RESPAWN_STOP);
}
return TRUE;    /* never reached */
}

static gboolean
escalate_shutdown(gpointer data)
{
    pcmk_child_t *child = data;

    if (child->pid) {
        /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */
        crm_err("Child %s not terminating in a timely manner, forcing", child->name);
        stop_child(child, SIGSEGV);
    }
    return FALSE;
}

static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
    static int phase = 0;
    static time_t next_log = 0;
    static int max = SIZEOF(pcmk_children);

    int lpc = 0;

    if (phase == 0) {
        crm_notice("Shutting down Pacemaker");
        phase = max;

        /* Add a second, more frequent, check to speed up shutdown */
        g_timeout_add_seconds(5, check_active_before_startup_processes, NULL);
    }

    for (; phase > 0; phase--) {
        /* Don't stop anything with start_seq < 1 */

        for (lpc = max - 1; lpc >= 0; lpc--) {
            pcmk_child_t *child = &(pcmk_children[lpc]);

            if (phase != child->start_seq) {
                continue;
            }

            if (child->pid) {
                time_t now = time(NULL);

                if (child->respawn) {
                    next_log = now + 30;
                    child->respawn = FALSE;
                    stop_child(child, SIGTERM);
                    if (phase < pcmk_children[pcmk_child_crmd].start_seq) {
                        g_timeout_add(180000 /* 3m */ , escalate_shutdown, child);
                    }

                } else if (now >= next_log) {
                    next_log = now + 30;
-                    crm_notice("Still waiting for %s (pid=%d, seq=%d) to terminate...",
+                    crm_notice("Still waiting for %s to
terminate " + CRM_XS " pid=%d seq=%d", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); { const char *delay = daemon_option("shutdown_delay"); if(delay) { sync(); sleep(crm_get_msec(delay) / 1000); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Attempting to inhibit respawning after fatal error"); crm_exit(DAEMON_RESPAWN_STOP); } return TRUE; } static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk_panic(__FUNCTION__); } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pcmk_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; const char *task = NULL; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } task = crm_element_value(msg, F_CRM_TASK); if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { /* Time to quit */ crm_notice("Shutting down in response to ticket %s (%s)", crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); pcmk_shutdown(15); } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { /* Send to everyone */ struct iovec *iov; int id = 0; const char *name = NULL; crm_element_value_int(msg, XML_ATTR_ID, &id); name = crm_element_value(msg, XML_ATTR_UNAME); crm_notice("Instructing peers to remove references to node %s/%u", name, id); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = dump_xml_unformatted(msg); iov->iov_len = 1 + strlen(iov->iov_base); send_cpg_iov(iov); } else { update_process_clients(c); } free_xml(msg); return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = pcmk_ipc_created, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; /*! 
* \internal * \brief Send an XML message with process list of all known peers to client(s) * * \param[in] client Send message to this client, or all clients if NULL */ void update_process_clients(crm_client_t *client) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *update = create_xml_node(NULL, "nodes"); if (is_corosync_cluster()) { crm_xml_add_int(update, "quorate", pcmk_quorate); } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add(xml, "state", node->state); crm_xml_add_int(xml, "processes", node->processes); } if(client) { crm_trace("Sending process list to client %s", client->id); crm_ipcs_send(client, 0, update, crm_ipc_server_event); } else { crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections)); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) { crm_ipcs_send(client, 0, update, crm_ipc_server_event); } } free_xml(update); } /*! * \internal * \brief Send a CPG message with local node's process list to all peers */ void update_process_peers(void) { /* Do nothing for corosync-2 based clusters */ char buffer[1024]; struct iovec *iov; int rc = 0; memset(buffer, 0, SIZEOF(buffer)); if (local_name) { rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node uname=\"%s\" proclist=\"%u\"/>", local_name, get_process_list()); } else { rc = snprintf(buffer, SIZEOF(buffer) - 1, "<node proclist=\"%u\"/>", get_process_list()); } crm_trace("Sending %s", buffer); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = strdup(buffer); iov->iov_len = rc + 1; send_cpg_iov(iov); } /*! * \internal * \brief Update a node's process list, notifying clients and peers if needed * * \param[in] id Node ID of affected node * \param[in] uname Uname of affected node * \param[in] procs Affected node's process list mask * * \return TRUE if the process list changed, FALSE otherwise */ gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; crm_node_t *node = crm_get_peer(id, uname); if (procs != 0) { if (procs != node->processes) { crm_debug("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; /* If local node's processes have changed, notify clients/peers */ if (id == local_nodeid) { update_process_clients(NULL); update_process_peers(); } } else { crm_trace("Node %s still has process list: %.32x", node->uname, procs); } } return changed; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shut down on this machine"}, {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ static void mcp_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_strerror(errno)); } } static gboolean
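/* Recurring timer callback: reap any startup-discovered process that has since exited, and return TRUE (keep polling) while at least one is still alive */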
check_active_before_startup_processes(gpointer user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); gboolean keep_tracking = FALSE; for (start_seq = 1; start_seq < max; start_seq++) { for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].active_before_startup == FALSE) { /* we are already tracking it as a child process. */ continue; } else if (start_seq != pcmk_children[lpc].start_seq) { continue; } else { const char *name = pcmk_children[lpc].name; if (pcmk_children[lpc].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (crm_pid_active(pcmk_children[lpc].pid, name) != 1) { crm_notice("Process %s terminated (pid=%d)", name, pcmk_children[lpc].pid); pcmk_process_exit(&(pcmk_children[lpc])); continue; } } /* at least one of the processes found at startup * is still going, so keep this recurring timer around */ keep_tracking = TRUE; } } return keep_tracking; } static bool find_and_track_existing_processes(void) { DIR *dp; struct dirent *entry; int start_tracker = 0; char entry_name[64]; dp = opendir("/proc"); if (!dp) { /* no proc directory to search through */ crm_notice("Cannot read /proc directory to track existing components"); return FALSE; } while ((entry = readdir(dp)) != NULL) { int pid; int max = SIZEOF(pcmk_children); int i; if (crm_procfs_process_info(entry, entry_name, &pid) < 0) { continue; } for (i = 0; i < max; i++) { const char *name = pcmk_children[i].name; if (pcmk_children[i].start_seq == 0) { continue; } if (pcmk_children[i].flag == crm_proc_stonith_ng) { name = "stonithd"; } if (safe_str_eq(entry_name, name) && (crm_pid_active(pid, NULL) == 1)) { crm_notice("Tracking existing %s process (pid=%d)", name, pid); pcmk_children[i].pid = pid; pcmk_children[i].active_before_startup = TRUE; start_tracker = 1; break; } } } if (start_tracker) { g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } closedir(dp); return start_tracker; } static void init_children_processes(void) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); /* start any children that have not been detected */ for (start_seq = 1; start_seq < max; start_seq++) { /* don't start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].pid) { /* we are already tracking it */ continue; } if (start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); } static void mcp_cpg_destroy(gpointer user_data) { crm_err("Connection destroyed"); crm_exit(ENOTCONN); } /*! * \internal * \brief Process a CPG message (process list or manual peer cache removal) * * \param[in] handle CPG connection (ignored) * \param[in] groupName CPG group name (ignored) * \param[in] nodeid ID of affected node * \param[in] pid Process ID (ignored) * \param[in] msg CPG XML message * \param[in] msg_len Length of msg in bytes (ignored) */ static void mcp_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = string2xml(msg); const char *task = crm_element_value(xml, F_CRM_TASK); crm_trace("Received CPG message (%s): %.200s", (task?
task : "process list"), (char*)msg); if (task == NULL) { if (nodeid == local_nodeid) { crm_info("Ignoring process list sent by peer for local node"); } else { uint32_t procs = 0; const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); if (update_node_processes(nodeid, uname, procs)) { update_process_clients(NULL); } } } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { int id = 0; const char *name = NULL; crm_element_value_int(xml, XML_ATTR_ID, &id); name = crm_element_value(xml, XML_ATTR_UNAME); reap_crm_member(id, name); } if (xml != NULL) { free_xml(xml); } } static void mcp_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Update peer cache if needed */ pcmk_cpg_membership(handle, groupName, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); /* Always broadcast our own presence after any membership change */ update_process_peers(); } static gboolean mcp_quorum_callback(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_quorum_destroy(gpointer user_data) { crm_info("connection lost"); } #if SUPPORT_CMAN static gboolean mcp_cman_dispatch(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_cman_destroy(gpointer user_data) { crm_info("connection closed"); } #endif int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean shutdown = FALSE; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_ipc_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; const char *facility = daemon_option("logfacility"); static crm_cluster_t cluster; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n"); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'f': /* Legacy */ break; case 'p': pid_file = optarg; break; case '$': case '?': crm_help(flag, EX_OK); break; case 'S': shutdown = TRUE; break; case 'F': printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); crm_exit(pcmk_ok); default: printf("Argument code 0%o (%c) is not (?yet?) 
supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', EX_USAGE); } setenv("LC_ALL", "C", 1); setenv("HA_LOGD", "no", 1); set_daemon_option("mcp", "true"); set_daemon_option("use_logd", "off"); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); /* Restore the original facility so that mcp_read_config() does the right thing */ set_daemon_option("logfacility", facility); crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP); old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); crm_ipc_connect(old_instance); if (shutdown) { crm_debug("Terminating previous instance"); while (crm_ipc_connected(old_instance)) { xmlNode *cmd = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL); crm_debug("."); crm_ipc_send(old_instance, cmd, 0, 0, NULL); free_xml(cmd); sleep(2); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_exit(pcmk_ok); } else if (crm_ipc_connected(old_instance)) { crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("Pacemaker is already active, aborting startup"); crm_exit(DAEMON_RESPAWN_STOP); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); if (mcp_read_config() == FALSE) { crm_notice("Could not obtain corosync config data, exiting"); crm_exit(ENODATA); } - crm_notice("Starting Pacemaker %s (Build: %s): %s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); + crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", + PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_new(FALSE); sysrq_init(); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." 
" Core files are an important diagnositic tool," " please consider enabling them by default."); } #if 0 /* system() is not thread-safe, can't call from here * Actually, its a pretty hacky way to try and achieve this anyway */ if (system("echo 1 > /proc/sys/kernel/core_uses_pid") != 0) { crm_perror(LOG_ERR, "Could not enable /proc/sys/kernel/core_uses_pid"); } #endif } rc = pcmk_ok; if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(ENOKEY); } mkdir(CRM_STATE_DIR, 0750); mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store core files in */ crm_build_path(CRM_CORE_DIR, 0775); mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); /* Used to store blackbox dumps in */ crm_build_path(CRM_BLACKBOX_DIR, 0755); mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); /* Used to store policy engine inputs in */ crm_build_path(PE_STATE_DIR, 0755); mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store the cluster configuration */ crm_build_path(CRM_CONFIG_DIR, 0755); mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); /* Resource agent paths are constructed by the lrmd */ ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); crm_exit(EIO); } /* Allows us to block shutdown */ if (cluster_connect_cfg(&local_nodeid) == FALSE) { crm_err("Couldn't connect to Corosync's CFG service"); crm_exit(ENOPROTOOPT); } if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); } else { setenv("PCMK_watchdog", "false", 1); } find_and_track_existing_processes(); cluster.destroy = mcp_cpg_destroy; cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; crm_set_autoreap(FALSE); if(cluster_connect_cpg(&cluster) == FALSE) { crm_err("Couldn't connect to Corosync's CPG service"); rc = -ENOPROTOOPT; } if (rc == pcmk_ok && is_corosync_cluster()) { /* Keep the membership list up-to-date for crm_node to query */ if(cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) { rc = -ENOTCONN; } } #if SUPPORT_CMAN if (rc == pcmk_ok && is_cman_cluster()) { init_cman_connection(mcp_cman_dispatch, mcp_cman_destroy); } #endif if(rc == pcmk_ok) { local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); init_children_processes(); crm_info("Starting mainloop"); g_main_run(mainloop); } if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_destroy(mainloop); cluster_disconnect_cpg(&cluster); cluster_disconnect_cfg(); crm_info("Exiting %s", crm_system_name); return crm_exit(rc); } diff --git a/pengine/native.c b/pengine/native.c index a6598868dd..095ad0767e 100644 --- a/pengine/native.c +++ b/pengine/native.c @@ -1,3318 +1,3318 @@ /* * Copyright (C) 2004 Andrew Beekhof * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #include #include #include /* #define DELETE_THEN_REFRESH 1 // The crmd will remove the resource from the CIB itself, making this redundant */ #define INFINITY_HACK (INFINITY * -100) #define VARIANT_NATIVE 1 #include gboolean update_action(action_t * then); void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh, resource_t * rsc_rh, gboolean update_rh); void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set); void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set); void pe_post_notify(resource_t * rsc, node_t * node, action_t * op, notify_data_t * n_data, pe_working_set_t * data_set); void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set); gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set); gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set); /* *INDENT-OFF* */ enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, }, /* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, }, /* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, /* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, }, }; gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = { /* Current State */ /* Next State: Unknown Stopped Started Slave Master */ /* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, }, /* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, }, /* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, }, /* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, }, /* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, }, }; /* *INDENT-ON* */ static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current); static gboolean native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { /* 1. 
Sort by weight 2. color.chosen_node = the node (of those with the highest weight) with the fewest resources 3. remove color.chosen_node from all other colors */ GListPtr nodes = NULL; node_t *chosen = NULL; int lpc = 0; int multiple = 0; int length = 0; gboolean result = FALSE; process_utilization(rsc, &prefer, data_set); length = g_hash_table_size(rsc->allowed_nodes); if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to ? TRUE : FALSE; } if (prefer) { chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id); if (chosen && chosen->weight >= 0 && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Using preferred node %s for %s instead of choosing from %d candidates", chosen->details->uname, rsc->id, length); } else if (chosen && chosen->weight < 0) { pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable", chosen->details->uname, rsc->id); chosen = NULL; } else if (chosen && can_run_resources(chosen)) { pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable", chosen->details->uname, rsc->id); chosen = NULL; } else { pe_rsc_trace(rsc, "Preferred node %s for %s was unknown", prefer->details->uname, rsc->id); } } if (chosen == NULL && rsc->allowed_nodes) { nodes = g_hash_table_get_values(rsc->allowed_nodes); nodes = g_list_sort_with_data(nodes, sort_node_weight, g_list_nth_data(rsc->running_on, 0)); chosen = g_list_nth_data(nodes, 0); pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates", chosen ? chosen->details->uname : "<none>", rsc->id, length); if (chosen && chosen->weight > 0 && can_run_resources(chosen)) { node_t *running = g_list_nth_data(rsc->running_on, 0); if (running && can_run_resources(running) == FALSE) { pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources", rsc->id, running->details->uname); running = NULL; } for (lpc = 1; lpc < length && running; lpc++) { node_t *tmp = g_list_nth_data(nodes, lpc); if (tmp->weight == chosen->weight) { multiple++; if (tmp->details == running->details) { /* prefer the existing node if scores are equal */ chosen = tmp; } } } } } if (multiple > 1) { int log_level = LOG_INFO; static char score[33]; score2char_stack(chosen->weight, score, sizeof(score)); if (chosen->weight >= INFINITY) { log_level = LOG_WARNING; } do_crm_log(log_level, "%d nodes with equal score (%s) for" " running %s resources. Chose %s.", multiple, score, rsc->id, chosen->details->uname); } result = native_assign_node(rsc, nodes, chosen, FALSE); g_list_free(nodes); return result; } static int node_list_attr_score(GHashTable * list, const char *attr, const char *value) { GHashTableIter iter; node_t *node = NULL; int best_score = -INFINITY; const char *best_node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { int weight = node->weight; if (can_run_resources(node) == FALSE) { weight = -INFINITY; } if (weight > best_score || best_node == NULL) { const char *tmp = g_hash_table_lookup(node->details->attrs, attr); if (safe_str_eq(value, tmp)) { best_score = weight; best_node = node->details->uname; } } } if (safe_str_neq(attr, "#" XML_ATTR_UNAME)) { crm_info("Best score for %s=%s was %s with %d", attr, value, best_node ?
best_node : "", best_score); } return best_score; } static void node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor, gboolean only_positive) { int score = 0; int new_score = 0; GHashTableIter iter; node_t *node = NULL; if (attr == NULL) { attr = "#" XML_ATTR_UNAME; } g_hash_table_iter_init(&iter, list1); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { CRM_LOG_ASSERT(node != NULL); if(node == NULL) { continue; }; score = node_list_attr_score(list2, attr, g_hash_table_lookup(node->details->attrs, attr)); new_score = merge_weights(factor * score, node->weight); if (factor < 0 && score < 0) { /* Negative preference for a node with a negative score * should not become a positive preference * * TODO - Decide if we want to filter only if weight == -INFINITY * */ crm_trace("%s: Filtering %d + %f*%d (factor * score)", node->details->uname, node->weight, factor, score); } else if (node->weight == INFINITY_HACK) { crm_trace("%s: Filtering %d + %f*%d (node < 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight > 0) { node->weight = INFINITY_HACK; crm_trace("%s: Filtering %d + %f*%d (score > 0)", node->details->uname, node->weight, factor, score); } else if (only_positive && new_score < 0 && node->weight == 0) { crm_trace("%s: Filtering %d + %f*%d (score == 0)", node->details->uname, node->weight, factor, score); } else { crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score); node->weight = new_score; } } } GHashTable * node_hash_dup(GHashTable * hash) { /* Hack! */ GListPtr list = g_hash_table_get_values(hash); GHashTable *result = node_hash_from_list(list); g_list_free(list); return result; } GHashTable * native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags); } GHashTable * rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr, float factor, enum pe_weights flags) { GHashTable *work = NULL; int multiplier = 1; if (factor < 0) { multiplier = -1; } if (is_set(rsc->flags, pe_rsc_merging)) { pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id); return nodes; } set_bit(rsc->flags, pe_rsc_merging); if (is_set(flags, pe_weights_init)) { if (rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last); work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags); } else { work = node_hash_dup(rsc->allowed_nodes); } clear_bit(flags, pe_weights_init); } else if (rsc->variant == pe_group && rsc->children) { GListPtr iter = rsc->children; pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id); work = node_hash_dup(nodes); for(iter = rsc->children; iter->next != NULL; iter = iter->next) { work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags); } } else { pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id); work = node_hash_dup(nodes); node_hash_update(work, rsc->allowed_nodes, attr, factor, is_set(flags, pe_weights_positive)); } if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id); g_hash_table_destroy(work); clear_bit(rsc->flags, pe_rsc_merging); return nodes; } if (can_run_any(work)) 
{ GListPtr gIter = NULL; if (is_set(flags, pe_weights_forward)) { gIter = rsc->rsc_cons; crm_trace("Checking %d additional colocation constraints", g_list_length(gIter)); } else if(rsc->variant == pe_group && rsc->children) { GListPtr last = rsc->children; while (last->next != NULL) { last = last->next; } gIter = ((resource_t*)last->data)->rsc_cons_lhs; crm_trace("Checking %d additional optional group colocation constraints from %s", g_list_length(gIter), ((resource_t*)last->data)->id); } else { gIter = rsc->rsc_cons_lhs; crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id); } for (; gIter != NULL; gIter = gIter->next) { resource_t *other = NULL; rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; if (is_set(flags, pe_weights_forward)) { other = constraint->rsc_rh; } else { other = constraint->rsc_lh; } pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id); work = rsc_merge_weights(other, rhs, work, constraint->node_attribute, multiplier * (float)constraint->score / INFINITY, flags|pe_weights_rollback); dump_node_scores(LOG_TRACE, NULL, rhs, work); } } if (is_set(flags, pe_weights_positive)) { node_t *node = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->weight == INFINITY_HACK) { node->weight = 1; } } } if (nodes) { g_hash_table_destroy(nodes); } clear_bit(rsc->flags, pe_rsc_merging); return work; } node_t * native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set) { GListPtr gIter = NULL; int alloc_details = scores_log_level + 1; if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) { /* never allocate children on their own */ pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id, rsc->parent->id); rsc->parent->cmds->allocate(rsc->parent, prefer, data_set); } if (is_not_set(rsc->flags, pe_rsc_provisional)) { return rsc->allocated_to; } if (is_set(rsc->flags, pe_rsc_allocating)) { pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id); return NULL; } set_bit(rsc->flags, pe_rsc_allocating); print_resource(alloc_details, "Allocating: ", rsc, FALSE); dump_node_scores(alloc_details, rsc, "Pre-alloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; GHashTable *archive = NULL; resource_t *rsc_rh = constraint->rsc_rh; pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)", rsc->id, constraint->id, rsc_rh->id, constraint->score, role2text(constraint->role_lh)); if (constraint->role_lh >= RSC_ROLE_MASTER || (constraint->score < 0 && constraint->score > -INFINITY)) { archive = node_hash_dup(rsc->allowed_nodes); } rsc_rh->cmds->allocate(rsc_rh, NULL, data_set); rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint); if (archive && can_run_any(rsc->allowed_nodes) == FALSE) { pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id); g_hash_table_destroy(rsc->allowed_nodes); rsc->allowed_nodes = archive; archive = NULL; } if (archive) { g_hash_table_destroy(archive); } } dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes); for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data; rsc->allowed_nodes = constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes, constraint->node_attribute, (float)constraint->score / INFINITY,
pe_weights_rollback); } print_resource(LOG_DEBUG_2, "Allocating: ", rsc, FALSE); if (rsc->next_role == RSC_ROLE_STOPPED) { pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id); /* make sure it doesn't come up again */ resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set); } else if(rsc->next_role > rsc->role && is_set(data_set->flags, pe_flag_have_quorum) == FALSE && data_set->no_quorum_policy == no_quorum_freeze) { crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); rsc->next_role = rsc->role; } dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes); if (is_set(data_set->flags, pe_flag_stonith_enabled) && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) { clear_bit(rsc->flags, pe_rsc_managed); } if (is_not_set(rsc->flags, pe_rsc_managed)) { const char *reason = NULL; node_t *assign_to = NULL; rsc->next_role = rsc->role; if (rsc->running_on == NULL) { reason = "inactive"; } else if (rsc->role == RSC_ROLE_MASTER) { assign_to = rsc->running_on->data; reason = "master"; } else if (is_set(rsc->flags, pe_rsc_failed)) { assign_to = rsc->running_on->data; reason = "failed"; } else { assign_to = rsc->running_on->data; reason = "active"; } pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id, assign_to ? assign_to->details->uname : "'nowhere'", reason); native_assign_node(rsc, NULL, assign_to, TRUE); } else if (is_set(data_set->flags, pe_flag_stop_everything)) { pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id); native_assign_node(rsc, NULL, NULL, TRUE); } else if (is_set(rsc->flags, pe_rsc_provisional) && native_choose_node(rsc, prefer, data_set)) { pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } else if (rsc->allocated_to == NULL) { if (is_not_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id); } else if (rsc->running_on != NULL) { pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id); } } else { pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id, rsc->allocated_to->details->uname); } clear_bit(rsc->flags, pe_rsc_allocating); print_resource(LOG_DEBUG_3, "Allocated ", rsc, TRUE); if (rsc->is_remote_node) { node_t *remote_node = pe_find_node(data_set->nodes, rsc->id); CRM_ASSERT(remote_node != NULL); if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) { crm_trace("Setting remote node %s to ONLINE", remote_node->details->id); remote_node->details->online = TRUE; /* We shouldn't consider an unseen remote-node unclean if we are going * to try and connect to it. Otherwise we get an unnecessary fence */ if (remote_node->details->unseen == TRUE) { remote_node->details->unclean = FALSE; } } else { crm_trace("Setting remote node %s to SHUTDOWN. next role = %s, allocated=%s", remote_node->details->id, role2text(rsc->next_role), rsc->allocated_to ? 
"true" : "false"); remote_node->details->shutdown = TRUE; } } return rsc->allocated_to; } static gboolean is_op_dup(resource_t * rsc, const char *name, const char *interval) { gboolean dup = FALSE; const char *id = NULL; const char *value = NULL; xmlNode *operation = NULL; CRM_ASSERT(rsc); for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { value = crm_element_value(operation, "name"); if (safe_str_neq(value, name)) { continue; } value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (value == NULL) { value = "0"; } if (safe_str_neq(value, interval)) { continue; } if (id == NULL) { id = ID(operation); } else { crm_config_err("Operation %s is a duplicate of %s", ID(operation), id); crm_config_err ("Do not use the same (name, interval) combination more than once per resource"); dup = TRUE; } } } return dup; } void RecurringOp(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *value = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; action_t *mon = NULL; gboolean is_optional = TRUE; GListPtr possible_matches = NULL; /* Only process for the operations without role="Stopped" */ value = crm_element_value(operation, "role"); if (value && text2role(value) == RSC_ROLE_STOPPED) { return; } CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s", ID(operation), rsc->id, role2text(rsc->next_role), node ? node->details->uname : "n/a"); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name); return; } key = generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } if (start != NULL) { pe_rsc_trace(rsc, "Marking %s %s due to %s", key, is_set(start->flags, pe_action_optional) ? 
"optional" : "mandatory", start->uuid); is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional); } else { pe_rsc_trace(rsc, "Marking %s optional", key); is_optional = TRUE; } /* start a monitor for an already active resource */ possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches == NULL) { is_optional = FALSE; pe_rsc_trace(rsc, "Marking %s mandatory: not active", key); } else { GListPtr gIter = NULL; for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) { action_t *op = (action_t *) gIter->data; if (is_set(op->flags, pe_action_reschedule)) { is_optional = FALSE; break; } } g_list_free(possible_matches); } if ((rsc->next_role == RSC_ROLE_MASTER && value == NULL) || (value != NULL && text2role(value) != rsc->next_role)) { int log_level = LOG_DEBUG_2; const char *result = "Ignoring"; if (is_optional) { char *local_key = strdup(key); log_level = LOG_INFO; result = "Cancelling"; /* its running : cancel it */ mon = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(mon->task); free(mon->cancel_task); mon->task = strdup(RSC_CANCEL); mon->cancel_task = strdup(name); add_hash_param(mon->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(mon->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; switch (rsc->role) { case RSC_ROLE_SLAVE: case RSC_ROLE_STARTED: if (rsc->next_role == RSC_ROLE_MASTER) { local_key = promote_key(rsc); } else if (rsc->next_role == RSC_ROLE_STOPPED) { local_key = stop_key(rsc); } break; case RSC_ROLE_MASTER: local_key = demote_key(rsc); break; default: break; } if (local_key) { custom_action_order(rsc, NULL, mon, rsc, local_key, NULL, pe_order_runnable_left, data_set); } mon = NULL; } do_crm_log(log_level, "%s action %s (%s vs. %s)", result, key, value ? 
value : role2text(RSC_ROLE_SLAVE), role2text(rsc->next_role)); free(key); key = NULL; return; } mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set); key = mon->uuid; if (is_optional) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(node_uname), mon->uuid); } if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (node == NULL || node->details->online == FALSE || node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(node_uname), mon->uuid); update_action_flags(mon, pe_action_runnable | pe_action_clear); } else if (is_set(mon->flags, pe_action_optional) == FALSE) { pe_rsc_info(rsc, " Start recurring %s (%llus) for %s on %s", mon->task, interval_ms / 1000, rsc->id, crm_str(node_uname)); } if (rsc->next_role == RSC_ROLE_MASTER) { char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master); free(running_master); } if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, start_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, reload_key(rsc), NULL, NULL, strdup(key), mon, pe_order_implies_then | pe_order_runnable_left, data_set); if (rsc->next_role == RSC_ROLE_MASTER) { custom_action_order(rsc, promote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } else if (rsc->role == RSC_ROLE_MASTER) { custom_action_order(rsc, demote_key(rsc), NULL, rsc, NULL, mon, pe_order_optional | pe_order_runnable_left, data_set); } } } void Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp(rsc, start, node, operation, data_set); } } } } void RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node, xmlNode * operation, pe_working_set_t * data_set) { char *key = NULL; const char *name = NULL; const char *role = NULL; const char *interval = NULL; const char *node_uname = NULL; unsigned long long interval_ms = 0; GListPtr possible_matches = NULL; GListPtr gIter = NULL; /* TODO: Support for non-unique clones */ if (is_set(rsc->flags, pe_rsc_unique) == FALSE) { return; } /* Only process operations with role="Stopped" */ role = crm_element_value(operation, "role"); if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) { return; } pe_rsc_trace(rsc, "Creating recurring actions %s for %s in role %s on nodes where it will not be running", ID(operation), rsc->id, role2text(rsc->next_role)); if (node != NULL) { node_uname = node->details->uname; } interval = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); interval_ms = crm_get_interval(interval); if (interval_ms == 0) { return; } name = crm_element_value(operation, "name"); if (is_op_dup(rsc, name, interval)) { return; } if (safe_str_eq(name, RSC_STOP) || safe_str_eq(name, RSC_START) || safe_str_eq(name, RSC_DEMOTE) || safe_str_eq(name, RSC_PROMOTE) ) { crm_config_err("Invalid recurring action %s with name: '%s'", ID(operation), name); return; } key =
generate_op_key(rsc->id, name, interval_ms); if (find_rsc_op_entry(rsc, key) == NULL) { /* disabled */ free(key); return; } /* if the monitor exists on the node where the resource will be running, cancel it */ if (node != NULL) { possible_matches = find_actions_exact(rsc->actions, key, node); if (possible_matches) { action_t *cancel_op = NULL; char *local_key = strdup(key); g_list_free(possible_matches); cancel_op = custom_action(rsc, local_key, RSC_CANCEL, node, FALSE, TRUE, data_set); free(cancel_op->task); free(cancel_op->cancel_task); cancel_op->task = strdup(RSC_CANCEL); cancel_op->cancel_task = strdup(name); add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL, interval); add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, name); local_key = NULL; if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) { /* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */ /* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */ custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL, pe_order_runnable_left, data_set); } pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s", key, role, role2text(rsc->next_role), crm_str(node_uname)); } } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { node_t *stop_node = (node_t *) gIter->data; const char *stop_node_uname = stop_node->details->uname; gboolean is_optional = TRUE; gboolean probe_is_optional = TRUE; gboolean stop_is_optional = TRUE; action_t *stopped_mon = NULL; char *rc_inactive = NULL; GListPtr probe_complete_ops = NULL; GListPtr stop_ops = NULL; GListPtr local_gIter = NULL; char *stop_op_key = NULL; if (node_uname && safe_str_eq(stop_node_uname, node_uname)) { continue; } pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s", ID(operation), rsc->id, crm_str(stop_node_uname)); /* start a monitor for an already stopped resource */ possible_matches = find_actions_exact(rsc->actions, key, stop_node); if (possible_matches == NULL) { pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key, crm_str(stop_node_uname)); is_optional = FALSE; } else { pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key, crm_str(stop_node_uname)); is_optional = TRUE; g_list_free(possible_matches); } stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set); rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); free(rc_inactive); if (is_set(rsc->flags, pe_rsc_managed)) { char *probe_key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0); GListPtr probes = find_actions(rsc->actions, probe_key, stop_node); GListPtr pIter = NULL; for (pIter = probes; pIter != NULL; pIter = pIter->next) { action_t *probe = (action_t *) pIter->data; order_actions(probe, stopped_mon, pe_order_runnable_left); crm_trace("%s then %s on %s\n", probe->uuid, stopped_mon->uuid, stop_node->details->uname); } g_list_free(probes); free(probe_key); } if (probe_complete_ops) { g_list_free(probe_complete_ops); } stop_op_key = stop_key(rsc); stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node); for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) { action_t *stop = (action_t *) local_gIter->data; if (is_set(stop->flags, pe_action_optional) == FALSE) { stop_is_optional = FALSE; } if (is_set(stop->flags, pe_action_runnable) == FALSE) { crm_debug("%s\t %s (cancelled : stop un-runnable)", crm_str(stop_node_uname), stopped_mon->uuid); 
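/* The stop itself cannot run here, so the role="Stopped" monitor that depends on it cannot run either; clear its runnable flag */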
update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(rsc->flags, pe_rsc_managed)) { custom_action_order(rsc, strdup(stop_op_key), stop, NULL, strdup(key), stopped_mon, pe_order_implies_then | pe_order_runnable_left, data_set); } } if (stop_ops) { g_list_free(stop_ops); } free(stop_op_key); if (is_optional == FALSE && probe_is_optional && stop_is_optional && is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged", key, crm_str(stop_node_uname)); update_action_flags(stopped_mon, pe_action_optional); } if (is_set(stopped_mon->flags, pe_action_optional)) { pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid); } if (stop_node->details->online == FALSE || stop_node->details->unclean) { pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)", crm_str(stop_node_uname), stopped_mon->uuid); update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear); } if (is_set(stopped_mon->flags, pe_action_runnable) && is_set(stopped_mon->flags, pe_action_optional) == FALSE) { crm_notice(" Start recurring %s (%llus) for %s on %s", stopped_mon->task, interval_ms / 1000, rsc->id, crm_str(stop_node_uname)); } } free(key); } void Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set) { if (is_not_set(rsc->flags, pe_rsc_maintenance) && (node == NULL || node->details->maintenance == FALSE)) { xmlNode *operation = NULL; for (operation = __xml_first_child(rsc->ops_xml); operation != NULL; operation = __xml_next_element(operation)) { if (crm_str_eq((const char *)operation->name, "op", TRUE)) { RecurringOp_Stopped(rsc, start, node, operation, data_set); } } } } static void handle_migration_actions(resource_t * rsc, node_t *current, node_t *chosen, pe_working_set_t * data_set) { action_t *migrate_to = NULL; action_t *migrate_from = NULL; action_t *start = NULL; action_t *stop = NULL; gboolean partial = rsc->partial_migration_target ? TRUE : FALSE; pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s", rsc->id, current->details->id, chosen->details->id, partial ? 
"TRUE" : "FALSE"); start = start_action(rsc, chosen, TRUE); stop = stop_action(rsc, current, TRUE); if (partial == FALSE) { migrate_to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set); } migrate_from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set); if ((migrate_to && migrate_from) || (migrate_from && partial)) { set_bit(start->flags, pe_action_migrate_runnable); set_bit(stop->flags, pe_action_migrate_runnable); update_action_flags(start, pe_action_pseudo); /* easier than trying to delete it from the graph */ /* order probes before migrations */ if (partial) { set_bit(migrate_from->flags, pe_action_migrate_runnable); migrate_from->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set); } else { set_bit(migrate_from->flags, pe_action_migrate_runnable); set_bit(migrate_to->flags, pe_action_migrate_runnable); migrate_to->needs = start->needs; custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); } custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional | pe_order_implies_first_migratable | pe_order_pseudo_left, data_set); } if (migrate_to) { add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); /* pcmk remote connections don't require pending to be recorded in cib. * We can optimize cib writes by only setting PENDING for non pcmk remote * connection resources */ if (rsc->is_remote_node == FALSE) { /* migrate_to takes place on the source node, but can * have an effect on the target node depending on how * the agent is written. Because of this, we have to maintain * a record that the migrate_to occurred incase the source node * loses membership while the migrate_to action is still in-flight. */ add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true"); } } if (migrate_from) { add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname); add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname); } } void native_create_actions(resource_t * rsc, pe_working_set_t * data_set) { action_t *start = NULL; node_t *chosen = NULL; node_t *current = NULL; gboolean need_stop = FALSE; gboolean is_moving = FALSE; gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? 
TRUE : FALSE; GListPtr gIter = NULL; int num_active_nodes = 0; enum rsc_role_e role = RSC_ROLE_UNKNOWN; enum rsc_role_e next_role = RSC_ROLE_UNKNOWN; CRM_ASSERT(rsc); chosen = rsc->allocated_to; if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STARTED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } else if (rsc->next_role == RSC_ROLE_UNKNOWN) { rsc->next_role = RSC_ROLE_STOPPED; pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role)); } pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc, role2text(rsc->role), role2text(rsc->next_role)); if (rsc->running_on) { current = rsc->running_on->data; } for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *n = (node_t *) gIter->data; if (rsc->partial_migration_source && (n->details == rsc->partial_migration_source->details)) { current = rsc->partial_migration_source; } num_active_nodes++; } for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop = stop_action(rsc, current, FALSE); set_bit(stop->flags, pe_action_dangle); pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s", rsc->id, current->details->uname); if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, FALSE, data_set); } } if (num_active_nodes > 1) { if (num_active_nodes == 2 && chosen && rsc->partial_migration_target && rsc->partial_migration_source && (current->details == rsc->partial_migration_source->details) && (chosen->details == rsc->partial_migration_target->details)) { /* Here the chosen node is still the migration target from a partial * migration. Attempt to continue the migration instead of recovering * by stopping the resource everywhere and starting it on a single node. */ pe_rsc_trace(rsc, "Will attempt to continue with a partial migration to target %s from %s", rsc->partial_migration_target->details->id, rsc->partial_migration_source->details->id); } else { const char *type = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if(rsc->partial_migration_target && rsc->partial_migration_source) { crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too", rsc->id, rsc->partial_migration_target->details->uname, rsc->partial_migration_source->details->uname); } else { pe_proc_err("Resource %s (%s::%s) is active on %d nodes %s", rsc->id, class, type, num_active_nodes, recovery2text(rsc->recovery_type)); crm_warn("See %s for more information.", "http://clusterlabs.org/wiki/FAQ#Resource_is_Too_Active"); } if (rsc->recovery_type == recovery_stop_start) { need_stop = TRUE; } /* If by chance a partial migration is in progress, * but the migration target was not chosen as the new node, clear all * partial migration data.
*/ rsc->partial_migration_source = rsc->partial_migration_target = NULL; allow_migrate = FALSE; } } if (is_set(rsc->flags, pe_rsc_start_pending)) { start = start_action(rsc, chosen, TRUE); set_bit(start->flags, pe_action_print_always); } if (current && chosen && current->details != chosen->details) { pe_rsc_trace(rsc, "Moving %s", rsc->id); is_moving = TRUE; need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Recovering %s", rsc->id); need_stop = TRUE; } else if (is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "Block %s", rsc->id); need_stop = TRUE; } else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) { /* Recovery of a promoted resource */ start = start_action(rsc, chosen, TRUE); if (is_set(start->flags, pe_action_optional) == FALSE) { pe_rsc_trace(rsc, "Forced start %s", rsc->id); need_stop = TRUE; } } pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id, role2text(rsc->role), role2text(rsc->next_role)); role = rsc->role; /* Potentially optional steps for bringing the resource down and back up to the same level */ while (role != RSC_ROLE_STOPPED) { next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED]; pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) { break; } role = next_role; } while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) { next_role = rsc_state_matrix[role][rsc->role]; pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role), rsc->id, need_stop ? " required" : ""); if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) { break; } role = next_role; } role = rsc->role; /* Required steps from this role to the next */ while (role != rsc->next_role) { next_role = rsc_state_matrix[role][rsc->next_role]; pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s)", role2text(role), role2text(next_role), rsc->id); if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) { break; } role = next_role; } if(is_set(rsc->flags, pe_rsc_block)) { pe_rsc_trace(rsc, "No additional monitor ops for blocked resource"); } else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) { pe_rsc_trace(rsc, "Monitor ops for active resource"); start = start_action(rsc, chosen, TRUE); Recurring(rsc, start, chosen, data_set); Recurring_Stopped(rsc, start, chosen, data_set); } else { pe_rsc_trace(rsc, "Monitor ops for inactive resource"); Recurring_Stopped(rsc, NULL, NULL, data_set); } /* If we are stuck in a partial migration where the migration target * no longer matches the chosen node, a full stop/start is required. */ if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) { pe_rsc_trace(rsc, "Not allowing partial migration to continue.
%s", rsc->id); allow_migrate = FALSE; } else if (is_moving == FALSE || is_not_set(rsc->flags, pe_rsc_managed) || is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending) || (current->details->unclean == TRUE) || rsc->next_role < RSC_ROLE_STARTED) { allow_migrate = FALSE; } if (allow_migrate) { handle_migration_actions(rsc, current, chosen, data_set); } } static void rsc_avoids_remote_nodes(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc) { node->weight = -INFINITY; } } } void native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set) { /* This function is on the critical path and worth optimizing as much as possible */ resource_t *top = uber_parent(rsc); int type = pe_order_optional | pe_order_implies_then | pe_order_restart; gboolean is_stonith = is_set(rsc->flags, pe_rsc_fence_device); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set); if (top->variant == pe_master || rsc->role > RSC_ROLE_SLAVE) { custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_implies_first_master, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL, pe_order_runnable_left, data_set); } if (is_stonith == FALSE && is_set(data_set->flags, pe_flag_enable_unfencing) && is_set(rsc->flags, pe_rsc_needs_unfencing) && is_not_set(rsc->flags, pe_rsc_have_unfencing)) { /* Check if the node needs to be unfenced first */ node_t *node = NULL; GHashTableIter iter; if(rsc != top) { /* Only create these constraints once, rsc is almost certainly cloned */ clear_bit_recursive(top, pe_rsc_have_unfencing); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); custom_action_order(top, generate_op_key(top->id, top == rsc?RSC_STOP:RSC_STOPPED, 0), NULL, NULL, strdup(unfence->uuid), unfence, pe_order_optional, data_set); crm_debug("Stopping %s prior to unfencing %s", top->id, unfence->uuid); custom_action_order(NULL, strdup(unfence->uuid), unfence, top, generate_op_key(top->id, RSC_START, 0), NULL, pe_order_implies_then_on_node, data_set); } } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } { action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); custom_action_order(rsc, stop_key(rsc), NULL, NULL, strdup(all_stopped->task), all_stopped, pe_order_implies_then | pe_order_runnable_left, data_set); } if (g_hash_table_size(rsc->utilization) > 0 && safe_str_neq(data_set->placement_strategy, "default")) { GHashTableIter iter; node_t *next = NULL; GListPtr gIter = NULL; pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s", rsc->id, data_set->placement_strategy); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(current); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } 
custom_action_order(rsc, stop_key(rsc), NULL, NULL, load_stopped_task, load_stopped, pe_order_load, data_set); } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) { char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_'); action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set); if (load_stopped->node == NULL) { load_stopped->node = node_copy(next); update_action_flags(load_stopped, pe_action_optional | pe_action_clear); } custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, start_key(rsc), NULL, pe_order_load, data_set); custom_action_order(NULL, strdup(load_stopped_task), load_stopped, rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_load, data_set); free(load_stopped_task); } } if (rsc->container) { resource_t *remote_rsc = NULL; /* find out if the container is associated with a remote node connection resource */ if (rsc->container->is_remote_node) { remote_rsc = rsc->container; } else if (rsc->is_remote_node == FALSE) { remote_rsc = rsc_contains_remote_node(data_set, rsc->container); } /* If the container is (or contains) a remote node, force the resource onto * that remote node instead of merely colocating it with the container. */ if (remote_rsc) { GHashTableIter iter; node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node->details->remote_rsc != remote_rsc) { node->weight = -INFINITY; } } } else { crm_trace("Generating order and colocation rules for rsc %s with container %s", rsc->id, rsc->container->id); custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL, rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_implies_then | pe_order_runnable_left, data_set); custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL, pe_order_implies_first, data_set); rsc_colocation_new("resource-with-container", NULL, INFINITY, rsc, rsc->container, NULL, NULL, data_set); } } if (rsc->is_remote_node || is_stonith) { /* don't allow remote nodes to run stonith devices * or remote connection resources. */ rsc_avoids_remote_nodes(rsc); } /* If this rsc is a remote connection resource associated with a container * (which will most likely be a virtual guest), do not allow the container * to live on any remote nodes: remote nodes managing nested remote nodes * should not be allowed.
*/ if (rsc->is_remote_node && rsc->container) { rsc_avoids_remote_nodes(rsc->container); } } void native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", constraint->id); return; } else if (constraint->rsc_rh == NULL) { pe_err("rsc_rh was NULL for %s", constraint->id); return; } pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id, rsc_rh->id); rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint); } enum filter_colocation_res filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint, gboolean preview) { if (constraint->score == 0) { return influence_nothing; } /* The RH side must be allocated before we can process the constraint */ if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) { return influence_nothing; } if ((constraint->role_lh >= RSC_ROLE_SLAVE) && rsc_lh->parent && rsc_lh->parent->variant == pe_master && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* LH and RH resources have already been allocated, place the correct * priority on the LH rsc for the given multistate resource role */ return influence_rsc_priority; } if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) { /* error check */ struct node_shared_s *details_lh; struct node_shared_s *details_rh; if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) { return influence_nothing; } details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL; details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL; if (constraint->score == INFINITY && details_lh != details_rh) { crm_err("%s and %s are both allocated" " but to different nodes: %s vs. %s", rsc_lh->id, rsc_rh->id, details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a"); } else if (constraint->score == -INFINITY && details_lh == details_rh) { crm_err("%s and %s are both allocated" " but to the SAME node: %s", rsc_lh->id, rsc_rh->id, details_rh ?
details_rh->uname : "n/a"); } return influence_nothing; } if (constraint->score > 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) { crm_trace("LH: Skipping constraint: \"%s\" state filter, next role is %s", role2text(constraint->role_lh), role2text(rsc_lh->next_role)); return influence_nothing; } if (constraint->score > 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) { crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) { crm_trace("LH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_lh)); return influence_nothing; } if (constraint->score < 0 && constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) { crm_trace("RH: Skipping -ve constraint: \"%s\" state filter", role2text(constraint->role_rh)); return influence_nothing; } return influence_rsc_location; } static void influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *rh_value = NULL; const char *lh_value = NULL; const char *attribute = "#id"; int score_multiplier = 1; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) { return; } lh_value = g_hash_table_lookup(rsc_lh->allocated_to->details->attrs, attribute); rh_value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); if (!safe_str_eq(lh_value, rh_value)) { if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) { rsc_lh->priority = -INFINITY; } return; } if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) { return; } if (constraint->role_lh == RSC_ROLE_SLAVE) { score_multiplier = -1; } rsc_lh->priority = merge_weights(score_multiplier * constraint->score, rsc_lh->priority); } static void colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { const char *tmp = NULL; const char *value = NULL; const char *attribute = "#id"; GHashTable *work = NULL; gboolean do_check = FALSE; GHashTableIter iter; node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (rsc_rh->allocated_to) { value = g_hash_table_lookup(rsc_rh->allocated_to->details->attrs, attribute); do_check = TRUE; } else if (constraint->score < 0) { /* nothing to do: * anti-colocation with something that's not running */ return; } work = node_hash_dup(rsc_lh->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { tmp = g_hash_table_lookup(node->details->attrs, attribute); if (do_check && safe_str_eq(tmp, value)) { if (constraint->score < INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id, node->details->uname, constraint->score); node->weight = merge_weights(constraint->score, node->weight); } } else if (do_check == FALSE || constraint->score >= INFINITY) { pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id, node->details->uname, constraint->score, do_check ?
"failed" : "unallocated"); node->weight = merge_weights(-constraint->score, node->weight); } } if (can_run_any(work) || constraint->score <= -INFINITY || constraint->score >= INFINITY) { g_hash_table_destroy(rsc_lh->allowed_nodes); rsc_lh->allowed_nodes = work; work = NULL; } else { static char score[33]; score2char_stack(constraint->score, score, sizeof(score)); pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)", rsc_lh->id, rsc_rh->id, do_check, score); } if (work) { g_hash_table_destroy(work); } } void native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint) { enum filter_colocation_res filter_results; CRM_ASSERT(rsc_lh); CRM_ASSERT(rsc_rh); filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE); pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results); switch (filter_results) { case influence_rsc_priority: influence_priority(rsc_lh, rsc_rh, constraint); break; case influence_rsc_location: pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)", constraint->score >= 0 ? "" : "Anti-", rsc_lh->id, rsc_rh->id, constraint->id, constraint->score); colocation_match(rsc_lh, rsc_rh, constraint); break; case influence_nothing: default: return; } } static gboolean filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket) { if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) { pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter", role2text(rsc_ticket->role_lh)); return FALSE; } return TRUE; } void rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set) { if (rsc_ticket == NULL) { pe_err("rsc_ticket was NULL"); return; } if (rsc_lh == NULL) { pe_err("rsc_lh was NULL for %s", rsc_ticket->id); return; } if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) { return; } if (rsc_lh->children) { GListPtr gIter = rsc_lh->children; pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id); for (; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_ticket_constraint(child_rsc, rsc_ticket, data_set); } return; } pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)", rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id, role2text(rsc_ticket->role_lh)); if (rsc_ticket->ticket->granted == FALSE && g_list_length(rsc_lh->running_on) > 0) { GListPtr gIter = NULL; switch (rsc_ticket->loss_policy) { case loss_ticket_stop: resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); break; case loss_ticket_demote: /*Promotion score will be set to -INFINITY in master_promotion_order() */ if (rsc_ticket->role_lh != RSC_ROLE_MASTER) { resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); } break; case loss_ticket_fence: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set); for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; - pe_fence_node(data_set, node, "deadman ticket lost"); + pe_fence_node(data_set, node, "because deadman ticket was lost"); } break; case loss_ticket_freeze: if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) { return; } if (g_list_length(rsc_lh->running_on) > 0) { clear_bit(rsc_lh->flags, pe_rsc_managed); 
set_bit(rsc_lh->flags, pe_rsc_block); } break; } } else if (rsc_ticket->ticket->granted == FALSE) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set); } } else if (rsc_ticket->ticket->standby) { if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) { resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set); } } } enum pe_action_flags native_action_flags(action_t * action, node_t * node) { return action->flags; } enum pe_graph_flags native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags, enum pe_action_flags filter, enum pe_ordering type) { /* flags == get_action_flags(first, then_node) called from update_action() */ enum pe_graph_flags changed = pe_graph_none; enum pe_action_flags then_flags = then->flags; enum pe_action_flags first_flags = first->flags; crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, then->uuid, then->flags); if (type & pe_order_asymmetrical) { resource_t *then_rsc = then->rsc; enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0; if (!then_rsc) { /* ignore */ } else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) { /* ignore... if 'then' is supposed to be stopped after 'first', but * then is already stopped, there is nothing to be done when non-symmetrical. */ } else if ((then_rsc_role >= RSC_ROLE_STARTED) && safe_str_eq(then->task, RSC_START) && then->node && then_rsc->running_on && g_list_length(then_rsc->running_on) == 1 && then->node->details == ((node_t *) then_rsc->running_on->data)->details) { /* ignore... if 'then' is supposed to be started after 'first', but * then is already started, there is nothing to be done when non-symmetrical. */ } else if (!(first->flags & pe_action_runnable)) { /* prevent 'then' action from happening if 'first' is not runnable and * 'then' has not yet occurred. */ pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_optional); pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid); } else { /* ignore... then is allowed to start/stop if it wants to. 
*/ } } if (type & pe_order_implies_first) { if ((filter & pe_action_optional) && (flags & pe_action_optional) == 0) { pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } if (is_set(flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE && is_set(then->flags, pe_action_optional) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (type & pe_order_implies_first_master) { if ((filter & pe_action_optional) && ((then->flags & pe_action_optional) == FALSE) && then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) { pe_clear_action_bit(first, pe_action_optional); if (is_set(first->flags, pe_action_migrate_runnable) && is_set(then->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_migrate_runnable); } pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid); } } if ((type & pe_order_implies_first_migratable) && is_set(filter, pe_action_optional)) { if (((then->flags & pe_action_migrate_runnable) == FALSE) || ((then->flags & pe_action_runnable) == FALSE)) { pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable nor migratable", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_runnable); } if ((then->flags & pe_action_optional) == 0) { pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid); pe_clear_action_bit(first, pe_action_optional); } } if ((type & pe_order_pseudo_left) && is_set(filter, pe_action_optional)) { if ((first->flags & pe_action_runnable) == FALSE) { pe_clear_action_bit(then, pe_action_migrate_runnable); pe_clear_action_bit(then, pe_action_pseudo); pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid); } } if (is_set(type, pe_order_runnable_left) && is_set(filter, pe_action_runnable) && is_set(then->flags, pe_action_runnable) && is_set(flags, pe_action_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_runnable); pe_clear_action_bit(then, pe_action_migrate_runnable); } if (is_set(type, pe_order_implies_then) && is_set(filter, pe_action_optional) && is_set(then->flags, pe_action_optional) && is_set(flags, pe_action_optional) == FALSE) { /* in this case, treat migrate_runnable as if first is optional */ if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) { pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid); pe_clear_action_bit(then, pe_action_optional); } } if (is_set(type, pe_order_restart)) { const char *reason = NULL; CRM_ASSERT(first->rsc && first->rsc->variant == pe_native); CRM_ASSERT(then->rsc && then->rsc->variant == pe_native); if ((filter & pe_action_runnable) && (then->flags & pe_action_runnable) == 0 && (then->rsc->flags & pe_rsc_managed)) { reason = "shutdown"; } if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) { reason = "recover"; } if (reason && is_set(first->flags, pe_action_optional)) { if (is_set(first->flags, pe_action_runnable) || is_not_set(then->flags, pe_action_optional)) { pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid);
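/* The stop ('first') was optional, but a restart is needed, so force it to
 * run; the corresponding start ('then') is adjusted by the checks below. */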
pe_clear_action_bit(first, pe_action_optional); } } if (reason && is_not_set(first->flags, pe_action_optional) && is_not_set(first->flags, pe_action_runnable)) { pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid); pe_clear_action_bit(then, pe_action_runnable); } if (reason && is_not_set(first->flags, pe_action_optional) && is_set(first->flags, pe_action_migrate_runnable) && is_not_set(then->flags, pe_action_migrate_runnable)) { pe_clear_action_bit(first, pe_action_migrate_runnable); } } if (then_flags != then->flags) { changed |= pe_graph_updated_then; pe_rsc_trace(then->rsc, "Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", then->uuid, then->node ? then->node->details->uname : "[none]", then->flags, then_flags, first->uuid, first->flags); if(then->rsc && then->rsc->parent) { /* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */ update_action(then); } } if (first_flags != first->flags) { changed |= pe_graph_updated_first; pe_rsc_trace(first->rsc, "First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x", first->uuid, first->node ? first->node->details->uname : "[none]", first->flags, first_flags, then->uuid, then->flags); } return changed; } void native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint) { GListPtr gIter = NULL; GHashTableIter iter; node_t *node = NULL; if (constraint == NULL) { pe_err("Constraint is NULL"); return; } else if (rsc == NULL) { pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id); return; } pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id, role2text(constraint->role_filter), rsc->id); /* take "lifetime" into account */ if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) { pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)", constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role)); return; } else if (is_active(constraint) == FALSE) { pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id); return; } if (constraint->node_list_rh == NULL) { pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id); return; } for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; node_t *other_node = NULL; other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (other_node != NULL) { pe_rsc_trace(rsc, "%s + %s: %d + %d", node->details->uname, other_node->details->uname, node->weight, other_node->weight); other_node->weight = merge_weights(other_node->weight, node->weight); } else { other_node = node_copy(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node); } if (other_node->rsc_discover_mode < constraint->discover_mode) { if (constraint->discover_mode == discover_exclusive) { rsc->exclusive_discover = TRUE; } /* exclusive > never > always... 
always is default */ other_node->rsc_discover_mode = constraint->discover_mode; } } g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight); } } void native_expand(resource_t * rsc, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "Processing actions from %s", rsc->id); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; crm_trace("processing action %d for rsc=%s", action->id, rsc->id); graph_element_from_action(action, data_set); } for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; child_rsc->cmds->expand(child_rsc, data_set); } } #define log_change(fmt, args...) do { \ if(terminal) { \ printf(" * "fmt"\n", ##args); \ } else { \ crm_notice(fmt, ##args); \ } \ } while(0) #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if(is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \ CRM_ASSERT(is_not_set(stop->flags, pe_action_optional)); \ } \ } while(0) void LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal) { node_t *next = NULL; node_t *current = NULL; action_t *stop = NULL; action_t *start = NULL; action_t *demote = NULL; action_t *promote = NULL; char *key = NULL; gboolean moving = FALSE; GListPtr possible_matches = NULL; if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; LogActions(child_rsc, data_set, terminal); } return; } next = rsc->allocated_to; if (rsc->running_on) { if (g_list_length(rsc->running_on) > 1 && rsc->partial_migration_source) { current = rsc->partial_migration_source; } else { current = rsc->running_on->data; } if (rsc->role == RSC_ROLE_STOPPED) { /* * This can occur when resources are being recovered * We fiddle with the current role in native_create_actions() */ rsc->role = RSC_ROLE_STARTED; } } if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) { /* Don't log stopped orphans */ return; } if (is_not_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), is_not_set(rsc->flags, pe_rsc_managed) ? 
" unmanaged" : ""); return; } if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) { moving = TRUE; } key = start_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } key = stop_key(rsc); if(start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) { possible_matches = find_actions(rsc->actions, key, NULL); } else { possible_matches = find_actions(rsc->actions, key, current); } free(key); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } key = promote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } key = demote_key(rsc); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { action_t *migrate_to = NULL; key = generate_op_key(rsc->id, RSC_MIGRATED, 0); possible_matches = find_actions(rsc->actions, key, next); free(key); if (possible_matches) { migrate_to = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if (migrate_to && is_set(migrate_to->flags, pe_action_runnable) && current) { log_change("Migrate %s\t(%s %s -> %s)", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start == NULL || is_set(start->flags, pe_action_optional)) { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else if (start && is_set(start->flags, pe_action_runnable) == FALSE) { log_change("Stop %s\t(%s %s%s)", rsc->id, role2text(rsc->role), current?current->details->uname:"N/A", stop && is_not_set(stop->flags, pe_action_runnable) ? " - blocked" : ""); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { log_change("%s %s\t(%s %s -> %s)", is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move ", rsc->id, role2text(rsc->role), current->details->uname, next->details->uname); } else if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return; } if (rsc->role > RSC_ROLE_SLAVE && rsc->role > rsc->next_role) { CRM_CHECK(current != NULL,); if (current != NULL) { gboolean allowed = FALSE; if (demote != NULL && (demote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Demote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), current->details->uname, allowed ? 
"" : " - blocked"); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->next_role > RSC_ROLE_STOPPED && moving == FALSE) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->next_role), next->details->uname); STOP_SANITY_ASSERT(__LINE__); } } } } else if (rsc->next_role == RSC_ROLE_STOPPED) { GListPtr gIter = NULL; CRM_CHECK(current != NULL,); key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *node = (node_t *) gIter->data; action_t *stop_op = NULL; gboolean allowed = FALSE; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); allowed = TRUE; } log_change("Stop %s\t(%s%s)", rsc->id, node->details->uname, allowed ? "" : " - blocked"); } free(key); } if (moving) { log_change("Move %s\t(%s %s -> %s)", rsc->id, role2text(rsc->next_role), current->details->uname, next->details->uname); STOP_SANITY_ASSERT(__LINE__); } if (rsc->role == RSC_ROLE_STOPPED) { gboolean allowed = FALSE; if (start && (start->flags & pe_action_runnable)) { allowed = TRUE; } CRM_CHECK(next != NULL,); if (next != NULL) { log_change("Start %s\t(%s%s)", rsc->id, next->details->uname, allowed ? "" : " - blocked"); } if (allowed == FALSE) { return; } } if (rsc->next_role > RSC_ROLE_SLAVE && rsc->role < rsc->next_role) { gboolean allowed = FALSE; CRM_LOG_ASSERT(next); if (stop != NULL && is_not_set(stop->flags, pe_action_optional) && rsc->role > RSC_ROLE_STOPPED) { if (is_set(rsc->flags, pe_rsc_failed)) { log_change("Recover %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else if (is_set(rsc->flags, pe_rsc_reload)) { log_change("Reload %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } else { log_change("Restart %s\t(%s %s)", rsc->id, role2text(rsc->role), next?next->details->uname:NULL); STOP_SANITY_ASSERT(__LINE__); } } if (promote && (promote->flags & pe_action_runnable)) { allowed = TRUE; } log_change("Promote %s\t(%s -> %s %s%s)", rsc->id, role2text(rsc->role), role2text(rsc->next_role), next?next->details->uname:NULL, allowed ? 
"" : " - blocked"); } } gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; action_t *stop; if (rsc->partial_migration_target) { if (rsc->partial_migration_target->details == current->details) { pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname, next->details->uname, rsc->id); continue; } else { pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id); optional = FALSE; } } pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname); stop = stop_action(rsc, current, optional); if (is_not_set(rsc->flags, pe_rsc_managed)) { update_action_flags(stop, pe_action_runnable | pe_action_clear); } if (is_set(data_set->flags, pe_flag_remove_after_stop)) { DeleteRsc(rsc, current, optional, data_set); } } return TRUE; } gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { action_t *start = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s on %s %d", rsc->id, next ? next->details->uname : "N/A", optional); start = start_action(rsc, next, TRUE); if (is_set(start->flags, pe_action_runnable) && optional == FALSE) { update_action_flags(start, pe_action_optional | pe_action_clear); } return TRUE; } gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; gboolean runnable = TRUE; GListPtr action_list = NULL; CRM_ASSERT(rsc); CRM_CHECK(next != NULL, return FALSE); pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname); key = start_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *start = (action_t *) gIter->data; if (is_set(start->flags, pe_action_runnable) == FALSE) { runnable = FALSE; } } g_list_free(action_list); if (runnable) { promote_action(rsc, next, optional); return TRUE; } pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id); key = promote_key(rsc); action_list = find_actions_exact(rsc->actions, key, next); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *promote = (action_t *) gIter->data; update_action_flags(promote, pe_action_runnable | pe_action_clear); } g_list_free(action_list); return TRUE; } gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { GListPtr gIter = NULL; CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); /* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */ for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { node_t *current = (node_t *) gIter->data; pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A"); demote_action(rsc, current, optional); } return TRUE; } gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); crm_err("%s on %s", rsc->id, next ? 
next->details->uname : "N/A"); CRM_CHECK(FALSE, return FALSE); return FALSE; } gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set) { CRM_ASSERT(rsc); pe_rsc_trace(rsc, "%s", rsc->id); return FALSE; } gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname); return FALSE; } else if (node == NULL) { pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id); return FALSE; } else if (node->details->unclean || node->details->online == FALSE) { pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id, node->details->uname); return FALSE; } crm_notice("Removing %s from %s", rsc->id, node->details->uname); delete_action(rsc, node, optional); new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE, optional ? pe_order_implies_then : pe_order_optional, data_set); new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START, optional ? pe_order_implies_then : pe_order_optional, data_set); return TRUE; } #include <../lib/pengine/unpack.h> #define set_char(x) last_rsc_id[lpc] = x; complete = TRUE; static char * increment_clone(char *last_rsc_id) { int lpc = 0; int len = 0; char *tmp = NULL; gboolean complete = FALSE; CRM_CHECK(last_rsc_id != NULL, return NULL); if (last_rsc_id != NULL) { len = strlen(last_rsc_id); } lpc = len - 1; while (complete == FALSE && lpc > 0) { switch (last_rsc_id[lpc]) { case 0: lpc--; break; case '0': set_char('1'); break; case '1': set_char('2'); break; case '2': set_char('3'); break; case '3': set_char('4'); break; case '4': set_char('5'); break; case '5': set_char('6'); break; case '6': set_char('7'); break; case '7': set_char('8'); break; case '8': set_char('9'); break; case '9': last_rsc_id[lpc] = '0'; lpc--; break; case ':': tmp = last_rsc_id; last_rsc_id = calloc(1, len + 2); memcpy(last_rsc_id, tmp, len); last_rsc_id[++lpc] = '1'; last_rsc_id[len] = '0'; last_rsc_id[len + 1] = 0; complete = TRUE; free(tmp); break; default: crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc); return NULL; break; } } return last_rsc_id; } static node_t * probe_grouped_clone(resource_t * rsc, node_t * node, pe_working_set_t * data_set) { node_t *running = NULL; resource_t *top = uber_parent(rsc); if (running == NULL && is_set(top->flags, pe_rsc_unique) == FALSE) { /* Annoyingly we also need to check any other clone instances * Clumsy, but it will work. 
* * An alternative would be to update known_on for every peer * during process_rsc_state() * * This code desperately needs optimization * ptest -x with 100 nodes, 100 clones and clone-max=10: * No probes O(25s) * Detection without clone loop O(3m) * Detection with clone loop O(8m) ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done */ char *clone_id = clone_zero(rsc->id); resource_t *peer = pe_find_resource(top->children, clone_id); while (peer && running == NULL) { running = pe_hash_table_lookup(peer->known_on, node->details->id); if (running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id); free(clone_id); return running; } clone_id = increment_clone(clone_id); peer = pe_find_resource(data_set->resources, clone_id); } free(clone_id); } return running; } gboolean native_create_probe(resource_t * rsc, node_t * node, action_t * complete, gboolean force, pe_working_set_t * data_set) { enum pe_ordering flags = pe_order_optional; char *key = NULL; action_t *probe = NULL; node_t *running = NULL; node_t *allowed = NULL; resource_t *top = uber_parent(rsc); static const char *rc_master = NULL; static const char *rc_inactive = NULL; if (rc_inactive == NULL) { rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING); rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER); } CRM_CHECK(node != NULL, return FALSE); if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id); return FALSE; } else if (force == FALSE && is_container_remote_node(node)) { pe_rsc_trace(rsc, "Skipping active resource detection for %s on container %s", rsc->id, node->details->id); return FALSE; } if (is_remote_node(node)) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_eq(class, "stonith")) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes do not run stonith agents.", rsc->id, node->details->id); return FALSE; } else if (rsc_contains_remote_node(data_set, rsc)) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run resources that contain connection resources.", rsc->id, node->details->id); return FALSE; } else if (rsc->is_remote_node) { pe_rsc_trace(rsc, "Skipping probe for %s on node %s, remote-nodes can not run connection resources", rsc->id, node->details->id); return FALSE; } } if (rsc->children) { GListPtr gIter = NULL; gboolean any_created = FALSE; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set) || any_created; } return any_created; } else if (rsc->container) { pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id); return FALSE; } if (is_set(rsc->flags, pe_rsc_orphan)) { pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id); return FALSE; } running = g_hash_table_lookup(rsc->known_on, node->details->id); if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) { /* Anonymous clones */ if (rsc->parent == top) { running = g_hash_table_lookup(rsc->parent->known_on, node->details->id); } else { /* Grouped anonymous clones need extra special handling */ running = probe_grouped_clone(rsc, node, data_set); } 
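/* Illustrative only (clone names are hypothetical): increment_clone()
 * advances the numeric suffix after the ':', so repeated calls map
 * "myclone:0" -> "myclone:1" -> ... -> "myclone:9" -> "myclone:10",
 * reallocating the buffer when every digit wraps. probe_grouped_clone()
 * (used above) walks the clone's peers in that order until it finds one
 * whose status on this node is already known. */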
} if (force == FALSE && running != NULL) { /* we already know the status of the resource on this node */ pe_rsc_trace(rsc, "Skipping active: %s on %s", rsc->id, node->details->uname); return FALSE; } allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (rsc->exclusive_discover || top->exclusive_discover) { if (allowed == NULL) { /* exclusive discover is enabled and this node is not in the allowed list. */ return FALSE; } else if (allowed->rsc_discover_mode != discover_exclusive) { /* exclusive discover is enabled and this node is not marked * as a node this resource should be discovered on */ return FALSE; } } if (allowed && allowed->rsc_discover_mode == discover_never) { /* this resource is marked as not needing to be discovered on this node */ return FALSE; } key = generate_op_key(rsc->id, RSC_STATUS, 0); probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set); update_action_flags(probe, pe_action_optional | pe_action_clear); /* If enabled, require unfencing before probing any fence devices * but ensure it happens after any resources that require * unfencing have been probed. * * Doing it the other way (requiring unfencing after probing * resources that need it) would result in the node being * unfenced, and all its resources being stopped, whenever a new * resource is added. Which would be highly suboptimal. * * So essentially, at the point the fencing device(s) have been * probed, we know the state of all resources that require * unfencing and that unfencing occurred. */ if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { trigger_unfencing(NULL, node, "node discovery", probe, data_set); probe->priority = INFINITY; /* Ensure this runs if unfencing succeeds */ } else if(is_set(rsc->flags, pe_rsc_needs_unfencing)) { action_t *unfence = pe_fence_op(node, "on", TRUE, data_set); order_actions(probe, unfence, pe_order_optional); } /* * We need to know if it's running_on (not just known_on) this node * to correctly determine the target rc. */ running = pe_find_node_id(rsc->running_on, node->details->id); if (running == NULL) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive); } else if (rsc->role == RSC_ROLE_MASTER) { add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master); } crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role), is_set(probe->flags, pe_action_runnable), rsc->running_on); if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { top = rsc; } else if (top->variant < pe_clone) { top = rsc; } else { crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id); } if(is_not_set(probe->flags, pe_action_runnable) && rsc->running_on == NULL) { /* Prevent the start from occurring if rsc isn't active, but * don't cause it to stop if it was active already */ flags |= pe_order_runnable_left; } custom_action_order(rsc, NULL, probe, top, generate_op_key(top->id, RSC_START, 0), NULL, flags, data_set); /* Before any reloads, if they exist */ custom_action_order(rsc, NULL, probe, top, reload_key(rsc), NULL, pe_order_optional, data_set); if (node->details->shutdown == FALSE) { custom_action_order(rsc, NULL, probe, rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional, data_set); } if(is_set(rsc->flags, pe_rsc_fence_device) && is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Normally rsc.start depends on probe complete which depends * on rsc.probe.
But this can't be the case in this scenario as * it would create graph loops. * * So instead we explicitly order 'rsc.probe then rsc.start' */ } else { order_actions(probe, complete, pe_order_implies_then); } return TRUE; } static void native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { node_t *target = stonith_op ? stonith_op->node : NULL; GListPtr gIter = NULL; action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set); action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set); for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if(action->needs == rsc_req_nothing) { } else if (action->needs == rsc_req_stonith) { order_actions(stonith_done, action, pe_order_optional); } else if (target != NULL && safe_str_eq(action->task, RSC_START) && NULL == pe_hash_table_lookup(rsc->known_on, target->details->id)) { /* if known == NULL, then we don't know if * the resource is active on the node * we're about to shoot * * in this case, regardless of action->needs, * the only safe option is to wait until * the node is shot before doing anything * with the resource * * it's analogous to waiting for all the probes * for rscX to complete before starting rscX * * the most likely explanation is that the * DC died and took its status with it */ pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid, target->details->uname); order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left); } } } /* User data to pass to guest node iterator */ struct action_list_s { GListPtr search_list; /* list of actions to search */ GListPtr result_list; /* list of matching actions for this node */ const char *key; /* action key to match */ }; /*! * \internal * \brief Prepend a node's actions matching a key to a list * * \param[in] node Guest node * \param[in,out] data User data */ static void prepend_node_actions(const node_t *node, void *data) { GListPtr actions; struct action_list_s *info = (struct action_list_s *) data; actions = find_actions(info->search_list, info->key, node); info->result_list = g_list_concat(actions, info->result_list); } static GListPtr find_fence_target_node_actions(GListPtr search_list, const char *key, node_t *fence_target, pe_working_set_t *data_set) { struct action_list_s action_list; /* Actions on the target that match the key are implied by the fencing */ action_list.search_list = search_list; action_list.result_list = find_actions(search_list, key, fence_target); action_list.key = key; /* * If the target is a host for any guest nodes, actions on those nodes * that match the key are also implied by the fencing.
*/ pe_foreach_guest_node(data_set, fence_target, prepend_node_actions, &action_list); return action_list.result_list; } static void native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { char *key = NULL; GListPtr gIter = NULL; GListPtr action_list = NULL; action_t *start = NULL; resource_t *top = uber_parent(rsc); key = start_key(rsc); action_list = find_actions(rsc->actions, key, NULL); if(action_list) { start = action_list->data; } g_list_free(action_list); free(key); key = stop_key(rsc); action_list = find_fence_target_node_actions(rsc->actions, key, stonith_op->node, data_set); free(key); /* add the stonith op as a stop prerequisite, and mark the stop * as a pseudo op since it is now redundant */ for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online && action->node->details->unclean == FALSE && is_set(rsc->flags, pe_rsc_failed)) { continue; } if (is_set(rsc->flags, pe_rsc_failed)) { crm_notice("Stop of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { crm_info("%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the stop would never complete and is * now implied by the stonith operation */ update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); update_action_flags(action, pe_action_implied_by_stonith); if(start == NULL || start->needs > rsc_req_quorum) { enum pe_ordering flags = pe_order_optional; action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL); if(stonith_op->node->details->remote_rsc) { flags |= pe_order_preserve; } order_actions(stonith_op, action, flags); order_actions(stonith_op, parent_stop, flags); } if (is_set(rsc->flags, pe_rsc_notify)) { /* Create a second notification that will be delivered * immediately after the node is fenced * * Basic problem: * - C is a clone active on the node to be shot and stopping on another * - R is a resource that depends on C * * + C.stop depends on R.stop * + C.stopped depends on STONITH * + C.notify depends on C.stopped * + C.healthy depends on C.notify * + R.stop depends on C.healthy * * The extra notification here changes * + C.healthy depends on C.notify * into: * + C.healthy depends on C.notify' * + C.notify' depends on STONITH' * thus breaking the loop */ notify_data_t *n_data = create_notification_boundaries(rsc, RSC_STOP, NULL, stonith_op, data_set); crm_info("Creating secondary notification for %s", action->uuid); collect_notification_data(rsc, TRUE, FALSE, n_data); g_hash_table_insert(n_data->keys, strdup("notify_stop_resource"), strdup(rsc->id)); g_hash_table_insert(n_data->keys, strdup("notify_stop_uname"), strdup(action->node->details->uname)); create_notifications(uber_parent(rsc), n_data, data_set); free_notification_data(n_data); } /* From Bug #1601, successful fencing must be an input to a failed resource's stop action. However given group(rA, rB) running on nodeX and B.stop has failed, A := stop healthy resource (rA.stop) B := stop failed resource (pseudo operation B.stop) C := stonith nodeX A requires B, B requires C, C requires A This loop would prevent the cluster from making progress. This block creates the "C requires A" dependency and therefore must (at least for now) be disabled. Instead, run the block above and treat all resources on nodeX as B would be (marked as a pseudo op depending on the STONITH).
TODO: Break the "A requires B" dependency in update_action() and re-enable this block } else if(is_stonith == FALSE) { crm_info("Moving healthy resource %s" " off %s before fencing", rsc->id, node->details->uname); * stop healthy resources before the * stonith op * custom_action_order( rsc, stop_key(rsc), NULL, NULL,strdup(CRM_OP_FENCE),stonith_op, pe_order_optional, data_set); */ } g_list_free(action_list); key = demote_key(rsc); action_list = find_fence_target_node_actions(rsc->actions, key, stonith_op->node, data_set); free(key); for (gIter = action_list; gIter != NULL; gIter = gIter->next) { action_t *action = (action_t *) gIter->data; if (action->node->details->online == FALSE || action->node->details->unclean == TRUE || is_set(rsc->flags, pe_rsc_failed)) { if (is_set(rsc->flags, pe_rsc_failed)) { pe_rsc_info(rsc, "Demote of failed resource %s is" " implicit after %s is fenced", rsc->id, action->node->details->uname); } else { pe_rsc_info(rsc, "%s is implicit after %s is fenced", action->uuid, action->node->details->uname); } /* the demote would never complete and is * now implied by the stonith operation */ crm_trace("Marking %s as a pseudo op", action->uuid); update_action_flags(action, pe_action_pseudo); update_action_flags(action, pe_action_runnable); if (start == NULL || start->needs > rsc_req_quorum) { order_actions(stonith_op, action, pe_order_preserve|pe_order_optional); } } } g_list_free(action_list); } void rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set) { if (rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; rsc_stonith_ordering(child_rsc, stonith_op, data_set); } return; } if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id); return; } /* Start constraints */ native_start_constraints(rsc, stonith_op, data_set); /* Stop constraints */ if (stonith_op) { native_stop_constraints(rsc, stonith_op, data_set); } } enum stack_activity { stack_stable = 0, stack_starting = 1, stack_stopping = 2, stack_middle = 4, }; static action_t * get_first_named_action(resource_t * rsc, const char *action, gboolean only_valid, node_t * current) { action_t *a = NULL; GListPtr action_list = NULL; char *key = generate_op_key(rsc->id, action, 0); action_list = find_actions(rsc->actions, key, current); if (action_list == NULL || action_list->data == NULL) { crm_trace("%s: no %s action", rsc->id, action); free(key); return NULL; } a = action_list->data; g_list_free(action_list); if (only_valid && is_set(a->flags, pe_action_pseudo)) { crm_trace("%s: pseudo", key); a = NULL; } else if (only_valid && is_not_set(a->flags, pe_action_runnable)) { crm_trace("%s: not runnable", key); a = NULL; } free(key); return a; } void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set) { GListPtr gIter = NULL; action_t *other = NULL; action_t *reload = NULL; if (rsc->children) { for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { resource_t *child_rsc = (resource_t *) gIter->data; ReloadRsc(child_rsc, node, data_set); } return; } else if (rsc->variant > pe_native) { /* Complex resource with no children */ return; } else if (is_not_set(rsc->flags, pe_rsc_managed)) { pe_rsc_trace(rsc, "%s: unmanaged", rsc->id); return; } else if (is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending)) { pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags);
stop_action(rsc, node, FALSE); /* Force a full restart, overkill? */ return; } else if (node == NULL) { pe_rsc_trace(rsc, "%s: not active", rsc->id); return; } pe_rsc_trace(rsc, "Processing %s", rsc->id); set_bit(rsc->flags, pe_rsc_reload); reload = custom_action( rsc, reload_key(rsc), CRMD_ACTION_RELOAD, node, FALSE, TRUE, data_set); /* stop = stop_action(rsc, node, optional); */ other = get_first_named_action(rsc, RSC_STOP, TRUE, node); if (other != NULL) { order_actions(reload, other, pe_order_optional); } other = get_first_named_action(rsc, RSC_DEMOTE, TRUE, node); if (other != NULL) { order_actions(reload, other, pe_order_optional); } } void native_append_meta(resource_t * rsc, xmlNode * xml) { char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION); resource_t *iso_parent, *last_parent, *parent; if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_INCARNATION); crm_xml_add(xml, name, value); free(name); } value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE); if (value) { char *name = NULL; name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE); crm_xml_add(xml, name, value); free(name); } for (parent = rsc; parent != NULL; parent = parent->parent) { if (parent->container) { crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id); } } last_parent = iso_parent = rsc; while (iso_parent != NULL) { char *name = NULL; char *iso = NULL; if (iso_parent->isolation_wrapper == NULL) { last_parent = iso_parent; iso_parent = iso_parent->parent; continue; } /* name of wrapper script this resource is routed through. */ name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER); crm_xml_add(xml, name, iso_parent->isolation_wrapper); free(name); /* instance name for isolated environment */ name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE); if (iso_parent->variant >= pe_clone) { /* if isolation is set at the clone/master level, we have to * give this resource the unique isolation instance associated * with the clone child (last_parent)*/ /* Example: cloned group. group is container * clone myclone - iso_parent * group mygroup - last_parent (this is the iso environment) * rsc myrsc1 - rsc * rsc myrsc2 * The group is what is isolated in example1. We have to make * sure myrsc1 and myrsc2 launch in the same isolated environment. * * Example: cloned primitives. 
void
native_append_meta(resource_t * rsc, xmlNode * xml)
{
    char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
    resource_t *iso_parent, *last_parent, *parent;

    if (value) {
        char *name = NULL;

        name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
        crm_xml_add(xml, name, value);
        free(name);
    }

    value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
    if (value) {
        char *name = NULL;

        name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
        crm_xml_add(xml, name, value);
        free(name);
    }

    for (parent = rsc; parent != NULL; parent = parent->parent) {
        if (parent->container) {
            crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id);
        }
    }

    last_parent = iso_parent = rsc;
    while (iso_parent != NULL) {
        char *name = NULL;
        char *iso = NULL;

        if (iso_parent->isolation_wrapper == NULL) {
            last_parent = iso_parent;
            iso_parent = iso_parent->parent;
            continue;
        }

        /* name of wrapper script this resource is routed through. */
        name = crm_meta_name(XML_RSC_ATTR_ISOLATION_WRAPPER);
        crm_xml_add(xml, name, iso_parent->isolation_wrapper);
        free(name);

        /* instance name for isolated environment */
        name = crm_meta_name(XML_RSC_ATTR_ISOLATION_INSTANCE);

        if (iso_parent->variant >= pe_clone) {
            /* If isolation is set at the clone/master level, we have to
             * give this resource the unique isolation instance associated
             * with the clone child (last_parent). */

            /* Example: cloned group. The group is the container.
             *   clone myclone      - iso_parent
             *     group mygroup    - last_parent (this is the iso environment)
             *       rsc myrsc1     - rsc
             *       rsc myrsc2
             * The group is what is isolated in the first example. We have to
             * make sure myrsc1 and myrsc2 launch in the same isolated
             * environment.
             *
             * Example: cloned primitives. The cloned primitive is the container.
             *   clone myclone      - iso_parent
             *     rsc myrsc1       - last_parent == rsc (this is the iso environment)
             * The individual cloned primitive instances are isolated.
             */
            value = g_hash_table_lookup(last_parent->meta, XML_RSC_ATTR_INCARNATION);
            CRM_ASSERT(value != NULL);

            iso = crm_concat(crm_element_value(last_parent->xml, XML_ATTR_ID), value, '_');
            crm_xml_add(xml, name, iso);
            free(iso);

        } else {
            /*
             * Example: cloned group of containers
             *   clone myclone
             *     group mygroup
             *       rsc myrsc1     - iso_parent (this is the iso environment)
             *       rsc myrsc2
             *
             * Example: group of containers
             *   group mygroup
             *     rsc myrsc1       - iso_parent (this is the iso environment)
             *     rsc myrsc2
             *
             * Example: group is container
             *   group mygroup      - iso_parent (this is the iso environment)
             *     rsc myrsc1
             *     rsc myrsc2
             *
             * Example: single primitive
             *   rsc myrsc1         - iso_parent (this is the iso environment)
             */
            value = g_hash_table_lookup(iso_parent->meta, XML_RSC_ATTR_INCARNATION);
            if (value) {
                crm_xml_add(xml, name, iso_parent->id);
                iso = crm_concat(crm_element_value(iso_parent->xml, XML_ATTR_ID), value, '_');
                crm_xml_add(xml, name, iso);
                free(iso);

            } else {
                crm_xml_add(xml, name, iso_parent->id);
            }
        }
        free(name);
        break;
    }
}
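/* Worked example of the instance naming above (hypothetical IDs): for clone
 * "myclone" wrapping group "mygroup", instance 0 of the group carries
 * XML_RSC_ATTR_INCARNATION = "0", so the isolation instance becomes
 * crm_concat("mygroup", "0", '_') == "mygroup_0", and every member of that
 * group instance is launched into the same isolated environment. */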
diff --git a/pengine/pengine.c b/pengine/pengine.c
index 43081af005..25aed9d974 100644
--- a/pengine/pengine.c
+++ b/pengine/pengine.c
@@ -1,281 +1,284 @@
/*
 * Copyright (C) 2004 Andrew Beekhof
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

xmlNode *do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now);

gboolean show_scores = FALSE;
int scores_log_level = LOG_DEBUG_2;
gboolean show_utilization = FALSE;
int utilization_log_level = LOG_DEBUG_2;
extern int transition_id;

#define get_series() was_processing_error?1:was_processing_warning?2:3

typedef struct series_s {
    const char *name;
    const char *param;
    int wrap;
} series_t;

series_t series[] = {
    {"pe-unknown", "_dont_match_anything_", -1},
    {"pe-error", "pe-error-series-max", -1},
    {"pe-warn", "pe-warn-series-max", 200},
    {"pe-input", "pe-input-series-max", 400},
};

gboolean process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender);

gboolean
process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender)
{
    static char *last_digest = NULL;
    static char *filename = NULL;

    time_t execution_date = time(NULL);
    const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
    const char *op = crm_element_value(msg, F_CRM_TASK);
    const char *ref = crm_element_value(msg, F_CRM_REFERENCE);

    crm_trace("Processing %s op (ref=%s)...", op, ref);

    if (op == NULL) {
        /* error */

    } else if (strcasecmp(op, CRM_OP_HELLO) == 0) {
        /* ignore */

    } else if (safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) {
        /* ignore */

    } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) {
        crm_trace("Bad sys-to %s", crm_str(sys_to));
        return FALSE;

    } else if (strcasecmp(op, CRM_OP_PECALC) == 0) {
        int seq = -1;
        int series_id = 0;
        int series_wrap = 0;
        char *digest = NULL;
        const char *value = NULL;
        pe_working_set_t data_set;
        xmlNode *converted = NULL;
        xmlNode *reply = NULL;
        gboolean is_repoke = FALSE;
        gboolean process = TRUE;

        crm_config_error = FALSE;
        crm_config_warning = FALSE;

        was_processing_error = FALSE;
        was_processing_warning = FALSE;

        set_working_set_defaults(&data_set);

        digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, CRM_FEATURE_SET);
        converted = copy_xml(xml_data);
        if (cli_config_update(&converted, NULL, TRUE) == FALSE) {
            data_set.graph = create_xml_node(NULL, XML_TAG_GRAPH);
            crm_xml_add_int(data_set.graph, "transition_id", 0);
            crm_xml_add_int(data_set.graph, "cluster-delay", 0);
            process = FALSE;
            free(digest);

        } else if (safe_str_eq(digest, last_digest)) {
            crm_info("Input has not changed since last time, not saving to disk");
            is_repoke = TRUE;
            free(digest);

        } else {
            free(last_digest);
            last_digest = digest;
        }

        if (process) {
            do_calculations(&data_set, converted, NULL);
        }

        series_id = get_series();
        series_wrap = series[series_id].wrap;
        value = pe_pref(data_set.config_hash, series[series_id].param);

        if (value != NULL) {
            series_wrap = crm_int_helper(value, NULL);
            if (errno != 0) {
                series_wrap = series[series_id].wrap;
            }

        } else {
            crm_config_warn("No value specified for cluster preference: %s",
                            series[series_id].param);
        }

        seq = get_last_sequence(PE_STATE_DIR, series[series_id].name);
        crm_trace("Series %s: wrap=%d, seq=%d, pref=%s",
                  series[series_id].name, series_wrap, seq, value);
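        /* Illustrative walk-through of the series bookkeeping above
         * (hypothetical values): a clean run selects the "pe-input" series;
         * with pe-input-series-max at its default of 400 and a last
         * sequence of 399, this input is saved under sequence 399 and the
         * counter wraps back to 0, bounding how many inputs are kept in
         * PE_STATE_DIR. */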
        data_set.input = NULL;
        reply = create_reply(msg, data_set.graph);
        CRM_ASSERT(reply != NULL);

        if (is_repoke == FALSE) {
            free(filename);
            filename = generate_series_filename(PE_STATE_DIR, series[series_id].name, seq,
                                                HAVE_BZLIB_H);
        }

        crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename);
        crm_xml_add_int(reply, "graph-errors", was_processing_error);
        crm_xml_add_int(reply, "graph-warnings", was_processing_warning);
        crm_xml_add_int(reply, "config-errors", crm_config_error);
        crm_xml_add_int(reply, "config-warnings", crm_config_warning);

        if (crm_ipcs_send(sender, 0, reply, crm_ipc_server_event) == FALSE) {
            crm_err("Couldn't send transition graph to peer, discarding");
        }

        free_xml(reply);
        cleanup_alloc_calculations(&data_set);

        if (was_processing_error) {
-            crm_err("Calculated Transition %d: %s", transition_id, filename);
+            crm_err("Calculated transition %d (with errors), saving inputs in %s",
+                    transition_id, filename);

        } else if (was_processing_warning) {
-            crm_warn("Calculated Transition %d: %s", transition_id, filename);
+            crm_warn("Calculated transition %d (with warnings), saving inputs in %s",
+                     transition_id, filename);

        } else {
-            crm_notice("Calculated Transition %d: %s", transition_id, filename);
+            crm_notice("Calculated transition %d, saving inputs in %s",
+                       transition_id, filename);
        }

        if (crm_config_error) {
            crm_notice("Configuration ERRORs found during PE processing."
                       " Please run \"crm_verify -L\" to identify issues.");
        }

        if (is_repoke == FALSE && series_wrap != 0) {
            unlink(filename);
            crm_xml_add_int(xml_data, "execution-date", execution_date);
            write_xml_file(xml_data, filename, HAVE_BZLIB_H);
            write_last_sequence(PE_STATE_DIR, series[series_id].name, seq + 1, series_wrap);

        } else {
            crm_trace("Not writing out %s: %d & %d", filename, is_repoke, series_wrap);
        }

        free_xml(converted);
    }

    return TRUE;
}

xmlNode *
do_calculations(pe_working_set_t * data_set, xmlNode * xml_input, crm_time_t * now)
{
    GListPtr gIter = NULL;
    int rsc_log_level = LOG_INFO;

    /* pe_debug_on(); */

    CRM_ASSERT(xml_input || is_set(data_set->flags, pe_flag_have_status));

    if (is_set(data_set->flags, pe_flag_have_status) == FALSE) {
        set_working_set_defaults(data_set);
        data_set->input = xml_input;
        data_set->now = now;

    } else {
        crm_trace("Already have status - reusing");
    }

    if (data_set->now == NULL) {
        data_set->now = crm_time_new(NULL);
    }

    crm_trace("Calculate cluster status");
    stage0(data_set);

    if(is_not_set(data_set->flags, pe_flag_quick_location)) {
        gIter = data_set->resources;
        for (; gIter != NULL; gIter = gIter->next) {
            resource_t *rsc = (resource_t *) gIter->data;

            if (is_set(rsc->flags, pe_rsc_orphan) && rsc->role == RSC_ROLE_STOPPED) {
                continue;
            }
            rsc->fns->print(rsc, NULL, pe_print_log, &rsc_log_level);
        }
    }

    crm_trace("Applying placement constraints");
    stage2(data_set);

    if(is_set(data_set->flags, pe_flag_quick_location)){
        return NULL;
    }

    crm_trace("Create internal constraints");
    stage3(data_set);

    crm_trace("Check actions");
    stage4(data_set);

    crm_trace("Allocate resources");
    stage5(data_set);

    crm_trace("Processing fencing and shutdown cases");
    stage6(data_set);

    crm_trace("Applying ordering constraints");
    stage7(data_set);

    crm_trace("Create transition graph");
    stage8(data_set);

    crm_trace("=#=#=#=#= Summary =#=#=#=#=");
    crm_trace("\t========= Set %d (Un-runnable) =========", -1);
    if (get_crm_log_level() >= LOG_TRACE) {
        gIter = data_set->actions;
        for (; gIter != NULL; gIter = gIter->next) {
            action_t *action = (action_t *) gIter->data;

            if (is_set(action->flags, pe_action_optional) == FALSE
                && is_set(action->flags, pe_action_runnable) == FALSE
                && is_set(action->flags, pe_action_pseudo) == FALSE) {
                log_action(LOG_TRACE, "\t", action, TRUE);
            }
        }
    }

    return data_set->graph;
}
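/* Usage sketch (hypothetical caller, mirroring process_pe_message() above):
 * a tool replaying a saved input could drive the same stage0..stage8
 * pipeline directly; "pe-input-123.bz2" is an assumed example file:
 *
 *     pe_working_set_t data_set;
 *     xmlNode *input = filename2xml("pe-input-123.bz2");
 *     xmlNode *graph = NULL;
 *
 *     set_working_set_defaults(&data_set);
 *     graph = do_calculations(&data_set, input, NULL);
 *     ... inspect graph ...
 *     cleanup_alloc_calculations(&data_set);
 */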