diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 4124555f70..32976ad159 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,596 +1,596 @@ /* * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static xmlNode * attrd_confirmation(int callid) { xmlNode *node = create_xml_node(NULL, __func__); crm_xml_add(node, F_TYPE, T_ATTRD); crm_xml_add(node, F_ORIG, get_local_node_name()); crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); return node; } static void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { const char *election_op = crm_element_value(xml, F_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->uname, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->uname); attrd_send_message(peer, reply, false); free_xml(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down(false)) { crm_info("Disconnected from Corosync process group"); } else { crm_crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Override an attribute sync with a local value * * Broadcast the local node's value for an attribute that's different from the * value provided in a peer's attribute synchronization response. This ensures a * node's values for itself take precedence and all peers are kept in sync. * * \param[in] a Attribute entry to override * * \return Local instance of attribute value */ static attribute_value_t * broadcast_local_value(const attribute_t *a) { attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); attrd_add_value_xml(sync, a, v, false); attrd_send_message(NULL, sync, false); free_xml(sync); return v; } #define state_text(state) pcmk__s((state), "in unknown state") /*! * \internal * \brief Return a node's value from hash table (creating one if needed) * * \param[in,out] values Hash table of values * \param[in] node_name Name of node to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *node_name, const xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, node_name); int is_remote = 0; if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); pcmk__str_update(&v->nodename, node_name); g_hash_table_replace(values, v->nodename, v); } crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { attrd_set_value_flags(v, attrd_value_remote); CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } return(v); } static void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); switch (kind) { case crm_status_uname: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state)); break; case crm_status_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->uname, (gone? "no longer" : "now")); break; case crm_status_nstate: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->uname, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->uname); attrd_do_not_expect_from_peer(peer->uname); } } static void record_peer_nodeid(attribute_value_t *v, const char *host) { crm_node_t *known_peer = crm_get_peer(v->nodeid, host); crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { attrd_write_attributes(attrd_write_changed); } } static void update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter, int is_force_write) { attribute_value_t *v = NULL; v = attrd_lookup_or_create_value(a->values, host, xml); if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, v->current, value, peer->uname); v = broadcast_local_value(a); } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { crm_notice("Setting %s[%s]%s%s: %s -> %s " CRM_XS " from %s with %s write delay", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), pcmk__s(value, "(unset)"), peer->uname, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); pcmk__str_update(&v->current, value); a->changed = true; if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", attr, host, peer->uname, value); a->force_write = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } } // This allows us to later detect local values that peer doesn't know about attrd_set_value_flags(v, attrd_value_from_peer); /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote) && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { record_peer_nodeid(v, host); } } static void attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); int is_force_write = 0; if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->uname, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!pcmk_is_set(v->flags, attrd_value_from_peer) && pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei)) { if (sync == NULL) { sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); free_xml(sync); } } int attrd_cluster_connect(void) { attrd_cluster = pcmk_cluster_new(); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); guint interval_ms = 0U; char *attr = NULL; GHashTableIter iter; regex_t regex; crm_node_t *peer = crm_get_peer(0, request->peer); - pcmk__parse_interval_spec(interval_spec, &interval_ms); + pcmk_parse_interval_spec(interval_spec, &interval_ms); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent clear request * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->uname); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { attrd_peer_update(peer, child, crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for node %s " CRM_XS " %s reaping node from cache", host, source, (uncache? "and" : "without")); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { pcmk__purge_node_from_cache(host, 0); } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); attrd_send_message(peer, sync, false); free_xml(sync); } void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; CRM_CHECK((peer != NULL) && (xml != NULL), return); if (xml->children != NULL) { for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attrd_copy_xml_attributes(xml, child); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 67defffb41..11a120a95a 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -1,630 +1,630 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static qb_ipcs_service_t *ipcs = NULL; /*! * \internal * \brief Build the XML reply to a client query * * param[in] attr Name of requested attribute * param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __func__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY); crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); /* Allow caller to use "localhost" to refer to local node */ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, host, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, v->nodename, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); } } } return reply; } xmlNode * attrd_client_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc, *op, *interval_spec; if (minimum_protocol_version >= 2) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ attrd_send_message(NULL, xml, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); /* Map this to an update */ crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { guint interval_ms = 0U; - pcmk__parse_interval_spec(interval_spec, &interval_ms); + pcmk_parse_interval_spec(interval_spec, &interval_ms); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern); free(pattern); } else { crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ xml_remove_prop(xml, PCMK__XA_ATTR_NAME); xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); return attrd_client_update(request); } xmlNode * attrd_client_peer_remove(pcmk__request_t *request) { xmlNode *xml = request->xml; // Host and ID are not used in combination, rather host has precedence const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); char *host_alloc = NULL; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (host == NULL) { int nodeid = 0; crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); if (nodeid > 0) { crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL, NULL); char *host_alloc = NULL; if (node && node->uname) { // Use cached name if available host = node->uname; } else { // Otherwise ask cluster layer host_alloc = get_node_name(nodeid); host = host_alloc; } pcmk__xe_add_node(xml, host, 0); } } if (host) { crm_info("Client %s is requesting all values for %s be removed", pcmk__client_name(request->ipc_client), host); attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ free(host_alloc); } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", pcmk__client_name(request->ipc_client)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_query(pcmk__request_t *request) { xmlNode *query = request->xml; xmlNode *reply = NULL; const char *attr = NULL; crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); /* Request must specify attribute name to query */ attr = crm_element_value(query, PCMK__XA_ATTR_NAME); if (attr == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Ignoring malformed query from %s (no attribute name given)", pcmk__client_name(request->ipc_client)); return NULL; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, PCMK__XA_ATTR_NODE_NAME)); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Could not respond to query from %s: could not create XML reply", pcmk__client_name(request->ipc_client)); return NULL; } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } request->ipc_client->request_id = 0; return reply; } xmlNode * attrd_client_refresh(pcmk__request_t *request) { crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); attrd_write_attributes(attrd_write_all|attrd_write_no_delay); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void handle_missing_host(xmlNode *xml) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); if (host == NULL) { crm_trace("Inferring host"); pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid); } } /* Convert a single IPC message with a regex into one with multiple children, one * for each regex match. */ static int expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) { if (attr == NULL && regex) { bool matched = false; GHashTableIter aIter; regex_t r_patt; crm_debug("Setting %s to %s", regex, value); if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { return EINVAL; } g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(&r_patt, attr, 0, NULL, 0); if (status == 0) { xmlNode *child = create_xml_node(xml, XML_ATTR_OP); crm_trace("Matched %s with %s", attr, regex); matched = true; /* Copy all the attributes from the parent over, but remove the * regex and replace it with the name. */ attrd_copy_xml_attributes(xml, child); xml_remove_prop(child, PCMK__XA_ATTR_PATTERN); crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); } } regfree(&r_patt); /* Return a code if we never matched anything. This should not be treated * as an error. It indicates there was a regex, and it was a valid regex, * but simply did not match anything and the caller should not continue * doing any regex-related processing. */ if (!matched) { return pcmk_rc_op_unsatisfied; } } else if (attr == NULL) { return pcmk_rc_bad_nvpair; } return pcmk_rc_ok; } static int handle_regexes(pcmk__request_t *request) { xmlNode *xml = request->xml; int rc = pcmk_rc_ok; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); rc = expand_regexes(xml, attr, value, regex); if (rc == EINVAL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Bad regex '%s' for update from client %s", regex, pcmk__client_name(request->ipc_client)); } else if (rc == pcmk_rc_bad_nvpair) { crm_err("Update request did not specify attribute or regular expression"); pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Client %s update request did not specify attribute or regular expression", pcmk__client_name(request->ipc_client)); } return rc; } static int handle_value_expansion(const char **value, xmlNode *xml, const char *op, const char *attr) { attribute_t *a = g_hash_table_lookup(attributes, attr); if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { return EINVAL; } if (*value && attrd_value_needs_expansion(*value)) { int int_value; attribute_value_t *v = NULL; if (a) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(*value, (v? v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, *value, int_value); crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); } return pcmk_rc_ok; } static void send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) { if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { /* The client is waiting on the cluster-wide sync point. In this case, * the response ACK is not sent until this attrd broadcasts the update * and receives its own confirmation back from all peers. */ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ } else { /* The client is either waiting on the local sync point or was not * waiting on any sync point at all. For the local sync point, the * response ACK is sent in attrd_peer_update. For clients not * waiting on any sync point, the response ACK is sent in * handle_update_request immediately before this function was called. */ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ } } static int send_child_update(xmlNode *child, void *data) { pcmk__request_t *request = (pcmk__request_t *) data; /* Calling pcmk__set_result is handled by one of these calls to * attrd_client_update, so no need to do it again here. */ request->xml = child; attrd_client_update(request); return pcmk_rc_ok; } xmlNode * attrd_client_update(pcmk__request_t *request) { xmlNode *xml = NULL; const char *attr, *value, *regex; CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL); xml = request->xml; /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. */ if (xml->children != NULL) { if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. However, * we also need to apply all the transformations in this function * to the children since they don't happen anywhere else. */ for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attr = crm_element_value(child, PCMK__XA_ATTR_NAME); value = crm_element_value(child, PCMK__XA_ATTR_VALUE); handle_missing_host(child); if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } } send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { /* Save the original xml node pointer so it can be restored after iterating * over all the children. */ xmlNode *orig_xml = request->xml; /* Second, if they do not support that protocol version, split it * up into individual messages and call attrd_client_update on * each one. */ pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request); request->xml = orig_xml; } return NULL; } attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); if (handle_regexes(request) != pcmk_rc_ok) { /* Error handling was already dealt with in handle_regexes, so just return. */ return NULL; } else if (regex) { /* Recursively call attrd_client_update on the new message with regexes * expanded. If supported by the attribute daemon, this means that all * matches can also be handled atomically. */ return attrd_client_update(request); } handle_missing_host(xml); if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), value, (attrd_election_won()? " (writer)" : "")); send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } /*! * \internal * \brief Accept a new client IPC connection * * \param[in,out] c New connection * \param[in] uid Client user id * \param[in] gid Client group id * * \return pcmk_ok on success, -errno otherwise */ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); if (attrd_shutting_down(false)) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return pcmk_ok; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in] c Connection to destroy * * \return FALSE (i.e. do not re-run this callback) */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { crm_trace("Ignoring request to clean up unknown connection %p", c); } else { crm_trace("Cleaning up closed client connection %p", c); /* Remove the client from the sync point waitlist if it's present. */ attrd_remove_client_from_waitlist(client); /* And no longer wait for confirmations from any peers. */ attrd_do_not_wait_for_client(client); pcmk__free_client(client); } return FALSE; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in,out] c Connection to destroy * * \note We handle a destroyed connection the same as a closed one, * but we need a separate handler because the return type is different. */ static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Destroying client connection %p", c); attrd_ipc_closed(c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); return 0; } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return 0); attrd_handle_request(&request); pcmk__reset_request(&request); } free_xml(xml); return 0; } static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = NULL, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; void attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } } /*! * \internal * \brief Set up attrd IPC communication */ void attrd_init_ipc(void) { pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); } diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index fd77af8394..0a7f799637 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,872 +1,872 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include static qb_ipcs_service_t *ipcs = NULL; static crm_trigger_t *config_read_trigger = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(crm_cluster_t * cluster); #endif void crm_shutdown(int nsig); static gboolean crm_read_options(gpointer user_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = pcmk_cluster_new(); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); controld_set_fsa_input_flags(R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); #if SUPPORT_COROSYNC if (is_corosync_cluster()) { registered = crm_connect_corosync(cluster); } #endif // SUPPORT_COROSYNC if (registered) { controld_election_init(cluster->uname); controld_globals.our_nodename = cluster->uname; controld_globals.our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (!registered) { controld_set_fsa_input_flags(R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __func__); controld_clear_fsa_input_flags(R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ controld_set_fsa_input_flags(R_SHUTDOWN); controld_disconnect_fencer(FALSE); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; controld_set_fsa_input_flags(R_SHUTDOWN); //controld_set_fsa_input_flags(R_STAYDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", pcmk__s(controld_globals.dc_name, "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } void crmd_fast_exit(crm_exit_t exit_code) { if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && pcmk_is_set(controld_globals.fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } if (controld_globals.logger_out != NULL) { controld_globals.logger_out->finish(controld_globals.logger_out, exit_code, true, NULL); pcmk__output_free(controld_globals.logger_out); controld_globals.logger_out = NULL; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GMainLoop *mloop = controld_globals.mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ controld_set_fsa_input_flags(R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); controld_shutdown_schedulerd_ipc(); controld_disconnect_fencer(TRUE); if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *fsa_data = (fsa_data_t *) iter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(controld_globals.fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } controld_clear_fsa_input_flags(R_MEMBERSHIP); g_list_free(controld_globals.fsa_message_queue); controld_globals.fsa_message_queue = NULL; controld_free_node_pending_timers(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. */ controld_disconnect_cib_manager(); verify_stopped(controld_globals.fsa_state, LOG_WARNING); controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_destroy_all(); mainloop_destroy_trigger(config_read_trigger); config_read_trigger = NULL; controld_destroy_fsa_trigger(); controld_destroy_transition_trigger(); pcmk__client_cleanup(); crm_peer_destroy(); controld_free_fsa_timers(); te_cleanup_stonith_history_sync(NULL, TRUE); controld_free_sched_timer(); free(controld_globals.our_nodename); controld_globals.our_nodename = NULL; free(controld_globals.our_uuid); controld_globals.our_uuid = NULL; free(controld_globals.dc_name); controld_globals.dc_name = NULL; free(controld_globals.dc_version); controld_globals.dc_version = NULL; free(controld_globals.cluster_name); controld_globals.cluster_name = NULL; free(controld_globals.te_uuid); controld_globals.te_uuid = NULL; free_max_generation(); controld_destroy_failed_sync_table(); controld_destroy_outside_events_table(); mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop); /* Don't re-enter this block */ controld_globals.mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(controld_globals.cib_conn); controld_globals.cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; if (pcmk_is_set(action, A_EXIT_1)) { exit_code = CRM_EX_ERROR; crm_err("Exiting now due to errors"); } verify_stopped(cur_state, LOG_ERR); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); controld_init_fsa_trigger(); controld_init_transition_trigger(); crm_debug("Creating CIB manager and executor objects"); controld_globals.cib_conn = cib_new(); lrm_state_init_local(); if (controld_init_fsa_timers() == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } // \return libqb error code (0 on success, -errno on error) static int32_t accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } // \return libqb error code (0 on success, -errno on error) static int32_t dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); return 0; } pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE); CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(msg, F_CRM_USER, client->user); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from client %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); free_xml(msg); return 0; } static int32_t ipc_client_disconnected(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), pcmk__client_name(client), c, client); free(client->userdata); pcmk__free_client(client); controld_trigger_fsa(); } return 0; } static void ipc_connection_destroyed(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); ipc_client_disconnected(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = accept_controller_client, .connection_created = NULL, .msg_process = dispatch_controller_ipc, .connection_closed = ipc_client_disconnected, .connection_destroyed = ipc_connection_destroyed }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_MEMBERSHIP)) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_READ_CONFIG)) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } controld_set_fsa_input_flags(R_ST_REQUIRED); controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { controld_set_fsa_input_flags(R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } static pcmk__cluster_option_t controller_options[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { PCMK_OPT_DC_VERSION, NULL, "string", NULL, PCMK__VALUE_NONE, NULL, N_("Pacemaker version on cluster node elected Designated Controller (DC)"), N_("Includes a hash which identifies the exact changeset the code was " "built from. Used for diagnostic purposes.") }, { PCMK_OPT_CLUSTER_INFRASTRUCTURE, NULL, "string", NULL, "corosync", NULL, N_("The messaging stack on which Pacemaker is currently running"), N_("Used for informational and diagnostic purposes.") }, { PCMK_OPT_CLUSTER_NAME, NULL, "string", NULL, NULL, NULL, N_("An arbitrary name for the cluster"), N_("This optional value is mostly for users' convenience as desired " "in administration, but may also be used in Pacemaker " "configuration rules via the #cluster-name node attribute, and " "by higher-level tools and resource agents.") }, { PCMK_OPT_DC_DEADTIME, NULL, "time", NULL, "20s", pcmk__valid_interval_spec, N_("How long to wait for a response from other nodes during start-up"), N_("The optimal value will depend on the speed and load of your network " "and the type of switches used.") }, { PCMK_OPT_CLUSTER_RECHECK_INTERVAL, NULL, "time", N_("Zero disables polling, while positive values are an interval in " "seconds (unless other units are specified, for example \"5min\")"), "15min", pcmk__valid_interval_spec, N_("Polling interval to recheck cluster state and evaluate rules " "with date specifications"), N_("Pacemaker is primarily event-driven, and looks ahead to know when to " "recheck cluster state for failure timeouts and most time-based " "rules. However, it will also recheck the cluster after this " "amount of inactivity, to evaluate rules with date specifications " "and serve as a fail-safe for certain types of scheduler bugs.") }, { PCMK_OPT_LOAD_THRESHOLD, NULL, "percentage", NULL, "80%", pcmk__valid_percentage, N_("Maximum amount of system load that should be used by cluster nodes"), N_("The cluster will slow down its recovery process when the amount of " "system resources used (currently CPU) approaches this limit"), }, { PCMK_OPT_NODE_ACTION_LIMIT, NULL, "integer", NULL, "0", pcmk__valid_int, N_("Maximum number of jobs that can be scheduled per node " "(defaults to 2x cores)") }, { PCMK_OPT_FENCE_REACTION, NULL, "select", "stop, panic", "stop", NULL, N_("How a cluster node should react if notified of its own fencing"), N_("A cluster node may receive notification of its own fencing if fencing " "is misconfigured, or if fabric fencing is in use that doesn't cut " "cluster communication. Use \"stop\" to attempt to immediately " "stop Pacemaker and stay stopped, or \"panic\" to attempt to " "immediately reboot the local node, falling back to stop on " "failure.") }, { PCMK_OPT_ELECTION_TIMEOUT, NULL, "time", NULL, "2min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("Declare an election failed if it is not decided within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_SHUTDOWN_ESCALATION, NULL, "time", NULL, "20min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("Exit immediately if shutdown does not complete within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_JOIN_INTEGRATION_TIMEOUT, "crmd-integration-timeout", "time", NULL, "3min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_JOIN_FINALIZATION_TIMEOUT, "crmd-finalization-timeout", "time", NULL, "30min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_TRANSITION_DELAY, "crmd-transition-delay", "time", NULL, "0s", pcmk__valid_interval_spec, N_("*** Advanced Use Only *** Enabling this option will slow down " "cluster recovery under all conditions"), N_("Delay cluster recovery for this much time to allow for additional " "events to occur. Useful if your configuration is sensitive to " "the order in which ping updates arrive.") }, { PCMK_OPT_STONITH_WATCHDOG_TIMEOUT, NULL, "time", NULL, "0", controld_verify_stonith_watchdog_timeout, N_("How long before nodes can be assumed to be safely down when " "watchdog-based self-fencing via SBD is in use"), N_("If this is set to a positive value, lost nodes are assumed to " "self-fence using watchdog-based SBD within this much time. This " "does not require a fencing resource to be explicitly configured, " "though a fence_watchdog resource can be configured, to limit use " "to specific nodes. If this is set to 0 (the default), the cluster " "will never assume watchdog-based self-fencing. If this is set to a " "negative value, the cluster will use twice the local value of the " "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " "or otherwise treat this as 0. WARNING: When used, this timeout " "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " "watchdog-based SBD, and Pacemaker will refuse to start on any of " "those nodes where this is not true for the local value or SBD is " "not active. When this is set to a negative value, " "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " "that use SBD, otherwise data corruption or loss could occur.") }, { PCMK_OPT_STONITH_MAX_ATTEMPTS, NULL, "integer", NULL, "10", pcmk__valid_positive_int, N_("How many times fencing can fail before it will no longer be " "immediately re-attempted on a target") }, // Already documented in libpe_status (other values must be kept identical) { PCMK_OPT_NO_QUORUM_POLICY, NULL, "select", "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_no_quorum_policy, N_("What to do when the cluster does not have quorum"), NULL }, { PCMK_OPT_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether to lock resources to a cleanly shut down node"), N_("When true, resources active on a node when it is cleanly shut down " "are kept \"locked\" to that node (not allowed to run elsewhere) " "until they start again on that node after it rejoins (or for at " "most shutdown-lock-limit, if set). Stonith resources and " "Pacemaker Remote connections are never locked. Clone and bundle " "instances and the promoted role of promotable clones are " "currently never locked, though support could be added in a future " "release.") }, { PCMK_OPT_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("Do not lock resources to a cleanly shut down node longer than " "this"), N_("If shutdown-lock is true and this is set to a nonzero time " "duration, shutdown locks will expire after this much time has " "passed since the shutdown was initiated, even if the node has not " "rejoined.") }, { PCMK_OPT_NODE_PENDING_TIMEOUT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("How long to wait for a node that has joined the cluster to join " "the controller process group"), N_("Fence nodes that do not join the controller process group within " "this much time after joining the cluster, to allow the cluster " "to continue managing resources. A value of 0 means never fence " "pending nodes. Setting the value to 2h means fence nodes after " "2 hours.") }, }; void crmd_metadata(void) { const char *desc_short = "Pacemaker controller options"; const char *desc_long = "Cluster options used by Pacemaker's controller"; gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short, desc_long, controller_options, PCMK__NELEM(controller_options)); printf("%s", s); g_free(s); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); controld_set_fsa_input_flags(R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = pcmk__strkey_table(free, free); pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); // Validate all options, and use defaults if not already present in hash pcmk__validate_cluster_options(config_hash, controller_options, PCMK__NELEM(controller_options)); value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { controld_set_global_flags(controld_no_quorum_suicide); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK); if (crm_is_true(value)) { controld_set_global_flags(controld_shutdown_lock_enabled); } else { controld_clear_global_flags(controld_shutdown_lock_enabled); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); - pcmk__parse_interval_spec(value, &controld_globals.shutdown_lock_limit); + pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit); controld_globals.shutdown_lock_limit /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); - pcmk__parse_interval_spec(value, &controld_globals.node_pending_timeout); + pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout); controld_globals.node_pending_timeout /= 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME); pcmk__str_update(&(controld_globals.cluster_name), value); // Let subcomponents initialize their own static variables controld_configure_election(config_hash); controld_configure_fencing(config_hash); controld_configure_fsa_timers(config_hash); controld_configure_throttle(config_hash); alerts = first_named_child(output, XML_CIB_TAG_ALERTS); crmd_unpack_alerts(alerts); controld_set_fsa_input_flags(R_READ_CONFIG); controld_trigger_fsa(); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } /*! * \internal * \brief Trigger read and processing of the configuration * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_config_as(const char *fn, int line) { if (config_read_trigger != NULL) { crm_trace("%s:%d - Triggered config processing", fn, line); mainloop_set_trigger(config_read_trigger); } } gboolean crm_read_options(gpointer user_data) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = cib_conn->cmds->query(cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath|cib_scope_local); fsa_register_cib_callback(call_id, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); controld_trigger_config(); } void crm_shutdown(int nsig) { const char *value = NULL; guint default_period_ms = 0; if ((controld_globals.mainloop == NULL) || !g_main_loop_is_running(controld_globals.mainloop)) { crmd_exit(CRM_EX_OK); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); return; } controld_set_fsa_input_flags(R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); /* If shutdown timer doesn't have a period set, use the default * * @TODO: Evaluate whether this is still necessary. As long as * config_query_callback() has been run at least once, it doesn't look like * anything could have changed the timer period since then. */ value = pcmk__cluster_option(NULL, controller_options, PCMK__NELEM(controller_options), PCMK_OPT_SHUTDOWN_ESCALATION); - pcmk__parse_interval_spec(value, &default_period_ms); + pcmk_parse_interval_spec(value, &default_period_ms); controld_shutdown_start_countdown(default_period_ms); } diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c index 448808053c..4dc5fb41fc 100644 --- a/daemons/controld/controld_election.c +++ b/daemons/controld/controld_election.c @@ -1,287 +1,287 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include static election_t *fsa_election = NULL; static gboolean election_win_cb(gpointer data) { register_fsa_input(C_FSA_INTERNAL, I_ELECTION_DC, NULL); return FALSE; } void controld_election_init(const char *uname) { fsa_election = election_init("DC", uname, 60000 /*60s*/, election_win_cb); } /*! * \internal * \brief Configure election options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_election(GHashTable *options) { const char *value = g_hash_table_lookup(options, PCMK_OPT_ELECTION_TIMEOUT); guint interval_ms = 0U; - pcmk__parse_interval_spec(value, &interval_ms); + pcmk_parse_interval_spec(value, &interval_ms); election_timeout_set_period(fsa_election, interval_ms); } void controld_remove_voter(const char *uname) { election_remove(fsa_election, uname); if (pcmk__str_eq(uname, controld_globals.dc_name, pcmk__str_casei)) { /* Clear any election dampening in effect. Otherwise, if the lost DC had * just won, an immediate new election could fizzle out with no new DC. */ election_clear_dampening(fsa_election); } } void controld_election_fini(void) { election_fini(fsa_election); fsa_election = NULL; } void controld_stop_current_election_timeout(void) { election_timeout_stop(fsa_election); } /* A_ELECTION_VOTE */ void do_election_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean not_voting = FALSE; /* don't vote if we're in one of these states or wanting to shut down */ switch (cur_state) { case S_STARTING: case S_RECOVERY: case S_STOPPING: case S_TERMINATE: crm_warn("Not voting in election, we're in state %s", fsa_state2string(cur_state)); not_voting = TRUE; break; case S_ELECTION: case S_INTEGRATION: case S_RELEASE_DC: break; default: crm_err("Broken? Voting in state %s", fsa_state2string(cur_state)); break; } if (not_voting == FALSE) { if (pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) { not_voting = TRUE; } } if (not_voting) { if (AM_I_DC) { register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); } else { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } return; } election_vote(fsa_election); return; } void do_election_check(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (controld_globals.fsa_state == S_ELECTION) { election_check(fsa_election); } else { crm_debug("Ignoring election check because we are not in an election"); } } /* A_ELECTION_COUNT */ void do_election_count_vote(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { enum election_result rc = 0; ha_msg_input_t *vote = fsa_typed_data(fsa_dt_ha_msg); if(crm_peer_cache == NULL) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Internal error, no peer cache"); } return; } rc = election_count_vote(fsa_election, vote->msg, cur_state != S_STARTING); switch(rc) { case election_start: election_reset(fsa_election); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); break; case election_lost: update_dc(NULL); if (pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)) { cib_t *cib_conn = controld_globals.cib_conn; register_fsa_input(C_FSA_INTERNAL, I_RELEASE_DC, NULL); cib_conn->cmds->set_secondary(cib_conn, cib_scope_local); } else if (cur_state != S_STARTING) { register_fsa_input(C_FSA_INTERNAL, I_PENDING, NULL); } break; default: crm_trace("Election message resulted in state %d", rc); } } static void feature_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_notice("Feature update failed: %s "CRM_XS" rc=%d", pcmk_strerror(rc), rc); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /*! * \internal * \brief Update a node attribute in the CIB during a DC takeover * * \param[in] name Name of attribute to update * \param[in] value New attribute value */ #define dc_takeover_update_attr(name, value) do { \ cib__update_node_attr(controld_globals.logger_out, \ controld_globals.cib_conn, cib_none, \ XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, \ name, value, NULL, NULL); \ } while (0) /* A_DC_TAKEOVER */ void do_dc_takeover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *cib = NULL; const char *cluster_type = name_for_cluster_type(get_cluster_type()); pid_t watchdog = pcmk__locate_sbd(); crm_info("Taking over DC status for this partition"); controld_set_fsa_input_flags(R_THE_DC); execute_stonith_cleanup(); election_reset(fsa_election); controld_set_fsa_input_flags(R_JOIN_OK|R_INVOKE_PE); controld_globals.cib_conn->cmds->set_primary(controld_globals.cib_conn, cib_scope_local); cib = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add(cib, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); controld_update_cib(XML_TAG_CIB, cib, cib_none, feature_update_callback); dc_takeover_update_attr(PCMK_OPT_HAVE_WATCHDOG, pcmk__btoa(watchdog)); dc_takeover_update_attr(PCMK_OPT_DC_VERSION, PACEMAKER_VERSION "-" BUILD_VERSION); dc_takeover_update_attr(PCMK_OPT_CLUSTER_INFRASTRUCTURE, cluster_type); #if SUPPORT_COROSYNC if ((controld_globals.cluster_name == NULL) && is_corosync_cluster()) { char *cluster_name = pcmk__corosync_cluster_name(); if (cluster_name != NULL) { dc_takeover_update_attr(PCMK_OPT_CLUSTER_NAME, cluster_name); } free(cluster_name); } #endif controld_trigger_config(); free_xml(cib); } /* A_DC_RELEASE */ void do_dc_release(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_DC_RELEASE) { crm_debug("Releasing the role of DC"); controld_clear_fsa_input_flags(R_THE_DC); controld_expect_sched_reply(NULL); } else if (action & A_DC_RELEASED) { crm_info("DC role released"); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { xmlNode *update = NULL; crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); update = create_node_state_update(node, node_update_expected, NULL, __func__); /* Don't need a based response because controld will stop. */ fsa_cib_anon_update_discard_reply(XML_CIB_TAG_STATUS, update); free_xml(update); } register_fsa_input(C_FSA_INTERNAL, I_RELEASE_SUCCESS, NULL); } else { crm_err("Unknown DC action %s", fsa_action2string(action)); } crm_trace("Am I still the DC? %s", AM_I_DC ? XML_BOOLEAN_YES : XML_BOOLEAN_NO); } diff --git a/daemons/controld/controld_timers.c b/daemons/controld/controld_timers.c index a978e56787..66e8196bc7 100644 --- a/daemons/controld/controld_timers.c +++ b/daemons/controld/controld_timers.c @@ -1,509 +1,509 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include //! FSA mainloop timer type typedef struct fsa_timer_s { guint source_id; //!< Timer source ID guint period_ms; //!< Timer period enum crmd_fsa_input fsa_input; //!< Input to register if timer pops gboolean (*callback) (gpointer data); //!< What do if timer pops bool log_error; //!< Timer popping indicates error int counter; //!< For detecting loops } fsa_timer_t; //! Wait before retrying a failed cib or executor connection static fsa_timer_t *wait_timer = NULL; //! Periodically re-run scheduler (for date_spec evaluation and as a failsafe) static fsa_timer_t *recheck_timer = NULL; //! Wait at start-up, or after an election, for DC to make contact static fsa_timer_t *election_timer = NULL; //! Delay start of new transition with expectation something else might happen static fsa_timer_t *transition_timer = NULL; //! \c PCMK_OPT_JOIN_INTEGRATION_TIMEOUT static fsa_timer_t *integration_timer = NULL; //! \c PCMK_OPT_JOIN_FINALIZATION_TIMEOUT static fsa_timer_t *finalization_timer = NULL; // Wait for DC to stop all resources and give us the all-clear to shut down fsa_timer_t *shutdown_escalation_timer = NULL; //! Cluster recheck interval (from configuration) static guint recheck_interval_ms = 0; static const char * get_timer_desc(fsa_timer_t * timer) { if (timer == election_timer) { return "Election Trigger"; } else if (timer == shutdown_escalation_timer) { return "Shutdown Escalation"; } else if (timer == integration_timer) { return "Integration Timer"; } else if (timer == finalization_timer) { return "Finalization Timer"; } else if (timer == transition_timer) { return "New Transition Timer"; } else if (timer == wait_timer) { return "Wait Timer"; } else if (timer == recheck_timer) { return "Cluster Recheck Timer"; } return "Unknown Timer"; } /*! * \internal * \brief Stop an FSA timer * * \param[in,out] timer Timer to stop * * \return true if the timer was running, or false otherwise */ static bool controld_stop_timer(fsa_timer_t *timer) { CRM_CHECK(timer != NULL, return false); if (timer->source_id != 0) { crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); g_source_remove(timer->source_id); timer->source_id = 0; } else { crm_trace("%s already stopped (would inject %s if popped after %ums)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms); return false; } return true; } /*! * \internal * \brief Start an FSA timer * * \param[in,out] timer Timer to start */ static void controld_start_timer(fsa_timer_t *timer) { if (timer->source_id == 0 && timer->period_ms > 0) { timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer); CRM_ASSERT(timer->source_id != 0); crm_debug("Started %s (inject %s if pops after %ums, source=%d)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); } else { crm_debug("%s already running (inject %s if pops after %ums, source=%d)", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms, timer->source_id); } } /* A_DC_TIMER_STOP, A_DC_TIMER_START, * A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START * A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START */ void do_timer_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean timer_op_ok = TRUE; if (action & A_DC_TIMER_STOP) { timer_op_ok = controld_stop_timer(election_timer); } else if (action & A_FINALIZE_TIMER_STOP) { timer_op_ok = controld_stop_timer(finalization_timer); } else if (action & A_INTEGRATE_TIMER_STOP) { timer_op_ok = controld_stop_timer(integration_timer); } /* don't start a timer that wasn't already running */ if (action & A_DC_TIMER_START && timer_op_ok) { controld_start_timer(election_timer); if (AM_I_DC) { /* there can be only one */ register_fsa_input(cause, I_ELECTION, NULL); } } else if (action & A_FINALIZE_TIMER_START) { controld_start_timer(finalization_timer); } else if (action & A_INTEGRATE_TIMER_START) { controld_start_timer(integration_timer); } } static gboolean crm_timer_popped(gpointer data) { fsa_timer_t *timer = (fsa_timer_t *) data; if (timer->log_error) { crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums", get_timer_desc(timer), fsa_state2string(controld_globals.fsa_state), fsa_input2string(timer->fsa_input), timer->period_ms); } else { crm_info("%s just popped " CRM_XS " input=%s time=%ums", get_timer_desc(timer), fsa_input2string(timer->fsa_input), timer->period_ms); timer->counter++; } if ((timer == election_timer) && (election_timer->counter > 5)) { crm_notice("We appear to be in an election loop, something may be wrong"); crm_write_blackbox(0, NULL); election_timer->counter = 0; } controld_stop_timer(timer); // Make timer _not_ go off again if (timer->fsa_input == I_INTEGRATED) { crm_info("Welcomed: %d, Integrated: %d", crmd_join_phase_count(crm_join_welcomed), crmd_join_phase_count(crm_join_integrated)); if (crmd_join_phase_count(crm_join_welcomed) == 0) { // If we don't even have ourselves, start again register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL, __func__); } else { register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL); } } else if ((timer == recheck_timer) && (controld_globals.fsa_state != S_IDLE)) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(controld_globals.fsa_state)); } else if ((timer == finalization_timer) && (controld_globals.fsa_state != S_FINALIZE_JOIN)) { crm_debug("Discarding %s event in state: %s", fsa_input2string(timer->fsa_input), fsa_state2string(controld_globals.fsa_state)); } else if (timer->fsa_input != I_NULL) { register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL); } controld_trigger_fsa(); return TRUE; } bool controld_init_fsa_timers(void) { transition_timer = calloc(1, sizeof(fsa_timer_t)); if (transition_timer == NULL) { return FALSE; } integration_timer = calloc(1, sizeof(fsa_timer_t)); if (integration_timer == NULL) { return FALSE; } finalization_timer = calloc(1, sizeof(fsa_timer_t)); if (finalization_timer == NULL) { return FALSE; } election_timer = calloc(1, sizeof(fsa_timer_t)); if (election_timer == NULL) { return FALSE; } shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); if (shutdown_escalation_timer == NULL) { return FALSE; } wait_timer = calloc(1, sizeof(fsa_timer_t)); if (wait_timer == NULL) { return FALSE; } recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (recheck_timer == NULL) { return FALSE; } election_timer->source_id = 0; election_timer->period_ms = 0; election_timer->fsa_input = I_DC_TIMEOUT; election_timer->callback = crm_timer_popped; election_timer->log_error = FALSE; transition_timer->source_id = 0; transition_timer->period_ms = 0; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->log_error = FALSE; integration_timer->source_id = 0; integration_timer->period_ms = 0; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->log_error = TRUE; finalization_timer->source_id = 0; finalization_timer->period_ms = 0; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->log_error = FALSE; /* We can't use I_FINALIZED here, because that creates a bug in the join * process where a joining node can be stuck in S_PENDING while we think it * is in S_NOT_DC. This created an infinite transition loop in which we * continually send probes which the node NACKs because it's pending. * * If we have nodes where the cluster layer is active but the controller is * not, we can avoid this causing an election/join loop, in the integration * phase. */ finalization_timer->fsa_input = I_ELECTION; shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = 0; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->log_error = TRUE; wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->log_error = FALSE; recheck_timer->source_id = 0; recheck_timer->period_ms = 0; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->log_error = FALSE; return TRUE; } /*! * \internal * \brief Configure timers based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fsa_timers(GHashTable *options) { const char *value = NULL; // Election timer value = g_hash_table_lookup(options, PCMK_OPT_DC_DEADTIME); - pcmk__parse_interval_spec(value, &(election_timer->period_ms)); + pcmk_parse_interval_spec(value, &(election_timer->period_ms)); // Integration timer value = g_hash_table_lookup(options, PCMK_OPT_JOIN_INTEGRATION_TIMEOUT); - pcmk__parse_interval_spec(value, &(integration_timer->period_ms)); + pcmk_parse_interval_spec(value, &(integration_timer->period_ms)); // Finalization timer value = g_hash_table_lookup(options, PCMK_OPT_JOIN_FINALIZATION_TIMEOUT); - pcmk__parse_interval_spec(value, &(finalization_timer->period_ms)); + pcmk_parse_interval_spec(value, &(finalization_timer->period_ms)); // Shutdown escalation timer value = g_hash_table_lookup(options, PCMK_OPT_SHUTDOWN_ESCALATION); - pcmk__parse_interval_spec(value, &(shutdown_escalation_timer->period_ms)); + pcmk_parse_interval_spec(value, &(shutdown_escalation_timer->period_ms)); crm_debug("Shutdown escalation occurs if DC has not responded to request " "in %ums", shutdown_escalation_timer->period_ms); // Transition timer value = g_hash_table_lookup(options, PCMK_OPT_TRANSITION_DELAY); - pcmk__parse_interval_spec(value, &(transition_timer->period_ms)); + pcmk_parse_interval_spec(value, &(transition_timer->period_ms)); // Recheck interval value = g_hash_table_lookup(options, PCMK_OPT_CLUSTER_RECHECK_INTERVAL); - pcmk__parse_interval_spec(value, &recheck_interval_ms); + pcmk_parse_interval_spec(value, &recheck_interval_ms); crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms); } void controld_free_fsa_timers(void) { controld_stop_timer(transition_timer); controld_stop_timer(integration_timer); controld_stop_timer(finalization_timer); controld_stop_timer(election_timer); controld_stop_timer(shutdown_escalation_timer); controld_stop_timer(wait_timer); controld_stop_timer(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; free(election_timer); election_timer = NULL; free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; } /*! * \internal * \brief Check whether the transition timer is started * \return true if the transition timer is started, or false otherwise */ bool controld_is_started_transition_timer(void) { return (transition_timer->period_ms > 0) && (transition_timer->source_id != 0); } /*! * \internal * \brief Start the recheck timer */ void controld_start_recheck_timer(void) { // Default to recheck interval configured in CIB (if any) guint period_ms = recheck_interval_ms; // If scheduler supplied a "recheck by" time, check whether that's sooner if (controld_globals.transition_graph->recheck_by > 0) { time_t diff_seconds = controld_globals.transition_graph->recheck_by - time(NULL); if (diff_seconds < 1) { // We're already past the desired time period_ms = 500; } else { period_ms = (guint) diff_seconds * 1000; } // Use "recheck by" only if it's sooner than interval from CIB if (period_ms > recheck_interval_ms) { period_ms = recheck_interval_ms; } } if (period_ms > 0) { recheck_timer->period_ms = period_ms; controld_start_timer(recheck_timer); } } /*! * \internal * \brief Start the wait timer */ void controld_start_wait_timer(void) { controld_start_timer(wait_timer); } /*! * \internal * \brief Stop the recheck timer * * \return true if the recheck timer was running, or false otherwise */ bool controld_stop_recheck_timer(void) { return controld_stop_timer(recheck_timer); } /*! * \brief Get the transition timer's configured period * \return The transition_timer's period */ guint controld_get_period_transition_timer(void) { return transition_timer->period_ms; } /*! * \internal * \brief Reset the election timer's counter to 0 */ void controld_reset_counter_election_timer(void) { election_timer->counter = 0; } /*! * \internal * \brief Stop the transition timer * * \return true if the transition timer was running, or false otherwise */ bool controld_stop_transition_timer(void) { return controld_stop_timer(transition_timer); } /*! * \internal * \brief Start the transition timer */ void controld_start_transition_timer(void) { controld_start_timer(transition_timer); } /*! * \internal * \brief Start the countdown sequence for a shutdown * * \param[in] default_period_ms Period to use if the shutdown escalation * timer's period is 0 */ void controld_shutdown_start_countdown(guint default_period_ms) { if (shutdown_escalation_timer->period_ms == 0) { shutdown_escalation_timer->period_ms = default_period_ms; } crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums", shutdown_escalation_timer->period_ms); controld_start_timer(shutdown_escalation_timer); } diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index 3220d2836d..ac214a7d8b 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -1,626 +1,626 @@ /* * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events" static int exec_call_id = 0; static gboolean start_test(gpointer user_data); static void try_connect(void); static char *key = NULL; static char *val = NULL; static struct { int verbose; int quiet; guint interval_ms; int timeout; int start_delay; int cancel_call_id; gboolean no_wait; gboolean is_running; gboolean no_connect; int exec_call_opts; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; gboolean use_tls; lrmd_key_value_t *params; } options; static gboolean interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { - return pcmk__parse_interval_spec(optarg, - &options.interval_ms) == pcmk_rc_ok; + return pcmk_parse_interval_spec(optarg, + &options.interval_ms) == pcmk_rc_ok; } static gboolean notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) { options.exec_call_opts = lrmd_opt_notify_orig_only; } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) { options.exec_call_opts = lrmd_opt_notify_changes_only; } return TRUE; } static gboolean param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) { pcmk__str_update(&key, optarg); } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) { pcmk__str_update(&val, optarg); } if (key != NULL && val != NULL) { options.params = lrmd_key_value_add(options.params, key, val); pcmk__str_update(&key, NULL); pcmk__str_update(&val, NULL); } return TRUE; } static GOptionEntry basic_entries[] = { { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call, "Directly relates to executor API functions", NULL }, { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running, "Determine if a resource is registered and running", NULL }, { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen, "Listen for a specific event string", NULL }, { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait, "Make api call and do not wait for result", NULL }, { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify client changes to recurring operations", NULL }, { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify this client of the results of an API action", NULL }, { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls, "Use TLS backend for local connection", NULL }, { NULL } }; static GOptionEntry api_call_entries[] = { { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action, NULL, NULL }, { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id, NULL, NULL }, { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class, NULL, NULL }, { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb, NULL, NULL }, { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider, NULL, NULL }, { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id, NULL, NULL }, { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay, NULL, NULL }, { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout, NULL, NULL }, { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type, NULL, NULL }, { NULL } }; static GMainLoop *mainloop = NULL; static lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; static crm_exit_t test_exit(crm_exit_t exit_code) { lrmd_api_delete(lrmd_conn); return crm_exit(exit_code); } #define print_result(fmt, args...) \ if (!options.quiet) { \ printf(fmt "\n" , ##args); \ } #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ services_ocf_exitcode_str(event->rc), \ pcmk_exec_status_str(event->op_status)); \ crm_info("%s", event_buf_v0); static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); lrmd_conn = NULL; } static void read_events(lrmd_event_data_t * event) { report_event(event); if (options.listen) { if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) { print_result("LISTEN EVENT SUCCESSFUL"); test_exit(CRM_EX_OK); } } if (exec_call_id && (event->call_id == exec_call_id)) { if (event->op_status == 0 && event->rc == 0) { print_result("API-CALL SUCCESSFUL for 'exec'"); } else { print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s", event->rc, pcmk_exec_status_str(event->op_status)); test_exit(CRM_EX_ERROR); } if (!options.listen) { test_exit(CRM_EX_OK); } } } static gboolean timeout_err(gpointer data) { print_result("LISTEN EVENT FAILURE - timeout occurred, never found"); test_exit(CRM_EX_TIMEOUT); return FALSE; } static void connection_events(lrmd_event_data_t * event) { int rc = event->connection_rc; if (event->type != lrmd_event_connect) { /* ignore */ return; } if (!rc) { crm_info("Executor client connection established"); start_test(NULL); return; } else { sleep(1); try_connect(); crm_notice("Executor client connection failed"); } } static void try_connect(void) { int tries = 10; static int num_tries = 0; int rc = 0; lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); for (; num_tries < tries; num_tries++) { rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000); if (!rc) { return; /* we'll hear back in async callback */ } sleep(1); } print_result("API CONNECTION FAILURE"); test_exit(CRM_EX_ERROR); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { try_connect(); /* async connect -- this function will get called back into */ return 0; } } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval_ms, options.timeout, options.start_delay, options.exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result("API-CALL 'exec' action pending, waiting on response"); } } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result("RSC_INFO: id:%s class:%s provider:%s type:%s", rsc_info->id, rsc_info->standard, (rsc_info->provider? rsc_info->provider : ""), rsc_info->type); lrmd_free_rsc_info(rsc_info); rc = pcmk_ok; } else { rc = -1; } } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval_ms); } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == pcmk_ok) { print_result("%s", output); free(output); } } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result("%d agents found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no agents found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result("%d providers found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); if (rc > 0) { print_result("%d standards found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) { GList *op_list = NULL; GList *op_item = NULL; rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, &op_list); for (op_item = op_list; op_item != NULL; op_item = op_item->next) { lrmd_op_info_t *op_info = op_item->data; print_result("RECURRING_OP: %s_%s_%s timeout=%sms", op_info->rsc_id, op_info->action, op_info->interval_ms_s, op_info->timeout_ms_s); lrmd_free_op_info(op_info); } g_list_free(op_list); } else if (options.api_call) { print_result("API-CALL FAILURE unknown action '%s'", options.action); test_exit(CRM_EX_ERROR); } if (rc < 0) { print_result("API-CALL FAILURE for '%s' api_rc:%d", options.api_call, rc); test_exit(CRM_EX_ERROR); } if (options.api_call && rc == pcmk_ok) { print_result("API-CALL SUCCESSFUL for '%s'", options.api_call); if (!options.listen) { test_exit(CRM_EX_OK); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ test_exit(CRM_EX_OK); } return 0; } /*! * \internal * \brief Generate resource parameters from CIB if none explicitly given * * \return Standard Pacemaker return code */ static int generate_params(void) { int rc = pcmk_rc_ok; pcmk_scheduler_t *scheduler = NULL; xmlNode *cib_xml_copy = NULL; pcmk_resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; if (options.params != NULL) { return pcmk_rc_ok; // User specified parameters explicitly } // Retrieve and update CIB rc = cib__signon_query(NULL, NULL, &cib_xml_copy); if (rc != pcmk_rc_ok) { return rc; } if (!cli_config_update(&cib_xml_copy, NULL, FALSE)) { crm_err("Could not update CIB"); return pcmk_rc_cib_corrupt; } // Calculate cluster status scheduler = pe_new_working_set(); if (scheduler == NULL) { crm_crit("Could not allocate scheduler data"); return ENOMEM; } pe__set_working_set_flags(scheduler, pcmk_sched_no_counts|pcmk_sched_no_compat); scheduler->input = cib_xml_copy; scheduler->now = crm_time_new(NULL); cluster_status(scheduler); // Find resource in CIB rsc = pe_find_resource_with_flags(scheduler->resources, options.rsc_id, pcmk_rsc_match_history |pcmk_rsc_match_basename); if (rsc == NULL) { crm_err("Resource does not exist in config"); pe_free_working_set(scheduler); return EINVAL; } // Add resource instance parameters to options.params params = pe_rsc_params(rsc, NULL, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { options.params = lrmd_key_value_add(options.params, key, value); } } // Add resource meta-attributes to options.params meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, NULL, scheduler); g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { char *crm_name = crm_meta_name(key); options.params = lrmd_key_value_add(options.params, crm_name, value); free(crm_name); } g_hash_table_destroy(meta); pe_free_working_set(scheduler); return rc; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, basic_entries); pcmk__add_arg_group(context, "api-call", "API Call Options:", "Parameters for api-call option", api_call_entries); return context; } int main(int argc, char **argv) { GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; crm_trigger_t *trig = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); /* Typically we'd pass all the single character options that take an argument * as the second parameter here (and there's a bunch of those in this tool). * However, we control how this program is called so we can just not call it * in a way where the preprocessing ever matters. */ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, NULL); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } /* We have to use crm_log_init here to set up the logging because there's * different handling for daemons vs. command line programs, and * pcmk__cli_init_logging is set up to only handle the latter. */ crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc, argv, FALSE); for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents", "list_standards", "list_ocf_providers", NULL)) { options.no_connect = TRUE; } if (options.is_running) { int rc = pcmk_rc_ok; if (options.rsc_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "--is-running requires --rsc-id"); goto done; } options.interval_ms = 0; if (options.timeout == 0) { options.timeout = 30000; } rc = generate_params(); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Can not determine resource status: " "unable to get parameters from CIB"); goto done; } options.api_call = "exec"; options.action = PCMK_ACTION_MONITOR; options.exec_call_opts = lrmd_opt_notify_orig_only; } if (!options.api_call && !options.listen) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must specify at least one of --api-call, --listen, " "or --is-running"); goto done; } if (options.use_tls) { lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); } else { lrmd_conn = lrmd_api_new(); } trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); free(key); free(val); pcmk__output_and_clear_error(&error, NULL); return test_exit(exit_code); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index f6a02a080d..ed2c43ccb2 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3683 +1,3683 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { - pcmk__parse_interval_spec(value, &delay_max); + pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = strdup(hash_value); char *valptr = value; CRM_ASSERT(value != NULL); if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { - pcmk__parse_interval_spec(value, &delay_base); + pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = calloc(1, sizeof(async_command_t)); CRM_ASSERT(cmd != NULL); // All messages must include these cmd->action = crm_element_value_copy(op, F_STONITH_ACTION); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->target = crm_element_value_copy(op, F_STONITH_TARGET); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { crm_node_t *node = crm_get_peer(0, cmd->target); cmd->target_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } *metadata = string2xml(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, "name"); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* "automatic" means the cluster will unfence node when it joins */ /* "required" is a deprecated synonym for "automatic" */ if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, "on_target")) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in] dev XML containing device parameters */ static GHashTable * xml2device_params(const char *name, const xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION " cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { map_action(params, PCMK_ACTION_REBOOT, value); } else { map_action(params, PCMK_ACTION_OFF, value); map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = "static-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = "dynamic-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = "status"; } else { check_type = PCMK__VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { /* Other than "static-list", dev-> targets is unnecessary. */ g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); pcmk__str_update(&cmd->target, target); cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check="status" if the user didn't explicitly * specify "dynamic-list". */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); g_hash_table_replace(device->params, strdup(PCMK_STONITH_HOST_LIST), strdup(stonith_our_uname)); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE), crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", ID(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, XML_TAG_FENCING_LEVEL)) { xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(ID(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", ID(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", ID(level)); return; } // Ensure level ID is in allowed range if ((id <= 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, XML_ATTR_STONITH_INDEX), ""), ID(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); if (tp == NULL) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); free(target); return; } tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, XML_ATTR_STONITH_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(ID(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = calloc(alloc_size, sizeof(char)); if (rv) { char *pos = rv; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { crm_crit("Cannot search for capable fence devices: %s", strerror(ENOMEM)); callback(NULL, user_data); return; } pcmk__str_update(&search->host, host); pcmk__str_update(&search->action, action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } /* Pack the results into XML */ list = create_xml_node(NULL, __func__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true); } if (!stand_alone && pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, F_SUBTYPE, "broadcast"); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); free_xml(reply); if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); stonith__xe_set_result(notify_data, result); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data); fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; crm_node_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); if (node != NULL) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { add_message_xml(reply, F_STONITH_CALLDATA, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->target); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->uname); return entry->uname; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); } relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); client_name = crm_element_value(request, F_STONITH_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID); const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = calloc(1, sizeof(struct st_query_data)); CRM_ASSERT(query != NULL); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); pcmk__str_update(&query->remote_peer, request->peer); pcmk__str_update(&query->client_id, client_id); pcmk__str_update(&query->target, target); pcmk__str_update(&query->action, action); query->call_options = request->call_options; crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // T_STONITH_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if ((request->peer != NULL) || stand_alone) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request->xml, F_STONITH_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request->xml, FALSE); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } free_xml(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_device_notification(op, &request->result, (dev == NULL)? NULL : ID(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_device_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_level_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_level_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, XML_ATTR_ID, &node_id); name = crm_element_value(request->xml, XML_ATTR_UNAME); reap_crm_member(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { T_STONITH_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } free_xml(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" T_STONITH_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, F_STONITH_OPERATION), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index b4d937e0eb..5f5be45e4e 100644 --- a/include/crm/common/options_internal.h +++ b/include/crm/common/options_internal.h @@ -1,151 +1,151 @@ /* * Copyright 2006-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__OPTIONS_INTERNAL__H # define PCMK__OPTIONS_INTERNAL__H # ifndef PCMK__CONFIG_H # define PCMK__CONFIG_H # include // _Noreturn # endif # include // GHashTable # include // bool -#include // pcmk__parse_interval_spec() +#include // pcmk_parse_interval_spec() _Noreturn void pcmk__cli_help(char cmd); /* * Environment variable option handling */ const char *pcmk__env_option(const char *option); void pcmk__set_env_option(const char *option, const char *value, bool compat); bool pcmk__env_option_enabled(const char *daemon, const char *option); /* * Cluster option handling */ typedef struct pcmk__cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; bool (*is_valid)(const char *); const char *description_short; const char *description_long; } pcmk__cluster_option_t; const char *pcmk__cluster_option(GHashTable *options, const pcmk__cluster_option_t *option_list, int len, const char *name); gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, const char *desc_long, pcmk__cluster_option_t *option_list, int len); void pcmk__validate_cluster_options(GHashTable *options, pcmk__cluster_option_t *option_list, int len); bool pcmk__valid_interval_spec(const char *value); bool pcmk__valid_boolean(const char *value); bool pcmk__valid_int(const char *value); bool pcmk__valid_positive_int(const char *value); bool pcmk__valid_no_quorum_policy(const char *value); bool pcmk__valid_percentage(const char *value); bool pcmk__valid_script(const char *value); // from watchdog.c long pcmk__get_sbd_timeout(void); bool pcmk__get_sbd_sync_resource_startup(void); long pcmk__auto_watchdog_timeout(void); bool pcmk__valid_sbd_timeout(const char *value); // Constants for environment variable names #define PCMK__ENV_AUTHKEY_LOCATION "authkey_location" #define PCMK__ENV_BLACKBOX "blackbox" #define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled" #define PCMK__ENV_CLUSTER_TYPE "cluster_type" #define PCMK__ENV_DEBUG "debug" #define PCMK__ENV_DH_MAX_BITS "dh_max_bits" #define PCMK__ENV_DH_MIN_BITS "dh_min_bits" #define PCMK__ENV_FAIL_FAST "fail_fast" #define PCMK__ENV_IPC_BUFFER "ipc_buffer" #define PCMK__ENV_IPC_TYPE "ipc_type" #define PCMK__ENV_LOGFACILITY "logfacility" #define PCMK__ENV_LOGFILE "logfile" #define PCMK__ENV_LOGFILE_MODE "logfile_mode" #define PCMK__ENV_LOGPRIORITY "logpriority" #define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit" #define PCMK__ENV_NODE_START_STATE "node_start_state" #define PCMK__ENV_PANIC_ACTION "panic_action" #define PCMK__ENV_PHYSICAL_HOST "physical_host" #define PCMK__ENV_REMOTE_ADDRESS "remote_address" #define PCMK__ENV_REMOTE_SCHEMA_DIR "remote_schema_directory" #define PCMK__ENV_REMOTE_PID1 "remote_pid1" #define PCMK__ENV_REMOTE_PORT "remote_port" #define PCMK__ENV_RESPAWNED "respawned" #define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory" #define PCMK__ENV_SERVICE "service" #define PCMK__ENV_STDERR "stderr" #define PCMK__ENV_TLS_PRIORITIES "tls_priorities" #define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox" #define PCMK__ENV_TRACE_FILES "trace_files" #define PCMK__ENV_TRACE_FORMATS "trace_formats" #define PCMK__ENV_TRACE_FUNCTIONS "trace_functions" #define PCMK__ENV_TRACE_TAGS "trace_tags" #define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled" // @COMPAT Drop at 3.0.0; default is plenty #define PCMK__ENV_CIB_TIMEOUT "cib_timeout" // @COMPAT Drop at 3.0.0; likely last used in 1.1.24 #define PCMK__ENV_MCP "mcp" // @COMPAT Drop at 3.0.0; added unused in 1.1.9 #define PCMK__ENV_QUORUM_TYPE "quorum_type" /* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is * managed by systemd, but no longer useful. */ #define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay" // @COMPAT Deprecated since 2.1.0 #define PCMK__OPT_REMOVE_AFTER_STOP "remove-after-stop" // Constants for meta-attribute names #define PCMK__META_ALLOW_UNHEALTHY_NODES "allow-unhealthy-nodes" // Constants for enumerated values for various options #define PCMK__VALUE_CLUSTER "cluster" #define PCMK__VALUE_CUSTOM "custom" #define PCMK__VALUE_FENCING "fencing" #define PCMK__VALUE_GREEN "green" #define PCMK__VALUE_LOCAL "local" #define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red" #define PCMK__VALUE_NONE "none" #define PCMK__VALUE_NOTHING "nothing" #define PCMK__VALUE_ONLY_GREEN "only-green" #define PCMK__VALUE_PROGRESSIVE "progressive" #define PCMK__VALUE_QUORUM "quorum" #define PCMK__VALUE_RED "red" #define PCMK__VALUE_UNFENCING "unfencing" #define PCMK__VALUE_YELLOW "yellow" #endif // PCMK__OPTIONS_INTERNAL__H diff --git a/include/crm/common/strings_internal.h b/include/crm/common/strings_internal.h index 5c5e36a1a1..e5e1758b0a 100644 --- a/include/crm/common/strings_internal.h +++ b/include/crm/common/strings_internal.h @@ -1,215 +1,214 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__STRINGS_INTERNAL__H #define PCMK__STRINGS_INTERNAL__H #include // bool #include // guint, GList, GHashTable /* internal constants for generic string functions (from strings.c) */ #define PCMK__PARSE_INT_DEFAULT -1 #define PCMK__PARSE_DBL_DEFAULT -1.0 /* internal generic string functions (from strings.c) */ enum pcmk__str_flags { pcmk__str_none = 0, pcmk__str_casei = 1 << 0, pcmk__str_null_matches = 1 << 1, pcmk__str_regex = 1 << 2, pcmk__str_star_matches = 1 << 3, }; int pcmk__scan_double(const char *text, double *result, const char *default_text, char **end_text); int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result); -int pcmk__parse_interval_spec(const char *input, guint *result_ms); bool pcmk__starts_with(const char *str, const char *prefix); bool pcmk__ends_with(const char *s, const char *match); bool pcmk__ends_with_ext(const char *s, const char *match); char *pcmk__trim(char *str); void pcmk__add_separated_word(GString **list, size_t init_size, const char *word, const char *separator); int pcmk__compress(const char *data, unsigned int length, unsigned int max, char **result, unsigned int *result_len); int pcmk__scan_ll(const char *text, long long *result, long long default_value); int pcmk__scan_min_int(const char *text, int *result, int minimum); int pcmk__scan_port(const char *text, int *port); int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end); GHashTable *pcmk__strkey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func); GHashTable *pcmk__strikey_table(GDestroyNotify key_destroy_func, GDestroyNotify value_destroy_func); GHashTable *pcmk__str_table_dup(GHashTable *old_table); /*! * \internal * \brief Get a string value with a default if NULL * * \param[in] s String to return if non-NULL * \param[in] default_value String (or NULL) to return if \p s is NULL * * \return \p s if \p s is non-NULL, otherwise \p default_value */ static inline const char * pcmk__s(const char *s, const char *default_value) { return (s == NULL)? default_value : s; } /*! * \internal * \brief Create a hash table with integer keys * * \param[in] value_destroy_func Function to free a value * * \return Newly allocated hash table * \note It is the caller's responsibility to free the result, using * g_hash_table_destroy(). */ static inline GHashTable * pcmk__intkey_table(GDestroyNotify value_destroy_func) { return g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, value_destroy_func); } /*! * \internal * \brief Insert a value into a hash table with integer keys * * \param[in,out] hash_table Table to insert into * \param[in] key Integer key to insert * \param[in] value Value to insert * * \return Whether the key/value was already in the table * \note This has the same semantics as g_hash_table_insert(). If the key * already exists in the table, the old value is freed and replaced. */ static inline gboolean pcmk__intkey_table_insert(GHashTable *hash_table, int key, gpointer value) { return g_hash_table_insert(hash_table, GINT_TO_POINTER(key), value); } /*! * \internal * \brief Look up a value in a hash table with integer keys * * \param[in] hash_table Table to check * \param[in] key Integer key to look for * * \return Value in table for \key (or NULL if not found) */ static inline gpointer pcmk__intkey_table_lookup(GHashTable *hash_table, int key) { return g_hash_table_lookup(hash_table, GINT_TO_POINTER(key)); } /*! * \internal * \brief Remove a key/value from a hash table with integer keys * * \param[in,out] hash_table Table to modify * \param[in] key Integer key of entry to remove * * \return Whether \p key was found and removed from \p hash_table */ static inline gboolean pcmk__intkey_table_remove(GHashTable *hash_table, int key) { return g_hash_table_remove(hash_table, GINT_TO_POINTER(key)); } gboolean pcmk__str_in_list(const gchar *s, const GList *lst, uint32_t flags); bool pcmk__strcase_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; bool pcmk__char_in_any_str(int ch, ...) G_GNUC_NULL_TERMINATED; int pcmk__strcmp(const char *s1, const char *s2, uint32_t flags); int pcmk__numeric_strcasecmp(const char *s1, const char *s2); void pcmk__str_update(char **str, const char *value); void pcmk__g_strcat(GString *buffer, ...) G_GNUC_NULL_TERMINATED; static inline bool pcmk__str_eq(const char *s1, const char *s2, uint32_t flags) { return pcmk__strcmp(s1, s2, flags) == 0; } // Like pcmk__add_separated_word() but using a space as separator static inline void pcmk__add_word(GString **list, size_t init_size, const char *word) { return pcmk__add_separated_word(list, init_size, word, " "); } /* Correctly displaying singular or plural is complicated; consider "1 node has" * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. * * if (a == 1) { * crm_info("singular message"): * } else { * crm_info("plural message"); * } * * though even that's not sufficient for all languages besides English (if we * ever desire to do translations of output and log messages). But the following * convenience macros are "good enough" and more concise for many cases. */ /* Example: * crm_info("Found %d %s", nentries, * pcmk__plural_alt(nentries, "entry", "entries")); */ #define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) // Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); #define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") static inline int pcmk__str_empty(const char *s) { return (s == NULL) || (s[0] == '\0'); } static inline char * pcmk__itoa(int an_int) { return crm_strdup_printf("%d", an_int); } static inline char * pcmk__ftoa(double a_float) { return crm_strdup_printf("%f", a_float); } static inline char * pcmk__ttoa(time_t epoch_time) { return crm_strdup_printf("%lld", (long long) epoch_time); } // note this returns const not allocated static inline const char * pcmk__btoa(bool condition) { return condition? "true" : "false"; } #endif /* PCMK__STRINGS_INTERNAL__H */ diff --git a/include/crm/common/util.h b/include/crm/common/util.h index 24a93c92f6..5bc9eee8ef 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,129 +1,131 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_UTIL__H # define PCMK__CRM_COMMON_UTIL__H # include // gid_t, mode_t, size_t, time_t, uid_t # include # include # include // uint32_t # include # include # include # include # include # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Utility functions * \ingroup core */ # define ONLINESTATUS "online" // Status of an online client # define OFFLINESTATUS "offline" // Status of an offline client /* public node attribute functions (from attrd_client.c) */ char *pcmk_promotion_score_name(const char *rsc_id); /* public Pacemaker Remote functions (from remote.c) */ int crm_default_remote_port(void); /* public score-related functions (from scores.c) */ const char *pcmk_readable_score(int score); int char2score(const char *score); int pcmk__add_scores(int score1, int score2); /* public string functions (from strings.c) */ gboolean crm_is_true(const char *s); int crm_str_to_boolean(const char *s, int *ret); long long crm_get_msec(const char *input); char * crm_strip_trailing_newline(char *str); char *crm_strdup_printf(char const *format, ...) G_GNUC_PRINTF(1, 2); +int pcmk_parse_interval_spec(const char *input, guint *result_ms); + int compare_version(const char *version1, const char *version2); /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); /*! * \brief Check whether any of specified flags are set in a flag group * * \param[in] flag_group The flag group being examined * \param[in] flags_to_check Which flags in flag_group should be checked * * \return true if \p flags_to_check is nonzero and any of its flags are set in * \p flag_group, or false otherwise */ static inline bool pcmk_any_flags_set(uint64_t flag_group, uint64_t flags_to_check) { return (flag_group & flags_to_check) != 0; } /*! * \brief Check whether all of specified flags are set in a flag group * * \param[in] flag_group The flag group being examined * \param[in] flags_to_check Which flags in flag_group should be checked * * \return true if \p flags_to_check is zero or all of its flags are set in * \p flag_group, or false otherwise */ static inline bool pcmk_all_flags_set(uint64_t flag_group, uint64_t flags_to_check) { return (flag_group & flags_to_check) == flags_to_check; } /*! * \brief Convenience alias for pcmk_all_flags_set(), to check single flag */ #define pcmk_is_set(g, f) pcmk_all_flags_set((g), (f)) char *crm_meta_name(const char *field); const char *crm_meta_value(GHashTable * hash, const char *field); char *crm_md5sum(const char *buffer); char *crm_generate_uuid(void); // This belongs in ipc.h but is here for backward compatibility bool crm_is_daemon_name(const char *name); int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); int pcmk_daemon_user(uid_t *uid, gid_t *gid); #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void); #endif char *pcmk_hostname(void); bool pcmk_str_is_infinity(const char *s); bool pcmk_str_is_minus_infinity(const char *s); #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus } #endif #endif diff --git a/include/crm/common/util_compat.h b/include/crm/common/util_compat.h index 8d988f516e..c3ab998eed 100644 --- a/include/crm/common/util_compat.h +++ b/include/crm/common/util_compat.h @@ -1,174 +1,174 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_UTIL_COMPAT__H # define PCMK__CRM_COMMON_UTIL_COMPAT__H # include # include # include #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Deprecated Pacemaker utilities * \ingroup core * \deprecated Do not include this header directly. The utilities in this * header, and the header itself, will be removed in a future * release. */ //! \deprecated Do not use #define crm_get_interval crm_parse_interval_spec //! \deprecated Do not use #define CRM_DEFAULT_OP_TIMEOUT_S "20s" //! \deprecated Use !pcmk_is_set() or !pcmk_all_flags_set() instead static inline gboolean is_not_set(long long word, long long bit) { return ((word & bit) == 0); } //! \deprecated Use pcmk_is_set() or pcmk_all_flags_set() instead static inline gboolean is_set(long long word, long long bit) { return ((word & bit) == bit); } //! \deprecated Use pcmk_any_flags_set() instead static inline gboolean is_set_any(long long word, long long bit) { return ((word & bit) != 0); } //! \deprecated Use strcmp() or strcasecmp() instead gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); //! \deprecated Use strcmp() instead gboolean safe_str_neq(const char *a, const char *b); //! \deprecated Use strcasecmp() instead #define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) //! \deprecated Use snprintf() instead char *crm_itoa_stack(int an_int, char *buf, size_t len); //! \deprecated Use sscanf() instead int pcmk_scan_nvpair(const char *input, char **name, char **value); //! \deprecated Use a standard printf()-style function instead char *pcmk_format_nvpair(const char *name, const char *value, const char *units); //! \deprecated Use \c crm_xml_add() or \c xml_remove_prop() instead const char *crm_xml_replace(xmlNode *node, const char *name, const char *value); //! \deprecated Use a standard printf()-style function instead char *pcmk_format_named_time(const char *name, time_t epoch_time); //! \deprecated Use strtoll() instead long long crm_parse_ll(const char *text, const char *default_text); //! \deprecated Use strtoll() instead int crm_parse_int(const char *text, const char *default_text); //! \deprecated Use strtoll() instead # define crm_atoi(text, default_text) crm_parse_int(text, default_text) //! \deprecated Use g_str_hash() instead guint g_str_hash_traditional(gconstpointer v); //! \deprecated Use g_str_hash() instead #define crm_str_hash g_str_hash_traditional //! \deprecated Do not use Pacemaker for generic string comparison gboolean crm_strcase_equal(gconstpointer a, gconstpointer b); //! \deprecated Do not use Pacemaker for generic string manipulation guint crm_strcase_hash(gconstpointer v); //! \deprecated Use g_hash_table_new_full() instead static inline GHashTable * crm_str_table_new(void) { return g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); } //! \deprecated Use g_hash_table_new_full() instead static inline GHashTable * crm_strcase_table_new(void) { return g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, free); } //! \deprecated Do not use Pacemaker for generic hash table manipulation GHashTable *crm_str_table_dup(GHashTable *old_table); //! \deprecated Use g_hash_able_size() instead static inline guint crm_hash_table_size(GHashTable *hashtable) { if (hashtable == NULL) { return 0; } return g_hash_table_size(hashtable); } //! \deprecated Don't use Pacemaker for string manipulation char *crm_strip_trailing_newline(char *str); //! \deprecated Don't use Pacemaker for string manipulation int pcmk_numeric_strcasecmp(const char *s1, const char *s2); //! \deprecated Don't use Pacemaker for string manipulation static inline char * crm_itoa(int an_int) { return crm_strdup_printf("%d", an_int); } //! \deprecated Don't use Pacemaker for string manipulation static inline char * crm_ftoa(double a_float) { return crm_strdup_printf("%f", a_float); } //! \deprecated Don't use Pacemaker for string manipulation static inline char * crm_ttoa(time_t epoch_time) { return crm_strdup_printf("%lld", (long long) epoch_time); } //! \deprecated Do not use Pacemaker libraries for generic I/O void crm_build_path(const char *path_c, mode_t mode); //! \deprecated Use pcmk_readable_score() instead char *score2char(int score); //! \deprecated Use pcmk_readable_score() instead char *score2char_stack(int score, char *buf, size_t len); -//! \deprecated Do not use +//! \deprecated Use \c pcmk_parse_interval_spec() instead guint crm_parse_interval_spec(const char *input); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_UTIL_COMPAT__H diff --git a/lib/common/options.c b/lib/common/options.c index 2a53c71a34..6ce92b31c5 100644 --- a/lib/common/options.c +++ b/lib/common/options.c @@ -1,580 +1,580 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include void pcmk__cli_help(char cmd) { if (cmd == 'v' || cmd == '$') { printf("Pacemaker %s\n", PACEMAKER_VERSION); printf("Written by Andrew Beekhof and " "the Pacemaker project contributors\n"); } else if (cmd == '!') { printf("Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); } crm_exit(CRM_EX_OK); while(1); // above does not return } /* * Environment variable option handling */ /*! * \internal * \brief Get the value of a Pacemaker environment variable option * * If an environment variable option is set, with either a PCMK_ or (for * backward compatibility) HA_ prefix, log and return the value. * * \param[in] option Environment variable name (without prefix) * * \return Value of environment variable option, or NULL in case of * option name too long or value not found */ const char * pcmk__env_option(const char *option) { const char *const prefixes[] = {"PCMK_", "HA_"}; char env_name[NAME_MAX]; const char *value = NULL; CRM_CHECK(!pcmk__str_empty(option), return NULL); for (int i = 0; i < PCMK__NELEM(prefixes); i++) { int rv = snprintf(env_name, NAME_MAX, "%s%s", prefixes[i], option); if (rv < 0) { crm_err("Failed to write %s%s to buffer: %s", prefixes[i], option, strerror(errno)); return NULL; } if (rv >= sizeof(env_name)) { crm_trace("\"%s%s\" is too long", prefixes[i], option); continue; } value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } } crm_trace("Nothing found for %s", option); return NULL; } /*! * \brief Set or unset a Pacemaker environment variable option * * Set an environment variable option with a \c "PCMK_" prefix and optionally * an \c "HA_" prefix for backward compatibility. * * \param[in] option Environment variable name (without prefix) * \param[in] value New value (or NULL to unset) * \param[in] compat If false and \p value is not \c NULL, set only * \c "PCMK_