diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index 1b42614eb4..e08908d553 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -1,185 +1,185 @@ /* * Copyright 2013-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static attribute_t * attrd_create_attribute(xmlNode *xml) { int dampen = 0; const char *value = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); attribute_t *a = calloc(1, sizeof(attribute_t)); a->id = crm_element_value_copy(xml, PCMK__XA_ATTR_NAME); a->set = crm_element_value_copy(xml, PCMK__XA_ATTR_SET); a->uuid = crm_element_value_copy(xml, PCMK__XA_ATTR_UUID); a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value); crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &a->is_private); a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER); crm_trace("Performing all %s operations as user '%s'", a->id, a->user); if (value != NULL) { dampen = crm_get_msec(value); } crm_trace("Created attribute %s with %s write delay", a->id, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); if(dampen > 0) { a->timeout_ms = dampen; a->timer = attrd_add_timer(a->id, a->timeout_ms, a); } else if (dampen < 0) { crm_warn("Ignoring invalid delay %s for attribute %s", value, a->id); } g_hash_table_replace(attributes, a->id, a); return a; } static int attrd_update_dampening(attribute_t *a, xmlNode *xml, const char *attr) { const char *dvalue = crm_element_value(xml, PCMK__XA_ATTR_DAMPENING); int dampen = 0; if (dvalue == NULL) { crm_warn("Could not update %s: peer did not specify value for delay", attr); return EINVAL; } dampen = crm_get_msec(dvalue); if (dampen < 0) { crm_warn("Could not update %s: invalid delay value %dms (%s)", attr, dampen, dvalue); return EINVAL; } if (a->timeout_ms != dampen) { mainloop_timer_del(a->timer); a->timeout_ms = dampen; if (dampen > 0) { a->timer = attrd_add_timer(attr, a->timeout_ms, a); crm_info("Update attribute %s delay to %dms (%s)", attr, dampen, dvalue); } else { a->timer = NULL; crm_info("Update attribute %s to remove delay", attr); } /* If dampening changed, do an immediate write-out, * otherwise repeated dampening changes would prevent write-outs */ attrd_write_or_elect_attribute(a); } return pcmk_rc_ok; } GHashTable *attributes = NULL; /*! * \internal * \brief Create an XML representation of an attribute for use in peer messages * - * \param[in] parent Create attribute XML as child element of this element - * \param[in] a Attribute to represent - * \param[in] v Attribute value to represent - * \param[in] force_write If true, value should be written even if unchanged + * \param[in,out] parent Create attribute XML as child element of this + * \param[in] a Attribute to represent + * \param[in] v Attribute value to represent + * \param[in] force_write If true, value should be written even if unchanged * * \return XML representation of attribute */ xmlNode * -attrd_add_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v, - bool force_write) +attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write) { xmlNode *xml = create_xml_node(parent, __func__); crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set); crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); pcmk__xe_add_node(xml, v->nodename, v->nodeid); if (v->is_remote != 0) { crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); } crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000); crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private); crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write); return xml; } void attrd_clear_value_seen(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a; attribute_value_t *v = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { v->seen = FALSE; crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); } } } attribute_t * attrd_populate_attribute(xmlNode *xml, const char *attr) { attribute_t *a = NULL; bool update_both = false; const char *op = crm_element_value(xml, PCMK__XA_TASK); // NULL because PCMK__ATTRD_CMD_SYNC_RESPONSE has no PCMK__XA_TASK update_both = pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH, pcmk__str_null_matches); // Look up or create attribute entry a = g_hash_table_lookup(attributes, attr); if (a == NULL) { if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE, pcmk__str_none)) { a = attrd_create_attribute(xml); } else { crm_warn("Could not update %s: attribute not found", attr); return NULL; } } // Update attribute dampening if (update_both || pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { int rc = attrd_update_dampening(a, xml, attr); if (rc != pcmk_rc_ok || !update_both) { return NULL; } } return a; } diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index b466bdba53..8eeecc1378 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,563 +1,565 @@ /* * Copyright 2013-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" extern crm_exit_t attrd_exit_status; static void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { const char *election_op = crm_element_value(xml, F_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down()) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = crm_element_value(xml, F_ORIG), .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Override an attribute sync with a local value * * Broadcast the local node's value for an attribute that's different from the * value provided in a peer's attribute synchronization response. This ensures a * node's values for itself take precedence and all peers are kept in sync. * * \param[in] a Attribute entry to override * * \return Local instance of attribute value */ static attribute_value_t * -broadcast_local_value(attribute_t *a) +broadcast_local_value(const attribute_t *a) { attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); attrd_add_value_xml(sync, a, v, false); attrd_xml_add_writer(sync); attrd_send_message(NULL, sync); free_xml(sync); return v; } /*! * \internal * \brief Ensure a Pacemaker Remote node is in the correct peer cache * - * \param[in] + * \param[in] node_name Name of Pacemaker Remote node to check */ static void cache_remote_node(const char *node_name) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, node_name); } // Ensure node is in the remote peer cache CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } #define state_text(state) pcmk__s((state), "in unknown state") /*! * \internal * \brief Return host's hash table entry (creating one if needed) * - * \param[in] values Hash table of values - * \param[in] host Name of peer to look up - * \param[in] xml XML describing the attribute + * \param[in,out] values Hash table of values + * \param[in] host Name of peer to look up + * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * -attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) +attrd_lookup_or_create_value(GHashTable *values, const char *host, + const xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { cache_remote_node(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); pcmk__str_update(&v->nodename, host); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } static void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); switch (kind) { case crm_status_uname: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state)); break; case crm_status_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->uname, (gone? "no longer" : "now")); break; case crm_status_nstate: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->uname, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); // Ensure remote nodes that come up are in the remote node cache } else if (!gone && is_remote) { cache_remote_node(peer->uname); } } static void record_peer_nodeid(attribute_value_t *v, const char *host) { crm_node_t *known_peer = crm_get_peer(v->nodeid, host); crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { attrd_write_attributes(false, false); } } static void -update_attr_on_host(attribute_t *a, crm_node_t *peer, xmlNode *xml, const char *attr, - const char *value, const char *host, bool filter, - int is_force_write) +update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, + bool filter, int is_force_write) { attribute_value_t *v = NULL; v = attrd_lookup_or_create_value(a->values, host, xml); if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, v->current, value, peer->uname); v = broadcast_local_value(a); } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s with %s write delay", attr, host, pcmk__s(v->current, "(unset)"), pcmk__s(value, "(unset)"), peer->uname, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); pcmk__str_update(&v->current, value); a->changed = true; if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", attr, host, peer->uname, value); a->force_write = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } } /* Set the seen flag for attribute processing held only in the own node. */ v->seen = TRUE; /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && (v->is_remote == FALSE) && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { record_peer_nodeid(v, host); } } static void -attrd_peer_update_one(crm_node_t *peer, xmlNode *xml, bool filter) +attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); int is_force_write = 0; if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(value); } } static void -broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml) +broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei)) { if (sync == NULL) { sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync); free_xml(sync); } } int attrd_cluster_connect(void) { attrd_cluster = calloc(1, sizeof(crm_cluster_t)); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); guint interval_ms = crm_parse_interval_spec(interval_spec); char *attr = NULL; GHashTableIter iter; regex_t regex; crm_node_t *peer = crm_get_peer(0, request->peer); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); } g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * - * \param[in] peer Peer that sent clear request - * \param[in] peer_won Whether peer is the attribute writer - * \param[in] xml Request XML + * \param[in] peer Peer that sent clear request + * \param[in] peer_won Whether peer is the attribute writer + * \param[in,out] xml Request XML */ void -attrd_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml) +attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->uname); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { attrd_peer_update(peer, child, crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ - broadcast_unseen_local_values(peer, xml); + broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for peer %s", host, source); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); attrd_send_message(peer, sync); free_xml(sync); } void -attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) +attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter) { if (xml_has_children(xml)) { for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { /* Set the node name on the child message, assuming it isn't already. */ if (crm_element_value(child, PCMK__XA_ATTR_NODE_NAME) == NULL) { pcmk__xe_add_node(xml, host, 0); } attrd_peer_update_one(peer, child, filter); } } else { attrd_peer_update_one(peer, xml, filter); } } diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index b649b7bf8d..cb8c813ca2 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -1,509 +1,509 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static qb_ipcs_service_t *ipcs = NULL; /*! * \internal * \brief Build the XML reply to a client query * * param[in] attr Name of requested attribute * param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __func__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY); crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); /* Allow caller to use "localhost" to refer to local node */ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, host, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, v->nodename, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); } } } return reply; } xmlNode * attrd_client_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc, *op, *interval_spec; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (minimum_protocol_version >= 2) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ attrd_send_message(NULL, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); /* Map this to an update */ crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { guint interval_ms = crm_parse_interval_spec(interval_spec); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern); free(pattern); } else { crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ if (crm_element_value(xml, PCMK__XA_ATTR_NAME)) { crm_xml_replace(xml, PCMK__XA_ATTR_NAME, NULL); } if (crm_element_value(xml, PCMK__XA_ATTR_VALUE)) { crm_xml_replace(xml, PCMK__XA_ATTR_VALUE, NULL); } return attrd_client_update(request); } xmlNode * attrd_client_peer_remove(pcmk__request_t *request) { xmlNode *xml = request->xml; // Host and ID are not used in combination, rather host has precedence const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); char *host_alloc = NULL; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (host == NULL) { int nodeid = 0; crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); if (nodeid > 0) { crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL); char *host_alloc = NULL; if (node && node->uname) { // Use cached name if available host = node->uname; } else { // Otherwise ask cluster layer host_alloc = get_node_name(nodeid); host = host_alloc; } pcmk__xe_add_node(xml, host, 0); } } if (host) { crm_info("Client %s is requesting all values for %s be removed", pcmk__client_name(request->ipc_client), host); attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ free(host_alloc); } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", pcmk__client_name(request->ipc_client)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_query(pcmk__request_t *request) { xmlNode *query = request->xml; xmlNode *reply = NULL; const char *attr = NULL; crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); /* Request must specify attribute name to query */ attr = crm_element_value(query, PCMK__XA_ATTR_NAME); if (attr == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Ignoring malformed query from %s (no attribute name given)", pcmk__client_name(request->ipc_client)); return NULL; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, PCMK__XA_ATTR_NODE_NAME)); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Could not respond to query from %s: could not create XML reply", pcmk__client_name(request->ipc_client)); return NULL; } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } request->ipc_client->request_id = 0; return reply; } xmlNode * attrd_client_refresh(pcmk__request_t *request) { crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); attrd_write_attributes(true, true); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_update(pcmk__request_t *request) { xmlNode *xml = request->xml; attribute_t *a = NULL; char *host; const char *attr, *value, *regex; /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. */ if (xml_has_children(xml)) { if (minimum_protocol_version >= 4) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. */ attrd_send_message(NULL, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { /* Second, if they do not support that protocol version, split it * up into individual messages and call attrd_client_update on * each one. */ for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { request->xml = child; /* Calling pcmk__set_result is handled by one of these calls to * attrd_client_update, so no need to do it again here. */ attrd_client_update(request); } } return NULL; } host = crm_element_value_copy(xml, PCMK__XA_ATTR_NODE_NAME); attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); /* If a regex was specified, broadcast a message for each match */ if ((attr == NULL) && regex) { GHashTableIter aIter; regex_t *r_patt = calloc(1, sizeof(regex_t)); crm_debug("Setting %s to %s", regex, value); if (regcomp(r_patt, regex, REG_EXTENDED|REG_NOSUB)) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Bad regex '%s' for update from client %s", regex, pcmk__client_name(request->ipc_client)); } else { g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(r_patt, attr, 0, NULL, 0); if (status == 0) { crm_trace("Matched %s with %s", attr, regex); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_send_message(NULL, xml); } } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } free(host); regfree(r_patt); free(r_patt); return NULL; } else if (attr == NULL) { crm_err("Update request did not specify attribute or regular expression"); pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Client %s update request did not specify attribute or regular expression", pcmk__client_name(request->ipc_client)); free(host); return NULL; } if (host == NULL) { crm_trace("Inferring host"); host = strdup(attrd_cluster->uname); pcmk__xe_add_node(xml, host, attrd_cluster->nodeid); } a = g_hash_table_lookup(attributes, attr); if (a == NULL && pcmk__str_eq(request->op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); free(host); return NULL; } /* If value was specified using ++ or += notation, expand to real value */ if (value) { if (attrd_value_needs_expansion(value)) { int int_value; attribute_value_t *v = NULL; if (a) { v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(value, (v? v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, value, int_value); crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); } } crm_debug("Broadcasting %s[%s]=%s%s", attr, host, value, (attrd_election_won()? " (writer)" : "")); free(host); attrd_send_message(NULL, xml); /* ends up at attrd_peer_message() */ pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } /*! * \internal * \brief Accept a new client IPC connection * - * \param[in] c New connection - * \param[in] uid Client user id - * \param[in] gid Client group id + * \param[in,out] c New connection + * \param[in] uid Client user id + * \param[in] gid Client group id * * \return pcmk_ok on success, -errno otherwise */ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); if (attrd_shutting_down()) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return pcmk_ok; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in] c Connection to destroy * * \return FALSE (i.e. do not re-run this callback) */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { crm_trace("Ignoring request to clean up unknown connection %p", c); } else { crm_trace("Cleaning up closed client connection %p", c); pcmk__free_client(client); } return FALSE; } /*! * \internal * \brief Destroy a client IPC connection * - * \param[in] c Connection to destroy + * \param[in,out] c Connection to destroy * * \note We handle a destroyed connection the same as a closed one, * but we need a separate handler because the return type is different. */ static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Destroying client connection %p", c); attrd_ipc_closed(c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); return 0; } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return 0); attrd_handle_request(&request); pcmk__reset_request(&request); } free_xml(xml); return 0; } static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = NULL, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; void attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } } /*! * \internal * \brief Set up attrd IPC communication */ void attrd_init_ipc(void) { pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index cf176cd1ad..71ce90a2c2 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -1,183 +1,185 @@ /* * Copyright 2013-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_ATTRD__H # define PACEMAKER_ATTRD__H #include #include #include #include #include #include #include /* * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will * also set a private attribute for itself with its version, so any attrd can * determine the minimum version supported by all peers. * * Protocol Pacemaker Significant changes * -------- --------- ------------------- * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, * PCMK__ATTRD_CMD_SYNC_RESPONSE * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), * PCMK__ATTRD_CMD_QUERY * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, * PCMK__ATTRD_CMD_UPDATE_DELAY * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes * 4 2.2.0 Multiple attributes can be updated in a single IPC * message */ #define ATTRD_PROTOCOL_VERSION "4" #define attrd_send_ack(client, id, flags) \ pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) void attrd_init_mainloop(void); void attrd_run_mainloop(void); void attrd_set_requesting_shutdown(void); void attrd_clear_requesting_shutdown(void); bool attrd_requesting_shutdown(void); bool attrd_shutting_down(void); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); void attrd_cib_disconnect(void); bool attrd_value_needs_expansion(const char *value); int attrd_expand_value(const char *value, const char *old_value); /* regular expression to clear failures of all resources */ #define ATTRD_RE_CLEAR_ALL \ "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" /* regular expression to clear failure of all operations for one resource * (format takes resource name) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" /* regular expression to clear failure of one operation for one resource * (format takes resource name, operation name, and interval) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms); extern cib_t *the_cib; /* Alerts */ extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); void attrd_cib_replaced_cb(const char *event, xmlNode * msg); void attrd_cib_updated_cb(const char *event, xmlNode *msg); int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value); // Elections void attrd_election_init(void); void attrd_election_fini(void); void attrd_start_election_if_needed(void); bool attrd_election_won(void); void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); void attrd_declare_winner(void); void attrd_remove_voter(const crm_node_t *peer); void attrd_xml_add_writer(xmlNode *xml); typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; gboolean force_write; /* Flag for updating attribute by ignoring delay */ } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; gboolean seen; } attribute_value_t; extern crm_cluster_t *attrd_cluster; extern GHashTable *attributes; #define CIB_OP_TIMEOUT_S 120 int attrd_cluster_connect(void); -void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); +void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, bool uncache, const char *source); void attrd_peer_clear_failure(pcmk__request_t *request); -void attrd_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml); +void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, + xmlNode *xml); void attrd_broadcast_protocol(void); xmlNode *attrd_client_peer_remove(pcmk__request_t *request); xmlNode *attrd_client_clear_failure(pcmk__request_t *request); xmlNode *attrd_client_update(pcmk__request_t *request); xmlNode *attrd_client_refresh(pcmk__request_t *request); xmlNode *attrd_client_query(pcmk__request_t *request); gboolean attrd_send_message(crm_node_t * node, xmlNode * data); -xmlNode *attrd_add_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v, - bool force_write); +xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + const attribute_value_t *v, bool force_write); void attrd_clear_value_seen(void); void attrd_free_attribute(gpointer data); void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); void attrd_write_attribute(attribute_t *a, bool ignore_delay); void attrd_write_attributes(bool all, bool ignore_delay); void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; void attrd_update_minimum_protocol_ver(const char *value); mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); void attrd_unregister_handlers(void); void attrd_handle_request(pcmk__request_t *request); #endif /* PACEMAKER_ATTRD__H */ diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 17e1364cdd..158cfd2055 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,1921 +1,1925 @@ /* * Copyright 2012-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *userdata_str; pcmk__action_result_t result; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean execute_resource_action(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. is nonzero), false otherwise */ static inline bool -time_is_set(struct timespec *timespec) +time_is_set(const struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int -time_diff_ms(struct timespec *now, struct timespec *old) +time_diff_ms(const struct timespec *now, const struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * - * \param[in] cmd Executor command object to reset + * \param[in,out] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static inline bool -action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms) +action_matches(const lrmd_cmd_t *cmd, const char *action, guint interval_ms) { return (cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command to log result for * \param[in] exec_time_ms Execution time in milliseconds, if known * \param[in] queue_time_ms Queue time in milliseconds, if known */ static void -log_finished(lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) +log_finished(const lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) { int log_level = LOG_INFO; GString *str = g_string_sized_new(100); // reasonable starting size if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } g_string_printf(str, "%s %s (call %d", cmd->rsc_id, cmd->action, cmd->call_id); if (cmd->last_pid != 0) { g_string_append_printf(str, ", PID %d", cmd->last_pid); } if (cmd->result.execution_status == PCMK_EXEC_DONE) { g_string_append_printf(str, ") exited with status %d", cmd->result.exit_status); } else { g_string_append_printf(str, ") could not be executed: %s", pcmk_exec_status_str(cmd->result.execution_status)); } if (cmd->result.exit_reason != NULL) { g_string_append_printf(str, " (%s)", cmd->result.exit_reason); } #ifdef PCMK__TIME_USE_CGT g_string_append_printf(str, " (execution time %s", pcmk__readable_interval(exec_time_ms)); if (queue_time_ms > 0) { g_string_append_printf(str, " after being queued %s", pcmk__readable_interval(queue_time_ms)); } g_string_append(str, ")"); #endif do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, execute_resource_action, rsc); // Initialize fence device probes (to return "not running") pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } pcmk__reset_result(&(cmd->result)); free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (cmd && (cmd->interval_ms > 0)) { cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, stonith_recurring_op_helper, cmd); } } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } /*! * \internal * \brief Check whether a list already contains the equivalent of a given action + * + * \param[in] action_list List to search + * \param[in] cmd Action to search for */ static lrmd_cmd_t * -find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd) +find_duplicate_action(const GList *action_list, const lrmd_cmd_t *cmd) { - for (GList *item = action_list; item != NULL; item = item->next) { + for (const GList *item = action_list; item != NULL; item = item->next) { lrmd_cmd_t *dup = item->data; if (action_matches(cmd, dup->action, dup->interval_ms)) { return dup; } } return NULL; } static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { lrmd_cmd_t * dup = NULL; bool dup_pending = true; if (cmd->interval_ms == 0) { return false; } // Search for a duplicate of this action (in-flight or not) dup = find_duplicate_action(rsc->pending_ops, cmd); if (dup == NULL) { dup_pending = false; dup = find_duplicate_action(rsc->recurring_ops, cmd); if (dup == NULL) { return false; } } /* Do not merge fencing monitors marked for cancellation, so we can reply to * the cancellation separately. */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei) && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { return false; } /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); // Merge new action's call ID and user data into existing action dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; free_lrmd_cmd(cmd); cmd = NULL; /* If dup is not pending, that means it has already executed at least once * and is waiting in the interval. In that case, stop waiting and initiate * a new instance now. */ if (!dup_pending) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stop_recurring_timer(dup); stonith_recurring_op_helper(dup); } else { services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } } return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); if (merge_recurring_duplicate(rsc, cmd)) { // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, origin); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. */ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; int exec_time = 0; int queue_time = 0; #ifdef PCMK__TIME_USE_CGT exec_time = time_diff_ms(NULL, &(cmd->t_run)); queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if ((cmd->last_notify_rc == cmd->result.exit_status) && (cmd->last_notify_op_status == cmd->result.execution_status)) { /* only send changes */ return; } } cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->result.exit_status; cmd->last_notify_op_status = cmd->result.execution_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, (long long) cmd->epoch_last_run); crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, (long long) cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); if (cmd->result.action_stderr != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr); } else if (cmd->result.action_stdout != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout); } if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; pcmk__reset_result(&(cmd->result)); cmd->result.execution_status = PCMK_EXEC_DONE; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if ((cmd->interval_ms != 0) && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); free_xml(data.notify); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; enum ocf_exitcode code; #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->result.exit_status != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; // Cast variable instead of function return to keep compilers happy code = services_result2ocf(action->standard, cmd->action, action->rc); pcmk__set_result(&(cmd->result), (int) code, action->status, services__exit_reason(action)); rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; #ifdef PCMK__TIME_USE_CGT if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { if (pcmk__result_ok(&(cmd->result)) && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { /* systemd returns from start and stop actions after the action * begins, not after it completes. We have to jump through a few * hoops so that we don't report 'complete' to the rest of pacemaker * until it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; } else if (pcmk__result_ok(&(cmd->result)) && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } } } } #endif #if SUPPORT_NAGIOS if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { if (action_matches(cmd, "monitor", 0) && pcmk__result_ok(&(cmd->result))) { /* Successfully executed --version for the nagios plugin */ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && !pcmk__result_ok(&(cmd->result))) { #ifdef PCMK__TIME_USE_CGT goagain = true; #endif } } #endif #ifdef PCMK__TIME_USE_CGT if (goagain) { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if (pcmk__result_ok(&(cmd->result))) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, (cmd->real_action? cmd->real_action : cmd->action), cmd->result.exit_status, time_sum, timeout_left); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } } #endif pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), services__grab_stderr(action)); cmd_finalize(cmd, rsc); } /*! * \internal * \brief Process the result of a fence device action (start, stop, or monitor) * - * \param[in] cmd Fence device action that completed - * \param[in] exit_status Fencer API exit status for action - * \param[in] execution_status Fencer API execution status for action - * \param[in] exit_reason Human-friendly detail, if action failed + * \param[in,out] cmd Fence device action that completed + * \param[in] exit_status Fencer API exit status for action + * \param[in] execution_status Fencer API execution status for action + * \param[in] exit_reason Human-friendly detail, if action failed */ static void stonith_action_complete(lrmd_cmd_t *cmd, int exit_status, enum pcmk_exec_status execution_status, const char *exit_reason) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); // Simplify fencer exit status to uniform exit status if (exit_status != CRM_EX_OK) { exit_status = PCMK_OCF_UNKNOWN_ERROR; } if (cmd->result.execution_status == PCMK_EXEC_CANCELLED) { /* An in-flight fence action was cancelled. The execution status is * already correct, so don't overwrite it. */ execution_status = PCMK_EXEC_CANCELLED; } else { /* Some execution status codes have specific meanings for the fencer * that executor clients may not expect, so map them to a simple error * status. */ switch (execution_status) { case PCMK_EXEC_NOT_CONNECTED: case PCMK_EXEC_INVALID: execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NO_FENCE_DEVICE: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (pcmk__str_eq(cmd->action, CRMD_ACTION_STATUS, pcmk__str_none)) { exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, CRMD_ACTION_STOP, pcmk__str_none)) { exit_status = PCMK_OCF_OK; } else { exit_status = PCMK_OCF_NOT_INSTALLED; } execution_status = PCMK_EXEC_ERROR; break; case PCMK_EXEC_NOT_SUPPORTED: exit_status = PCMK_OCF_UNIMPLEMENT_FEATURE; break; default: break; } } pcmk__set_result(&cmd->result, exit_status, execution_status, exit_reason); // Certain successful actions change the known state of the resource if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); // "running" } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); // "not running" } } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence action result: " "Invalid callback arguments (bug?)"); } else { stonith_action_complete((lrmd_cmd_t *) data->userdata, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); } } void stonith_connection_failed(void) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; crm_warn("Connection to fencer lost (any pending operations for " "fence devices will be considered failed)"); g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &rsc)) { if (!pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_none)) { continue; } /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return an error until the resource is stopped or started * successfully. This is especially important if the controller also * went away (possibly due to a cluster layer restart) and won't * receive our client notification of any monitors finalized below. */ if (rsc->fence_probe_result.execution_status == PCMK_EXEC_DONE) { pcmk__set_result(&rsc->fence_probe_result, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } // Consider any active, pending, or recurring operations as failed for (GList *op = rsc->recurring_ops; op != NULL; op = op->next) { lrmd_cmd_t *cmd = op->data; /* This won't free a recurring op but instead restart its timer. * If cmd is rsc->active, this will set rsc->active to NULL, so we * don't have to worry about finalizing it a second time below. */ stonith_action_complete(cmd, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } if (rsc->active != NULL) { rsc->pending_ops = g_list_prepend(rsc->pending_ops, rsc->active); } while (rsc->pending_ops != NULL) { // This will free the op and remove it from rsc->pending_ops stonith_action_complete((lrmd_cmd_t *) rsc->pending_ops->data, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, "Lost connection to fencer"); } } } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * - * \param[in] stonith_api Connection to fencer - * \param[in] rsc Stonith resource to start - * \param[in] cmd Start command to execute + * \param[in,out] stonith_api Connection to fencer + * \param[in] rsc Stonith resource to start + * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int -execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) +execd_stonith_start(stonith_t *stonith_api, const lrmd_rsc_t *rsc, + const lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * - * \param[in] stonith_api Connection to fencer - * \param[in] rsc Stonith resource to stop + * \param[in,out] stonith_api Connection to fencer + * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * - * \param[in] stonith_api Connection to fencer - * \param[in] rsc Stonith resource to monitor - * \param[in] cmd Monitor command being executed + * \param[in,out] stonith_api Connection to fencer + * \param[in,out] rsc Stonith resource to monitor + * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void execute_stonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei) && (cmd->interval_ms == 0)) { // Probes don't require a fencer connection stonith_action_complete(cmd, rsc->fence_probe_result.exit_status, rsc->fence_probe_result.execution_status, rsc->fence_probe_result.exit_reason); return; } else if (stonith_api == NULL) { stonith_action_complete(cmd, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "No connection to fencer"); return; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == pcmk_ok) { do_monitor = TRUE; } } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { do_monitor = TRUE; } else { stonith_action_complete(cmd, PCMK_OCF_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, "Invalid fence device action (bug?)"); return; } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == -pcmk_err_generic)? NULL : pcmk_strerror(rc))); } static void execute_nonstonith_action(lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; cmd_finalize(cmd, rsc); return; } #endif params_copy = pcmk__str_table_dup(cmd->params); action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (action == NULL) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); cmd_finalize(cmd, rsc); return; } if (action->rc != PCMK_OCF_UNKNOWN) { pcmk__set_result(&(cmd->result), action->rc, action->status, services__exit_reason(action)); services_action_free(action); cmd_finalize(cmd, rsc); return; } action->cb_data = cmd; if (services_action_async(action, action_complete)) { /* The services library has taken responsibility for the action. It * could be pending, blocked, or merged into a duplicate recurring * action, in which case the action callback (action_complete()) * will be called when the action completes, otherwise the callback has * already been called. * * action_complete() calls cmd_finalize() which can free cmd, so cmd * cannot be used here. */ } else { /* This is a recurring action that is not being cancelled and could not * be initiated. It has been rescheduled, and the action callback * (action_complete()) has been called, which in this case has already * called cmd_finalize(), which in this case should only reset (not * free) cmd. */ pcmk__set_result(&(cmd->result), action->rc, action->status, services__exit_reason(action)); services_action_free(action); } } static gboolean execute_resource_action(gpointer user_data) { lrmd_rsc_t *rsc = (lrmd_rsc_t *) user_data; lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { execute_stonith_action(rsc, cmd); } else { execute_nonstonith_action(rsc, cmd); } return TRUE; } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { int rc = pcmk_ok; const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_MIN_PROTOCOL_VERSION, protocol_version); rc = -EPROTO; } if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif } *reply = create_lrmd_reply(__func__, rc, call_id); crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(*reply, F_LRMD_CLIENTID, client->id); crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); return rc; } static int process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (%p) still in progress for unregistered resource %s", rsc->active, rsc_id); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); crm_xml_add(op_xml, F_LRMD_RSC_ACTION, (cmd->real_action? cmd->real_action : cmd->action)); crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = first_named_child(request, F_LRMD_CALLDATA); if (rsc_xml) { rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; /* Certain IPC commands may be done only by privileged users (i.e. root or * hacluster), because they would otherwise provide a means of bypassing * ACLs. */ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { #ifdef PCMK__COMPILE_REMOTE if (allowed) { ipc_proxy_forward_client(client, request); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif do_reply = 1; } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { rc = process_lrmd_signon(client, request, call_id, &reply); do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_rsc_info(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_cancel(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { do_notify = 1; do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { if (allowed) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); CRM_LOG_ASSERT(data != NULL); pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG)); } else { rc = -EACCES; } } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_alert_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_recurring(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown IPC request '%s' from client %s", op, pcmk__client_name(client)); } if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", op, pcmk__client_name(client)); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_rc_ok; if (reply == NULL) { reply = create_lrmd_reply(__func__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc != pcmk_rc_ok) { crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d", pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index f4a19d8f82..2e753a68ca 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,555 +1,555 @@ /* * Copyright 2012-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; /* timer for waiting for acknowledgment of shutdown request */ static guint shutdown_ack_timer = 0; static gboolean lrmd_exit(gpointer data); #endif static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); stonith_api_delete(stonith_api); stonith_api = NULL; } else { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { pcmk__client_t *new_client = pcmk__find_client(c); crm_trace("Connection %p", c); CRM_ASSERT(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value == NULL) { client->name = pcmk__itoa(pcmk__client_pid(c)); } else { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * - * \param[in] client Client connection to free + * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { lrmd_exit(NULL); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef PCMK__COMPILE_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, FALSE); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " CRM_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " CRM_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately * * \param[in] data Ignored * * \return Doesn't return * \note This can be used as a timer callback. */ static gboolean lrmd_exit(gpointer data) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); if (stonith_api) { stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->disconnect(stonith_api); stonith_api_delete(stonith_api); } if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); g_hash_table_destroy(rsc_list); if (mainloop) { lrmd_drain_alerts(mainloop); } crm_exit(CRM_EX_OK); return FALSE; } /*! * \internal * \brief Clean up and exit if shutdown has started * * \return Doesn't return */ void execd_exit_if_shutting_down(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_warn("exit because TLS connection was closed and 'shutting_down' set"); lrmd_exit(NULL); } #endif } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Older controller versions will never acknowledge our request, so * set a fairly short timeout to exit quickly in that case. If we * get the ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); } /*! * \internal * \brief Defuse short exit timer if shutting down */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Received shutdown ack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = 0; } return; } #endif crm_debug("Ignoring unexpected shutdown ack"); } /*! * \internal * \brief Make short exit timer fire immediately */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Received shutdown nack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); } return; } #endif crm_debug("Ignoring unexpected shutdown nack"); } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\tThis text", pcmk__option_default }, { "version", no_argument, NULL, '$', "\tVersion information", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output", pcmk__option_default }, { "logfile", required_argument, NULL, 'l', "\tSend logs to the additional named logfile", pcmk__option_default }, #ifdef PCMK__COMPILE_REMOTE { "port", required_argument, NULL, 'p', "\tPort to listen on", pcmk__option_default }, #endif { 0, 0, 0, 0 } }; #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" # define EXECD_NAME "pacemaker-remoted" # define EXECD_DESC "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" # define EXECD_NAME "pacemaker-execd" # define EXECD_DESC "resource agent executor daemon for Pacemaker cluster nodes" #endif int main(int argc, char **argv, char **envp) { int flag = 0; int index = 0; int bump_log_num = 0; const char *option = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv, envp); #endif crm_log_preinit(EXECD_NAME, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, EXECD_DESC); while (1) { flag = pcmk__next_cli_option(argc, argv, &index, NULL); if (flag == -1) { break; } switch (flag) { case 'l': { int rc = pcmk__add_logfile(optarg); if (rc != pcmk_rc_ok) { /* Logging has not yet been initialized, so stderr is * the only way to get information out */ fprintf(stderr, "Logging to %s is disabled: %s\n", optarg, pcmk_rc_str(rc)); } } break; case 'p': setenv("PCMK_remote_port", optarg, 1); break; case 'V': bump_log_num++; break; case '?': case '$': pcmk__cli_help(flag, CRM_EX_OK); break; default: pcmk__cli_help('?', CRM_EX_USAGE); break; } } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK__VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK__VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */ } } crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(CRM_RSCTMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " CRM_RSCTMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); crm_exit(CRM_EX_FATAL); } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); crm_exit(CRM_EX_FATAL); } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ lrmd_exit(NULL); return CRM_EX_OK; } diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index 0efe116819..62c8c3a2d5 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -1,468 +1,470 @@ /* * Copyright 2012-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include "pacemaker-execd.h" #include #include #include #include #include #include #include #include static qb_ipcs_service_t *cib_ro = NULL; static qb_ipcs_service_t *cib_rw = NULL; static qb_ipcs_service_t *cib_shm = NULL; static qb_ipcs_service_t *attrd_ipcs = NULL; static qb_ipcs_service_t *crmd_ipcs = NULL; static qb_ipcs_service_t *stonith_ipcs = NULL; static qb_ipcs_service_t *pacemakerd_ipcs = NULL; // An IPC provider is a cluster node controller connecting as a client static GList *ipc_providers = NULL; /* ipc clients == things like cibadmin, crm_resource, connecting locally */ static GHashTable *ipc_clients = NULL; /*! * \internal * \brief Get an IPC proxy provider * * \return Pointer to a provider if one exists, NULL otherwise * * \note Grab the first provider, which is the most recent connection. That way, * if we haven't yet timed out an old, failed connection, we don't try to * use it. */ pcmk__client_t * ipc_proxy_get_provider(void) { return ipc_providers? (pcmk__client_t *) (ipc_providers->data) : NULL; } /*! * \internal * \brief Accept a client connection on a proxy IPC server * * \param[in] c Client's IPC connection * \param[in] uid Client's user ID * \param[in] gid Client's group ID * \param[in] ipc_channel Name of IPC server to proxy * * \return pcmk_ok on success, -errno on error */ static int32_t ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) { pcmk__client_t *client; pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); xmlNode *msg; if (ipc_proxy == NULL) { crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s " "because not connected to cluster", uid, gid, ipc_channel); return -EREMOTEIO; } /* This new client is a local IPC client on a Pacemaker Remote controlled * node, needing to access cluster node IPC services. */ client = pcmk__new_client(c, uid, gid); if (client == NULL) { return -EREMOTEIO; } /* This ipc client is bound to a single ipc provider. If the * provider goes away, this client is disconnected */ client->userdata = strdup(ipc_proxy->id); client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id); /* Allow remote executor to distinguish between proxied local clients and * actual executor API clients */ pcmk__set_client_flags(client, pcmk__client_to_proxy); g_hash_table_insert(ipc_clients, client->id, client); msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW); crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); crm_debug("Accepted IPC proxy connection (session ID %s) " "from uid %d gid %d on channel %s", client->id, uid, gid, ipc_channel); return 0; } static int32_t crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); } static int32_t attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, T_ATTRD); } static int32_t stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, "stonith-ng"); } static int32_t pacemakerd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return -EREMOTEIO; } static int32_t cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RW); } static int32_t cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, PCMK__SERVER_BASED_RO); } void ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml) { const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION); const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP); xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG); pcmk__client_t *ipc_client; int rc = pcmk_rc_ok; /* If the IPC provider is acknowledging our shutdown request, * defuse the short exit timer to give the cluster time to * stop any resources we're running. */ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK, pcmk__str_casei)) { handle_shutdown_ack(); return; } if (pcmk__str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK, pcmk__str_casei)) { handle_shutdown_nack(); return; } ipc_client = pcmk__find_client_by_id(session); if (ipc_client == NULL) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, F_LRMD_IPC_SESSION, session); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); return; } /* This is an event or response from the ipc provider * going to the local ipc client. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event() * <--3--> ipc server * * This function is receiving a msg from connection 2 * and forwarding it to connection 1. */ if (pcmk__str_eq(msg_type, LRMD_IPC_OP_EVENT, pcmk__str_casei)) { crm_trace("Sending event to %s", ipc_client->id); rc = pcmk__ipc_send_xml(ipc_client, 0, msg, crm_ipc_server_event); } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_RESPONSE, pcmk__str_casei)) { int msg_id = 0; crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); rc = pcmk__ipc_send_xml(ipc_client, msg_id, msg, FALSE); CRM_LOG_ASSERT(msg_id == ipc_client->request_id); ipc_client->request_id = 0; } else if (pcmk__str_eq(msg_type, LRMD_IPC_OP_DESTROY, pcmk__str_casei)) { qb_ipcs_disconnect(ipc_client->ipcs); } else { crm_err("Unknown ipc proxy msg type %s" , msg_type); } if (rc != pcmk_rc_ok) { crm_warn("Could not proxy IPC to client %s: %s " CRM_XS " rc=%d", ipc_client->id, pcmk_rc_str(rc), rc); } } static int32_t ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); pcmk__client_t *ipc_proxy = pcmk__find_client_by_id(client->userdata); xmlNode *request = NULL; xmlNode *msg = NULL; if (!ipc_proxy) { qb_ipcs_disconnect(client->ipcs); return 0; } /* This is a request from the local ipc client going * to the ipc provider. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_dispatch_internal() * <--3--> ipc server * * This function is receiving a request from connection * 1 and forwarding it to connection 2. */ request = pcmk__client_data2xml(client, data, &id, &flags); if (!request) { return 0; } CRM_CHECK(client != NULL, crm_err("Invalid client"); free_xml(request); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); free_xml(request); return FALSE); /* This ensures that synced request/responses happen over the event channel * in the controller, allowing the controller to process the messages async. */ pcmk__set_ipc_flags(flags, pcmk__client_name(client), crm_ipc_proxied); client->request_id = id; msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); crm_xml_add(msg, F_LRMD_IPC_CLIENT, pcmk__client_name(client)); crm_xml_add(msg, F_LRMD_IPC_USER, client->user); crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id); crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); add_message_xml(msg, F_LRMD_IPC_MSG, request); lrmd_server_send_notify(ipc_proxy, msg); free_xml(request); free_xml(msg); return 0; } /*! * \internal * \brief Notify a proxy provider that we wish to shut down * + * \param[in,out] ipc_proxy IPC client connection to proxy provider + * * \return 0 on success, -1 on error */ int ipc_proxy_shutdown_req(pcmk__client_t *ipc_proxy) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); int rc; crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ); /* We don't really have a session, but the controller needs this attribute * to recognize this as proxy communication. */ crm_xml_add(msg, F_LRMD_IPC_SESSION, "0"); rc = (lrmd_server_send_notify(ipc_proxy, msg) != pcmk_rc_ok)? -1 : 0; free_xml(msg); return rc; } static int32_t ipc_proxy_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); pcmk__client_t *ipc_proxy; if (client == NULL) { return 0; } ipc_proxy = pcmk__find_client_by_id(client->userdata); crm_trace("Connection %p", c); if (ipc_proxy) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); } g_hash_table_remove(ipc_clients, client->id); free(client->userdata); client->userdata = NULL; pcmk__free_client(client); return 0; } static void ipc_proxy_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); ipc_proxy_closed(c); } static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { .connection_accept = crmd_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { .connection_accept = attrd_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { .connection_accept = stonith_proxy_accept, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers pacemakerd_proxy_callbacks = { .connection_accept = pacemakerd_proxy_accept, .connection_created = NULL, .msg_process = NULL, .connection_closed = NULL, .connection_destroyed = NULL }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { .connection_accept = cib_proxy_accept_ro, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { .connection_accept = cib_proxy_accept_rw, .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; void ipc_proxy_add_provider(pcmk__client_t *ipc_proxy) { // Prepending ensures the most recent connection is always first ipc_providers = g_list_prepend(ipc_providers, ipc_proxy); } void ipc_proxy_remove_provider(pcmk__client_t *ipc_proxy) { GHashTableIter iter; pcmk__client_t *ipc_client = NULL; char *key = NULL; GList *remove_these = NULL; GList *gIter = NULL; ipc_providers = g_list_remove(ipc_providers, ipc_proxy); g_hash_table_iter_init(&iter, ipc_clients); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { const char *proxy_id = ipc_client->userdata; if (pcmk__str_eq(proxy_id, ipc_proxy->id, pcmk__str_casei)) { crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", ipc_client->id, ipc_client->pid); /* we can't remove during the iteration, so copy items * to a list we can destroy later */ remove_these = g_list_append(remove_these, ipc_client); } } for (gIter = remove_these; gIter != NULL; gIter = gIter->next) { ipc_client = gIter->data; // Disconnection callback will free the client here qb_ipcs_disconnect(ipc_client->ipcs); } /* just frees the list, not the elements in the list */ g_list_free(remove_these); } void ipc_proxy_init(void) { ipc_clients = pcmk__strkey_table(NULL, NULL); pcmk__serve_based_ipc(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro, &cib_proxy_callbacks_rw); pcmk__serve_attrd_ipc(&attrd_ipcs, &attrd_proxy_callbacks); pcmk__serve_fenced_ipc(&stonith_ipcs, &stonith_proxy_callbacks); pcmk__serve_pacemakerd_ipc(&pacemakerd_ipcs, &pacemakerd_proxy_callbacks); crmd_ipcs = pcmk__serve_controld_ipc(&crmd_proxy_callbacks); if (crmd_ipcs == NULL) { crm_err("Failed to create controller: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } void ipc_proxy_cleanup(void) { if (ipc_providers) { g_list_free(ipc_providers); ipc_providers = NULL; } if (ipc_clients) { g_hash_table_destroy(ipc_clients); ipc_clients = NULL; } pcmk__stop_based_ipc(cib_ro, cib_rw, cib_shm); qb_ipcs_destroy(attrd_ipcs); qb_ipcs_destroy(stonith_ipcs); qb_ipcs_destroy(pacemakerd_ipcs); qb_ipcs_destroy(crmd_ipcs); cib_ro = NULL; cib_rw = NULL; cib_shm = NULL; } diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c index a904cf5424..6f4b2d0062 100644 --- a/daemons/execd/remoted_tls.c +++ b/daemons/execd/remoted_tls.c @@ -1,428 +1,432 @@ /* * Copyright 2012-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef HAVE_GNUTLS_GNUTLS_H # include # define LRMD_REMOTE_AUTH_TIMEOUT 10000 gnutls_psk_server_credentials_t psk_cred_s; gnutls_dh_params_t dh_params; static int ssock = -1; extern int lrmd_call_id; static void debug_log(int level, const char *str) { fputs(str, stderr); } /*! * \internal * \brief Read (more) TLS handshake data from client + * + * \param[in,out] client IPC client doing handshake + * + * \return 0 on success or more data needed, -1 on error */ static int remoted__read_handshake_data(pcmk__client_t *client) { int rc = pcmk__read_handshake_data(client); if (rc == EAGAIN) { /* No more data is available at the moment. Just return for now; * we'll get invoked again once the client sends more. */ return 0; } else if (rc != pcmk_rc_ok) { return -1; } if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } client->remote->auth_timeout = 0; pcmk__set_client_flags(client, pcmk__client_tls_handshake_complete); crm_notice("Remote client connection accepted"); /* Only a client with access to the TLS key can connect, so we can treat * it as privileged. */ pcmk__set_client_flags(client, pcmk__client_privileged); // Alert other clients of the new connection notify_of_new_client(client); return 0; } static int lrmd_remote_client_msg(gpointer data) { int id = 0; int rc; xmlNode *request = NULL; pcmk__client_t *client = data; if (!pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return remoted__read_handshake_data(client); } switch (pcmk__remote_ready(client->remote, 0)) { case pcmk_rc_ok: break; case ETIME: // No message available to read return 0; default: // Error crm_info("Remote client disconnected while polling it"); return -1; } rc = pcmk__read_remote_message(client->remote, -1); request = pcmk__remote_message_xml(client->remote); while (request) { crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id); crm_trace("Processing remote client request %d", id); if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value) { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); /* process all the messages in the current buffer */ request = pcmk__remote_message_xml(client->remote); } if (rc == ENOTCONN) { crm_info("Remote client disconnected while reading from it"); return -1; } return 0; } static void lrmd_remote_client_destroy(gpointer user_data) { pcmk__client_t *client = user_data; if (client == NULL) { return; } crm_notice("Cleaning up after remote client %s disconnected", pcmk__client_name(client)); ipc_proxy_remove_provider(client); /* if this is the last remote connection, stop recurring * operations */ if (pcmk__ipc_client_count() == 1) { client_disconnect_cleanup(NULL); } if (client->remote->tls_session) { void *sock_ptr; int csock; sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); csock = GPOINTER_TO_INT(sock_ptr); gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*client->remote->tls_session); gnutls_free(client->remote->tls_session); close(csock); } lrmd_client_destroy(client); return; } static gboolean lrmd_auth_timeout_cb(gpointer data) { pcmk__client_t *client = data; client->remote->auth_timeout = 0; if (pcmk_is_set(client->flags, pcmk__client_tls_handshake_complete)) { return FALSE; } mainloop_del_fd(client->remote->source); client->remote->source = NULL; crm_err("Remote client authentication timed out"); return FALSE; } // Dispatch callback for remote server socket static int lrmd_remote_listen(gpointer data) { int csock = -1; gnutls_session_t *session = NULL; pcmk__client_t *new_client = NULL; // For client socket static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { .dispatch = lrmd_remote_client_msg, .destroy = lrmd_remote_client_destroy, }; CRM_CHECK(ssock >= 0, return TRUE); if (pcmk__accept_remote_connection(ssock, &csock) != pcmk_rc_ok) { return TRUE; } session = pcmk__new_tls_session(csock, GNUTLS_SERVER, GNUTLS_CRD_PSK, psk_cred_s); if (session == NULL) { close(csock); return TRUE; } new_client = pcmk__new_unauth_client(NULL); new_client->remote = calloc(1, sizeof(pcmk__remote_t)); pcmk__set_client_flags(new_client, pcmk__client_tls); new_client->remote->tls_session = session; // Require the client to authenticate within this time new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT, lrmd_auth_timeout_cb, new_client); crm_info("Remote client pending authentication " CRM_XS " %p id: %s", new_client, new_client->id); new_client->remote->source = mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &lrmd_remote_fd_cb); return TRUE; } static void tls_server_dropped(gpointer user_data) { crm_notice("TLS server session ended"); /* If we are in the process of shutting down, then we should actually exit. * bz#1804259 */ execd_exit_if_shutting_down(); return; } // \return 0 on success, -1 on error (gnutls_psk_server_credentials_function) static int lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) { return (lrmd__init_remote_key(key) == pcmk_rc_ok)? 0 : -1; } static int bind_and_listen(struct addrinfo *addr) { int optval; int fd; int rc; char buffer[INET6_ADDRSTRLEN] = { 0, }; pcmk__sockaddr2str(addr->ai_addr, buffer); crm_trace("Attempting to bind to address %s", buffer); fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); if (fd < 0) { crm_perror(LOG_ERR, "Listener socket creation failed"); return -1; } /* reuse address */ optval = 1; rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer); close(fd); return -1; } if (addr->ai_family == AF_INET6) { optval = 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); if (rc < 0) { crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer); close(fd); return -1; } } if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { crm_perror(LOG_ERR, "Cannot bind to %s", buffer); close(fd); return -1; } if (listen(fd, 10) == -1) { crm_perror(LOG_ERR, "Cannot listen on %s", buffer); close(fd); return -1; } return fd; } static int get_address_info(const char *bind_name, int port, struct addrinfo **res) { int rc; char port_str[6]; // at most "65535" struct addrinfo hints; memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_flags = AI_PASSIVE; hints.ai_family = AF_UNSPEC; // IPv6 or IPv4 hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; snprintf(port_str, sizeof(port_str), "%d", port); rc = getaddrinfo(bind_name, port_str, &hints, res); if (rc) { crm_err("Unable to get IP address(es) for %s: %s", (bind_name? bind_name : "local node"), gai_strerror(rc)); return -EADDRNOTAVAIL; } return pcmk_ok; } int lrmd_init_remote_tls_server(void) { int filter; int port = crm_default_remote_port(); struct addrinfo *res = NULL, *iter; gnutls_datum_t psk_key = { NULL, 0 }; const char *bind_name = getenv("PCMK_remote_address"); static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, .destroy = tls_server_dropped, }; CRM_CHECK(ssock == -1, return ssock); crm_debug("Starting TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); crm_gnutls_global_init(); gnutls_global_set_log_function(debug_log); if (pcmk__init_tls_dh(&dh_params) != pcmk_rc_ok) { return -1; } gnutls_psk_allocate_server_credentials(&psk_cred_s); gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb); gnutls_psk_set_server_dh_params(psk_cred_s, dh_params); /* The key callback won't get called until the first client connection * attempt. Do it once here, so we can warn the user at start-up if we can't * read the key. We don't error out, though, because it's fine if the key is * going to be added later. */ if (lrmd__init_remote_key(&psk_key) != pcmk_rc_ok) { crm_warn("A cluster connection will not be possible until the key is available"); } gnutls_free(psk_key.data); if (get_address_info(bind_name, port, &res) != pcmk_ok) { return -1; } /* Currently we listen on only one address from the resulting list (the * first IPv6 address we can bind to if possible, otherwise the first IPv4 * address we can bind to). When bind_name is NULL, this should be the * respective wildcard address. * * @TODO If there is demand for specifying more than one address, allow * bind_name to be a space-separated list, call getaddrinfo() for each, * and create a socket for each result (set IPV6_V6ONLY on IPv6 sockets * since IPv4 listeners will have their own sockets). */ iter = res; filter = AF_INET6; while (iter) { if (iter->ai_family == filter) { ssock = bind_and_listen(iter); } if (ssock != -1) { break; } iter = iter->ai_next; if (iter == NULL && filter == AF_INET6) { iter = res; filter = AF_INET; } } if (ssock >= 0) { mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock, NULL, &remote_listen_fd_callbacks); crm_debug("Started TLS listener on %s port %d", (bind_name? bind_name : "all addresses on"), port); } freeaddrinfo(res); return ssock; } void execd_stop_tls_server(void) { if (psk_cred_s) { gnutls_psk_free_server_credentials(psk_cred_s); psk_cred_s = 0; } if (ssock >= 0) { close(ssock); ssock = -1; } } #endif diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 094c34099a..4ed33f1cd1 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,1009 +1,1010 @@ /* * Copyright 2004-2022 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count(void) { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(const qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { if ((client_connections != NULL) && (id != NULL)) { gpointer key; pcmk__client_t *client = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id='%s'", pcmk__s(id, "")); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(const pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); - if (active) { - crm_err("Exiting with %d active IPC client%s", - active, pcmk__plural_s(active)); + if (active > 0) { + crm_warn("Exiting with %d active IPC client%s", + active, pcmk__plural_s(active)); } - g_hash_table_destroy(client_connections); client_connections = NULL; + g_hash_table_destroy(client_connections); + client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (or NULL to allocate generic client) * \param[in] key Connection table key (or NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (or NULL on error) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = calloc(1, sizeof(pcmk__client_t)); if (client == NULL) { crm_perror(LOG_ERR, "Allocating client"); return NULL; } if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = strdup("#unprivileged"); CRM_CHECK(client->user != NULL, free(client); return NULL); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (client->id == NULL) { crm_err("Could not generate UUID for client"); free(client->user); free(client); return NULL; } if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { pcmk__client_t *client = client_from_connection(NULL, key, 0); CRM_ASSERT(client != NULL); return client; } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if (client == NULL) { return NULL; } if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { struct iovec *iov = calloc(2, sizeof(struct iovec)); CRM_ASSERT(iov != NULL); return iov; } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold (as non-NULL string) * * \return true if change was allowed, false otherwise */ bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { if (pcmk_is_set(client->flags, pcmk__client_privileged)) { long long qmax_ll; if ((pcmk__scan_ll(qmax, &qmax_ll, 0LL) == pcmk_rc_ok) && (qmax_ll > 0LL) && (qmax_ll <= UINT_MAX)) { client->queue_max = (unsigned int) qmax_ll; return true; } } return false; } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk_is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = calloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return NULL; } } CRM_ASSERT(text[header->size_uncompressed - 1] == 0); xml = string2xml(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", header->qb.id, c->ipcs, c->pid, (long long) qb_rc); } else { crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, (long long) qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), (long long) qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { static unsigned int biggest = 0; struct iovec *iov; unsigned int total = 0; char *compressed = NULL; char *buffer = NULL; pcmk__ipc_header_t *header = NULL; if ((message == NULL) || (result == NULL)) { return EINVAL; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { return ENOMEM; /* errno mightn't be set by allocator */ } buffer = dump_xml_unformatted(message); if (max_send_size == 0) { max_send_size = crm_ipc_default_buffer_size(); } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; header->size_uncompressed = 1 + strlen(buffer); total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = buffer; iov[1].iov_len = header->size_uncompressed; } else { unsigned int new_size = 0; if (pcmk__compress(buffer, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; free(buffer); biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); free(buffer); pcmk_free_ipc_event(iov); return EMSGSIZE; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; CRM_ASSERT(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } return pcmk_rc_ok; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk_is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %d to pid %d failed: %s " CRM_XS " bytes=%u rc=%lld ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, (long long) qb_rc, c->ipcs); } else { crm_trace("Response %d sent, %lld bytes to %p[%d]", header->qb.id, (long long) qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " CRM_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Create an acknowledgement with a status code to send to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Exit status code to add to ack * * \return Newly created XML for ack * \note The caller is responsible for freeing the return value with free_xml(). */ xmlNode * pcmk__ipc_create_ack_as(const char *function, int line, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { xmlNode *ack = NULL; if (pcmk_is_set(flags, crm_ipc_client_response)) { ack = create_xml_node(NULL, tag); crm_xml_add(ack, "function", function); crm_xml_add_int(ack, "line", line); crm_xml_add_int(ack, "status", (int) status); crm_xml_add(ack, PCMK__XA_IPC_PROTO_VERSION, ver); } return ack; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] ver IPC protocol version (can be NULL) * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, const char *ver, crm_exit_t status) { int rc = pcmk_rc_ok; xmlNode *ack = pcmk__ipc_create_ack_as(function, line, flags, tag, ver, status); if (ack != NULL) { crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); c->request_id = 0; rc = pcmk__ipc_send_xml(c, request, ack, flags); free_xml(ack); } return rc; } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-based API * * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for pacemaker-based API * * \param[out] ipcs_ro IPC server for read-only pacemaker-based API * \param[out] ipcs_rw IPC server for read/write pacemaker-based API * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-controld API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-attrd API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-fenced API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-schedulerd API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ qb_ipcs_service_t * pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); } /*! * \brief Check whether string represents a client name used by cluster daemons * * \param[in] name String to check * * \return true if name is standard client name used by daemons, false otherwise * * \note This is provided by the client, and so cannot be used by itself as a * secure means of authentication. */ bool crm_is_daemon_name(const char *name) { name = pcmk__message_name(name); return (!strcmp(name, CRM_SYSTEM_CRMD) || !strcmp(name, CRM_SYSTEM_STONITHD) || !strcmp(name, "stonith-ng") || !strcmp(name, "attrd") || !strcmp(name, CRM_SYSTEM_CIB) || !strcmp(name, CRM_SYSTEM_MCP) || !strcmp(name, CRM_SYSTEM_DC) || !strcmp(name, CRM_SYSTEM_TENGINE) || !strcmp(name, CRM_SYSTEM_LRMD)); }