diff --git a/agents/ocf/ping.in b/agents/ocf/ping.in index 4855e5b306..810a765f80 100755 --- a/agents/ocf/ping.in +++ b/agents/ocf/ping.in @@ -1,436 +1,436 @@ #!/bin/sh # # ocf:pacemaker:ping resource agent # # Copyright 2009-2023 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # ####################################################################### # Initialization: : ${OCF_FUNCTIONS:="${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"} . "${OCF_FUNCTIONS}" : ${__OCF_ACTION:="$1"} # Explicitly list all environment variables used, to make static analysis happy : ${OCF_RESKEY_CRM_meta_timeout:="20000"} : ${OCF_RESKEY_CRM_meta_globally_unique:="false"} : ${OCF_RESKEY_name:="pingd"} : ${OCF_RESKEY_dampen:="5s"} : ${OCF_RESKEY_attempts:="3"} : ${OCF_RESKEY_multiplier:="1"} : ${OCF_RESKEY_debug:="0"} : ${OCF_RESKEY_failure_score:="0"} : ${OCF_RESKEY_use_fping:="1"} : ${OCF_RESKEY_host_list:=""} : ${OCF_RESKEY_options:=""} : ${OCF_RESKEY_timeout:=""} ####################################################################### meta_data() { cat < 1.1 Every time the monitor action is run, this resource agent records (in the CIB) the current number of nodes the host can connect to using the system fping (preferred) or ping tool. node connectivity PID file PID file The time to wait (dampening) further changes occur Dampening interval The name of the attributes to set. This is the name to be used in the constraints. Attribute name The number by which to multiply the number of connected ping nodes by Value multiplier A space separated list of ping nodes to count. Host list Number of ping attempts, per host, before declaring it dead no. of ping attempts How long, in seconds, to wait before declaring a ping lost ping timeout in seconds A catch all for any other options that need to be passed to ping. Extra Options Resource is failed if the score is less than failure_score. Default never fails. failure_score Use fping rather than ping, if found. If set to 0, fping will not be used even if present. Use fping if available Enables to use default attrd_updater verbose logging on every call. Verbose logging END } ####################################################################### ping_conditional_log() { level="$1"; shift if [ $OCF_RESKEY_debug -gt 0 ]; then ocf_log "$level" "$*" fi } ping_usage() { cat <&1); rc=$? active=$(echo "$fping_output" | grep "is alive" | wc -l) case $rc in 0) if [ $OCF_RESKEY_debug -gt 1 ]; then ping_conditional_log info "$fping_output" fi ;; 1) for h in $(echo "$fping_output" | grep "is unreachable" | awk '{print $1}'); do ping_conditional_log warn "$h is inactive: $fping_output" done ;; *) ocf_log err "Unexpected result for '$cmd' $rc: $(echo "$fping_output" | tr '\n' ';')" ;; esac return $active } ping_check() { active=0 for host in $OCF_RESKEY_host_list; do p_exe=ping case $(uname) in Linux) p_args="-n -q -W $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts";; Darwin) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; FreeBSD) p_args="-n -q -t $OCF_RESKEY_timeout -c $OCF_RESKEY_attempts -o";; *) ocf_log err "Unknown host type: $(uname)"; exit $OCF_ERR_INSTALLED;; esac case "$host" in *:*) p_exe=ping6 esac ping_output=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? case $rc in 0) active=$(expr $active + 1) if [ $OCF_RESKEY_debug -gt 1 ]; then ping_conditional_log info "$ping_output" fi ;; 1) ping_conditional_log warn "$host is inactive: $ping_output";; *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $ping_output";; esac done return $active } ping_update() { if use_fping; then fping_check active=$? else ping_check active=$? fi score=$(expr $active \* $OCF_RESKEY_multiplier) if [ "$__OCF_ACTION" = "start" ] ; then attrd_updater -n "$OCF_RESKEY_name" -B "$score" -d "$OCF_RESKEY_dampen" else - attrd_updater -n "$OCF_RESKEY_name" -v "$score" -d "$OCF_RESKEY_dampen" + attrd_updater -n "$OCF_RESKEY_name" -U "$score" -d "$OCF_RESKEY_dampen" fi rc=$? case $rc in 0) ping_conditional_log debug "Updated $OCF_RESKEY_name = $score" ;; *) ocf_log warn "Could not update $OCF_RESKEY_name = $score: rc=$rc";; esac if [ $rc -ne 0 ]; then return $rc fi if [ -n "$OCF_RESKEY_failure_score" ] && [ "$score" -lt "$OCF_RESKEY_failure_score" ]; then ocf_log warn "$OCF_RESKEY_name is less than failure_score($OCF_RESKEY_failure_score)" return 1 fi return 0 } use_fping() { ocf_is_true "$OCF_RESKEY_use_fping" && have_binary fping; } # return values: # 4 IPv4 # 6 IPv6 # 0 indefinite (i.e. hostname) host_family() { case $1 in *[0-9].*[0-9].*[0-9].*[0-9]) return 4 ;; *:*) return 6 ;; *) return 0 ;; esac } # return values same as host_family plus # 99 ambiguous families hosts_family() { # For fping allow only same IP versions or hostnames family=0 for host in $OCF_RESKEY_host_list; do host_family "$host" f=$? if [ $family -ne 0 ] && [ $f -ne 0 ] && [ $f -ne $family ] ; then family=99 break fi [ $f -ne 0 ] && family=$f done return $family } integer=$(echo ${OCF_RESKEY_timeout} | egrep -o '[0-9]*') case "${OCF_RESKEY_timeout}" in *[0-9]ms|*[0-9]msec) OCF_RESKEY_timeout=$(expr $integer / 1000);; *[0-9]m|*[0-9]min) OCF_RESKEY_timeout=$(expr $integer \* 60);; *[0-9]h|*[0-9]hr) OCF_RESKEY_timeout=$(expr $integer \* 60 \* 60);; *) OCF_RESKEY_timeout=$integer;; esac if [ -z "${OCF_RESKEY_timeout}" ]; then if [ -n "$OCF_RESKEY_host_list" ]; then host_count=$(echo $OCF_RESKEY_host_list | awk '{print NF}') OCF_RESKEY_timeout=$(expr $OCF_RESKEY_CRM_meta_timeout / $host_count / $OCF_RESKEY_attempts) OCF_RESKEY_timeout=$(expr $OCF_RESKEY_timeout / 1100) # Convert to seconds and finish 10% early else OCF_RESKEY_timeout=5 fi fi if [ ${OCF_RESKEY_timeout} -lt 1 ]; then OCF_RESKEY_timeout=5 elif [ ${OCF_RESKEY_timeout} -gt 1000 ]; then # ping actually complains if this value is too high, 5 minutes is plenty OCF_RESKEY_timeout=300 fi if [ "${OCF_RESKEY_CRM_meta_globally_unique}" = "false" ]; then : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESKEY_name}"} else : ${OCF_RESKEY_pidfile:="${HA_VARRUN%%/}/ping-${OCF_RESOURCE_INSTANCE}"} fi # Check the debug option case "${OCF_RESKEY_debug}" in true|True|TRUE|1) OCF_RESKEY_debug=1;; false|False|FALSE|0) OCF_RESKEY_debug=0;; verbose|Verbose|VERBOSE|2) OCF_RESKEY_debug=2;; *) ocf_log warn "Value for 'debug' is incorrect. Please specify 'true', 'false', or 'verbose', not: ${OCF_RESKEY_debug}" OCF_RESKEY_debug=false ;; esac case "$__OCF_ACTION" in meta-data) meta_data exit $OCF_SUCCESS ;; start) ping_start;; stop) ping_stop;; monitor) ping_monitor;; validate-all) ping_validate;; reload-agent) ping_reload_agent;; usage|help) ping_usage exit $OCF_SUCCESS ;; *) ping_usage exit $OCF_ERR_UNIMPLEMENTED ;; esac exit $? # vim: set filetype=sh expandtab tabstop=4 softtabstop=4 shiftwidth=4 textwidth=80: diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 928c013374..2c910b4c64 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -1,380 +1,397 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static int last_cib_op_done = 0; static gboolean attribute_timer_cb(gpointer data) { attribute_t *a = data; crm_trace("Dampen interval expired for %s", a->id); attrd_write_or_elect_attribute(a); return FALSE; } static void attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { int level = LOG_ERR; GHashTableIter iter; const char *peer = NULL; attribute_value_t *v = NULL; char *name = user_data; attribute_t *a = g_hash_table_lookup(attributes, name); if(a == NULL) { crm_info("Attribute %s no longer exists", name); return; } a->update = 0; if (rc == pcmk_ok && call_id < 0) { rc = call_id; } switch (rc) { case pcmk_ok: level = LOG_INFO; last_cib_op_done = call_id; if (a->timer && !a->timeout_ms) { // Remove temporary dampening for failed writes mainloop_timer_del(a->timer); a->timer = NULL; } break; case -pcmk_err_diff_failed: /* When an attr changes while the CIB is syncing */ case -ETIME: /* When an attr changes while there is a DC election */ case -ENXIO: /* When an attr changes while the CIB is syncing a * newer config from a node that just came up */ level = LOG_WARNING; break; } do_crm_log(level, "CIB update %d result for %s: %s " CRM_XS " rc=%d", call_id, a->id, pcmk_strerror(rc), rc); g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); free(v->requested); v->requested = NULL; if (rc != pcmk_ok) { a->changed = true; /* Attempt write out again */ } } if (a->changed && attrd_election_won()) { if (rc == pcmk_ok) { /* We deferred a write of a new update because this update was in * progress. Write out the new value without additional delay. */ attrd_write_attribute(a, false); /* We're re-attempting a write because the original failed; delay * the next attempt so we don't potentially flood the CIB manager * and logs with a zillion attempts per second. * * @TODO We could elect a new writer instead. However, we'd have to * somehow downgrade our vote, and we'd still need something like this * if all peers similarly fail to write this attribute (which may * indicate a corrupted attribute entry rather than a CIB issue). */ } else if (a->timer) { // Attribute has a dampening value, so use that as delay if (!mainloop_timer_running(a->timer)) { crm_trace("Delayed re-attempted write for %s by %s", name, pcmk__readable_interval(a->timeout_ms)); mainloop_timer_start(a->timer); } } else { /* Set a temporary dampening of 2 seconds (timer will continue * to exist until the attribute's dampening gets set or the * write succeeds). */ a->timer = attrd_add_timer(a->id, 2000, a); mainloop_timer_start(a->timer); } } } static void build_update_element(xmlNode *parent, attribute_t *a, const char *nodeid, const char *value) { const char *set = NULL; xmlNode *xml_obj = NULL; xml_obj = create_xml_node(parent, XML_CIB_TAG_STATE); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); xml_obj = create_xml_node(xml_obj, XML_TAG_TRANSIENT_NODEATTRS); crm_xml_add(xml_obj, XML_ATTR_ID, nodeid); if (pcmk__str_eq(a->set_type, XML_TAG_ATTR_SETS, pcmk__str_null_matches)) { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); } else if (pcmk__str_eq(a->set_type, XML_TAG_UTILIZATION, pcmk__str_none)) { xml_obj = create_xml_node(xml_obj, XML_TAG_UTILIZATION); } else { crm_err("Unknown set type attribute: %s", a->set_type); } if (a->set_id) { crm_xml_set_id(xml_obj, "%s", a->set_id); } else { crm_xml_set_id(xml_obj, "%s-%s", XML_CIB_TAG_STATUS, nodeid); } set = ID(xml_obj); xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_NVPAIR); if (a->uuid) { crm_xml_set_id(xml_obj, "%s", a->uuid); } else { crm_xml_set_id(xml_obj, "%s-%s", set, a->id); } crm_xml_add(xml_obj, XML_NVPAIR_ATTR_NAME, a->id); if(value) { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, value); } else { crm_xml_add(xml_obj, XML_NVPAIR_ATTR_VALUE, ""); crm_xml_add(xml_obj, "__delete__", XML_NVPAIR_ATTR_VALUE); } } static void send_alert_attributes_value(attribute_t *a, GHashTable *t) { int rc = 0; attribute_value_t *at = NULL; GHashTableIter vIter; g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { rc = attrd_send_attribute_alert(at->nodename, at->nodeid, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", a->id, at->nodename, at->current, at->nodeid, rc); } } static void set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = NULL; a_v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(a_v != NULL); a_v->nodeid = v->nodeid; a_v->nodename = strdup(v->nodename); pcmk__str_update(&a_v->current, v->current); g_hash_table_replace(t, a_v->nodename, a_v); } mainloop_timer_t * attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr) { return mainloop_timer_add(id, timeout_ms, FALSE, attribute_timer_cb, attr); } void attrd_write_attribute(attribute_t *a, bool ignore_delay) { int private_updates = 0, cib_updates = 0; xmlNode *xml_top = NULL; attribute_value_t *v = NULL; GHashTableIter iter; enum cib_call_options flags = cib_none; GHashTable *alert_attribute_value = NULL; if (a == NULL) { return; } /* If this attribute will be written to the CIB ... */ if (!stand_alone && !a->is_private) { /* Defer the write if now's not a good time */ CRM_CHECK(the_cib != NULL, return); if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", a->id, a->update); a->update = 0; // Don't log this message again } else if (a->update) { crm_info("Write out of '%s' delayed: update %d in progress", a->id, a->update); return; } else if (mainloop_timer_running(a->timer)) { if (ignore_delay) { /* 'refresh' forces a write of the current value of all attributes * Cancel any existing timers, we're writing it NOW */ mainloop_timer_stop(a->timer); crm_debug("Write out of '%s': timer is running but ignore delay", a->id); } else { crm_info("Write out of '%s' delayed: timer is running", a->id); return; } } /* Initialize the status update XML */ xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS); } /* Attribute will be written shortly, so clear changed flag */ a->changed = false; /* We will check all peers' uuids shortly, so initialize this to false */ a->unknown_peer_uuids = false; /* Attribute will be written shortly, so clear forced write flag */ a->force_write = FALSE; /* Make the table for the attribute trap */ alert_attribute_value = pcmk__strikey_table(NULL, attrd_free_attribute_value); /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & v)) { crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, CRM_GET_PEER_ANY); /* If the value's peer info does not correspond to a peer, ignore it */ if (peer == NULL) { crm_notice("Cannot update %s[%s]=%s because peer not known", a->id, v->nodename, v->current); continue; } /* If we're just learning the peer's node id, remember it */ if (peer->id && (v->nodeid == 0)) { crm_trace("Learned ID %u for node %s", peer->id, v->nodename); v->nodeid = peer->id; } /* If this is a private attribute, no update needs to be sent */ if (stand_alone || a->is_private) { private_updates++; continue; } /* If the peer is found, but its uuid is unknown, defer write */ if (peer->uuid == NULL) { a->unknown_peer_uuids = true; crm_notice("Cannot update %s[%s]=%s because peer UUID not known " "(will retry if learned)", a->id, v->nodename, v->current); continue; } /* Add this value to status update XML */ crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID %u/%u)", a->id, v->nodename, v->current, peer->uname, peer->uuid, peer->id, v->nodeid); build_update_element(xml_top, a, peer->uuid, v->current); cib_updates++; /* Preservation of the attribute to transmit alert */ set_alert_attribute_value(alert_attribute_value, v); free(v->requested); v->requested = NULL; if (v->current) { v->requested = strdup(v->current); } else { /* Older attrd versions don't know about the cib_mixed_update * flag so make sure it goes to the local cib which does */ cib__set_call_options(flags, crm_system_name, cib_mixed_update|cib_scope_local); } } if (private_updates) { crm_info("Processed %d private change%s for %s, id=%s, set=%s", private_updates, pcmk__plural_s(private_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); } if (cib_updates) { crm_log_xml_trace(xml_top, __func__); a->update = cib_internal_op(the_cib, PCMK__CIB_REQUEST_MODIFY, NULL, XML_CIB_TAG_STATUS, xml_top, NULL, flags, a->user); crm_info("Sent CIB request %d with %d change%s for %s (id %s, set %s)", a->update, cib_updates, pcmk__plural_s(cib_updates), a->id, pcmk__s(a->uuid, "n/a"), pcmk__s(a->set_id, "n/a")); the_cib->cmds->register_callback_full(the_cib, a->update, CIB_OP_TIMEOUT_S, FALSE, strdup(a->id), "attrd_cib_callback", attrd_cib_callback, free); /* Transmit alert of the attribute */ send_alert_attributes_value(a, alert_attribute_value); } g_hash_table_destroy(alert_attribute_value); free_xml(xml_top); } +/*! + * \internal + * \brief Write out attributes + * + * \param[in] options Group of enum attrd_write_options + */ void -attrd_write_attributes(bool all, bool ignore_delay) +attrd_write_attributes(uint32_t options) { GHashTableIter iter; attribute_t *a = NULL; - crm_debug("Writing out %s attributes", all? "all" : "changed"); + crm_debug("Writing out %s attributes", + pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { - if (!all && a->unknown_peer_uuids) { + if (pcmk_is_set(options, attrd_write_skip_shutdown) + && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { + continue; + } + + if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) { // Try writing this attribute again, in case peer ID was learned a->changed = true; } else if (a->force_write) { /* If the force_write flag is set, write the attribute. */ a->changed = true; } - if(all || a->changed) { - /* When forced write flag is set, ignore delay. */ - attrd_write_attribute(a, (a->force_write ? true : ignore_delay)); + if (pcmk_is_set(options, attrd_write_all) || a->changed) { + bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay); + + if (a->force_write) { + // Always ignore delay when forced write flag is set + ignore_delay = true; + } + attrd_write_attribute(a, ignore_delay); } else { crm_trace("Skipping unchanged attribute %s", a->id); } } } void attrd_write_or_elect_attribute(attribute_t *a) { if (attrd_election_won()) { attrd_write_attribute(a, false); } else { attrd_start_election_if_needed(); } } diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 1aec35a054..49631df6e4 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -1,619 +1,619 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" extern crm_exit_t attrd_exit_status; static xmlNode * attrd_confirmation(int callid) { xmlNode *node = create_xml_node(NULL, __func__); crm_xml_add(node, F_TYPE, T_ATTRD); crm_xml_add(node, F_ORIG, get_local_node_name()); crm_xml_add(node, PCMK__XA_TASK, PCMK__ATTRD_CMD_CONFIRM); crm_xml_add_int(node, XML_LRM_ATTR_CALLID, callid); return node; } static void attrd_peer_message(crm_node_t *peer, xmlNode *xml) { const char *election_op = crm_element_value(xml, F_CRM_TASK); if (election_op) { attrd_handle_election_op(peer, xml); return; } if (attrd_shutting_down(false)) { /* If we're shutting down, we want to continue responding to election * ops as long as we're a cluster member (because our vote may be * needed). Ignore all other messages. */ return; } else { pcmk__request_t request = { .ipc_client = NULL, .ipc_id = 0, .ipc_flags = 0, .peer = peer->uname, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return); attrd_handle_request(&request); /* Having finished handling the request, check to see if the originating * peer requested confirmation. If so, send that confirmation back now. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_CONFIRM) && !pcmk__str_eq(request.op, PCMK__ATTRD_CMD_CONFIRM, pcmk__str_none)) { int callid = 0; xmlNode *reply = NULL; /* Add the confirmation ID for the message we are confirming to the * response so the originating peer knows what they're a confirmation * for. */ crm_element_value_int(xml, XML_LRM_ATTR_CALLID, &callid); reply = attrd_confirmation(callid); /* And then send the confirmation back to the originating peer. This * ends up right back in this same function (attrd_peer_message) on the * peer where it will have to do something with a PCMK__XA_CONFIRM type * message. */ crm_debug("Sending %s a confirmation", peer->uname); attrd_send_message(peer, reply, false); free_xml(reply); } pcmk__reset_request(&request); } } static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down(false)) { crm_info("Disconnected from Corosync process group"); } else { crm_crit("Lost connection to Corosync process group, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } /*! * \internal * \brief Override an attribute sync with a local value * * Broadcast the local node's value for an attribute that's different from the * value provided in a peer's attribute synchronization response. This ensures a * node's values for itself take precedence and all peers are kept in sync. * * \param[in] a Attribute entry to override * * \return Local instance of attribute value */ static attribute_value_t * broadcast_local_value(const attribute_t *a) { attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); attrd_add_value_xml(sync, a, v, false); attrd_send_message(NULL, sync, false); free_xml(sync); return v; } /*! * \internal * \brief Ensure a Pacemaker Remote node is in the correct peer cache * * \param[in] node_name Name of Pacemaker Remote node to check */ static void cache_remote_node(const char *node_name) { /* If we previously assumed this node was an unseen cluster node, * remove its entry from the cluster peer cache. */ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name, NULL); if (dup && (dup->uuid == NULL)) { reap_crm_member(0, node_name); } // Ensure node is in the remote peer cache CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); } #define state_text(state) pcmk__s((state), "in unknown state") /*! * \internal * \brief Return host's hash table entry (creating one if needed) * * \param[in,out] values Hash table of values * \param[in] host Name of peer to look up * \param[in] xml XML describing the attribute * * \return Pointer to new or existing hash table entry */ static attribute_value_t * attrd_lookup_or_create_value(GHashTable *values, const char *host, const xmlNode *xml) { attribute_value_t *v = g_hash_table_lookup(values, host); int is_remote = 0; crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { cache_remote_node(host); } if (v == NULL) { v = calloc(1, sizeof(attribute_value_t)); CRM_ASSERT(v != NULL); pcmk__str_update(&v->nodename, host); v->is_remote = is_remote; g_hash_table_replace(values, v->nodename, v); } return(v); } static void attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) { bool gone = false; bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); switch (kind) { case crm_status_uname: crm_debug("%s node %s is now %s", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state)); break; case crm_status_processes: if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { gone = true; } crm_debug("Node %s is %s a peer", peer->uname, (gone? "no longer" : "now")); break; case crm_status_nstate: crm_debug("%s node %s is now %s (was %s)", (is_remote? "Remote" : "Cluster"), peer->uname, state_text(peer->state), state_text(data)); if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { /* If we're the writer, send new peers a list of all attributes * (unless it's a remote node, which doesn't run its own attrd) */ if (attrd_election_won() && !pcmk_is_set(peer->flags, crm_remote_node)) { attrd_peer_sync(peer, NULL); } } else { // Remove all attribute values associated with lost nodes attrd_peer_remove(peer->uname, false, "loss"); gone = true; } break; } // Remove votes from cluster nodes that leave, in case election in progress if (gone && !is_remote) { attrd_remove_voter(peer); attrd_remove_peer_protocol_ver(peer->uname); attrd_do_not_expect_from_peer(peer->uname); // Ensure remote nodes that come up are in the remote node cache } else if (!gone && is_remote) { cache_remote_node(peer->uname); } } static void record_peer_nodeid(attribute_value_t *v, const char *host) { crm_node_t *known_peer = crm_get_peer(v->nodeid, host); crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); if (attrd_election_won()) { - attrd_write_attributes(false, false); + attrd_write_attributes(attrd_write_changed); } } static void update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, const char *attr, const char *value, const char *host, bool filter, int is_force_write) { attribute_value_t *v = NULL; v = attrd_lookup_or_create_value(a->values, host, xml); if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", attr, host, v->current, value, peer->uname); v = broadcast_local_value(a); } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { crm_notice("Setting %s[%s]%s%s: %s -> %s " CRM_XS " from %s with %s write delay", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), pcmk__s(value, "(unset)"), peer->uname, (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); pcmk__str_update(&v->current, value); a->changed = true; if (pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei) && pcmk__str_eq(attr, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) { if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) { attrd_set_requesting_shutdown(); } else { attrd_clear_requesting_shutdown(); } } // Write out new value or start dampening timer if (a->timeout_ms && a->timer) { crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr); mainloop_timer_start(a->timer); } else { attrd_write_or_elect_attribute(a); } } else { if (is_force_write == 1 && a->timeout_ms && a->timer) { /* Save forced writing and set change flag. */ /* The actual attribute is written by Writer after election. */ crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)", attr, host, peer->uname, value); a->force_write = TRUE; } else { crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value); } } /* Set the seen flag for attribute processing held only in the own node. */ v->seen = TRUE; /* If this is a cluster node whose node ID we are learning, remember it */ if ((v->nodeid == 0) && (v->is_remote == FALSE) && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { record_peer_nodeid(v, host); } } static void attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) { attribute_t *a = NULL; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); int is_force_write = 0; if (attr == NULL) { crm_warn("Could not update attribute: peer did not specify name"); return; } crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); a = attrd_populate_attribute(xml, attr); if (a == NULL) { return; } if (host == NULL) { // If no host was specified, update all hosts GHashTableIter vIter; crm_debug("Setting %s for all hosts to %s", attr, value); xml_remove_prop(xml, PCMK__XA_ATTR_NODE_ID); g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } } else { // Update attribute value for the given host update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); } /* If this is a message from some attrd instance broadcasting its protocol * version, check to see if it's a new minimum version. */ if (pcmk__str_eq(attr, CRM_ATTR_PROTOCOL, pcmk__str_none)) { attrd_update_minimum_protocol_ver(peer->uname, value); } } static void broadcast_unseen_local_values(void) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = NULL; g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei)) { if (sync == NULL) { sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); } attrd_add_value_xml(sync, a, v, a->timeout_ms && a->timer); } } } if (sync != NULL) { crm_debug("Broadcasting local-only values"); attrd_send_message(NULL, sync, false); free_xml(sync); } } int attrd_cluster_connect(void) { attrd_cluster = pcmk_cluster_new(); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } void attrd_peer_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); const char *op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); const char *interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); guint interval_ms = crm_parse_interval_spec(interval_spec); char *attr = NULL; GHashTableIter iter; regex_t regex; crm_node_t *peer = crm_get_peer(0, request->peer); if (attrd_failure_regex(®ex, rsc, op, interval_ms) != pcmk_ok) { crm_info("Ignoring invalid request to clear failures for %s", pcmk__s(rsc, "all resources")); return; } crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Make sure value is not set, so we delete */ xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, (gpointer *) &attr, NULL)) { if (regexec(®ex, attr, 0, NULL, 0) == 0) { crm_trace("Matched %s when clearing %s", attr, pcmk__s(rsc, "all resources")); crm_xml_add(xml, PCMK__XA_ATTR_NAME, attr); attrd_peer_update(peer, xml, host, false); } } regfree(®ex); } /*! * \internal * \brief Load attributes from a peer sync response * * \param[in] peer Peer that sent clear request * \param[in] peer_won Whether peer is the attribute writer * \param[in,out] xml Request XML */ void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml) { crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", peer->uname); if (peer_won) { /* Initialize the "seen" flag for all attributes to cleared, so we can * detect attributes that local node has but the writer doesn't. */ attrd_clear_value_seen(); } // Process each attribute update in the sync response for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { attrd_peer_update(peer, child, crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), true); } if (peer_won) { /* If any attributes are still not marked as seen, the writer doesn't * know about them, so send all peers an update with them. */ broadcast_unseen_local_values(); } } /*! * \internal * \brief Remove all attributes and optionally peer cache entries for a node * * \param[in] host Name of node to purge * \param[in] uncache If true, remove node from peer caches * \param[in] source Who requested removal (only used for logging) */ void attrd_peer_remove(const char *host, bool uncache, const char *source) { attribute_t *a = NULL; GHashTableIter aIter; CRM_CHECK(host != NULL, return); crm_notice("Removing all %s attributes for peer %s", host, source); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { if(g_hash_table_remove(a->values, host)) { crm_debug("Removed %s[%s] for peer %s", a->id, host, source); } } if (uncache) { crm_remote_peer_cache_remove(host); reap_crm_member(0, host); } } void attrd_peer_sync(crm_node_t *peer, xmlNode *xml) { GHashTableIter aIter; GHashTableIter vIter; attribute_t *a = NULL; attribute_value_t *v = NULL; xmlNode *sync = create_xml_node(NULL, __func__); crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { g_hash_table_iter_init(&vIter, a->values); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); attrd_add_value_xml(sync, a, v, false); } } crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); attrd_send_message(peer, sync, false); free_xml(sync); } void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter) { bool handle_sync_point = false; CRM_CHECK((peer != NULL) && (xml != NULL), return); if (xml->children != NULL) { for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attrd_copy_xml_attributes(xml, child); attrd_peer_update_one(peer, child, filter); if (attrd_request_has_sync_point(child)) { handle_sync_point = true; } } } else { attrd_peer_update_one(peer, xml, filter); if (attrd_request_has_sync_point(xml)) { handle_sync_point = true; } } /* If the update XML specified that the client wanted to wait for a sync * point, process that now. */ if (handle_sync_point) { crm_trace("Hit local sync point for attribute update"); attrd_ack_waitlist_clients(attrd_sync_point_local, xml); } } diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c index c25a41a449..a95cd44cbd 100644 --- a/daemons/attrd/attrd_elections.c +++ b/daemons/attrd/attrd_elections.c @@ -1,187 +1,197 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include "pacemaker-attrd.h" static char *peer_writer = NULL; static election_t *writer = NULL; static gboolean attrd_election_cb(gpointer user_data) { attrd_declare_winner(); if (attrd_shutting_down(true)) { /* This node is shutting down or about to, meaning its attributes will * be removed (and may have already been removed from the CIB by a * controller). Don't sync or write its attributes in this case. */ return G_SOURCE_REMOVE; } /* Update the peers after an election */ attrd_peer_sync(NULL, NULL); - /* Update the CIB after an election */ - attrd_write_attributes(true, false); + /* After winning an election, update the CIB with the values of all + * attributes as the winner knows them. + * + * However, do not write out any "shutdown" attributes. A node that is + * shutting down will have all its transient attributes removed from the CIB + * when its controller exits, and from the attribute manager's memory (on + * remaining nodes) when its attribute manager exits; if an election is won + * between when those two things happen, we don't want to write the shutdown + * attribute back out, which would cause the node to immediately shut down + * the next time it rejoins. + */ + attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown); return G_SOURCE_REMOVE; } void attrd_election_init(void) { writer = election_init(T_ATTRD, attrd_cluster->uname, 120000, attrd_election_cb); } void attrd_election_fini(void) { election_fini(writer); } void attrd_start_election_if_needed(void) { if ((peer_writer == NULL) && (election_state(writer) != election_in_progress) && !attrd_shutting_down(false)) { crm_info("Starting an election to determine the writer"); election_vote(writer); } } bool attrd_election_won(void) { return (election_state(writer) == election_won); } void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml) { enum election_result rc = 0; enum election_result previous = election_state(writer); crm_xml_add(xml, F_CRM_HOST_FROM, peer->uname); // Don't become writer if we're shutting down rc = election_count_vote(writer, xml, !attrd_shutting_down(false)); switch(rc) { case election_start: crm_debug("Unsetting writer (was %s) and starting new election", peer_writer? peer_writer : "unset"); free(peer_writer); peer_writer = NULL; election_vote(writer); break; case election_lost: /* The election API should really distinguish between "we just lost * to this peer" and "we already lost previously, and we are * discarding this vote for some reason", but it doesn't. * * In the first case, we want to tentatively set the peer writer to * this peer, even though another peer may eventually win (which we * will learn via attrd_check_for_new_writer()), so * attrd_start_election_if_needed() doesn't start a new election. * * Approximate a test for that case as best as possible. */ if ((peer_writer == NULL) || (previous != election_lost)) { pcmk__str_update(&peer_writer, peer->uname); crm_debug("Election lost, presuming %s is writer for now", peer_writer); } break; case election_in_progress: election_check(writer); break; default: crm_info("Ignoring election op from %s due to error", peer->uname); break; } } bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml) { int peer_state = 0; crm_element_value_int(xml, PCMK__XA_ATTR_WRITER, &peer_state); if (peer_state == election_won) { if ((election_state(writer) == election_won) && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { crm_notice("Detected another attribute writer (%s), starting new election", peer->uname); election_vote(writer); } else if (!pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { crm_notice("Recorded new attribute writer: %s (was %s)", peer->uname, (peer_writer? peer_writer : "unset")); pcmk__str_update(&peer_writer, peer->uname); } } return (peer_state == election_won); } void attrd_declare_winner(void) { crm_notice("Recorded local node as attribute writer (was %s)", (peer_writer? peer_writer : "unset")); pcmk__str_update(&peer_writer, attrd_cluster->uname); } void attrd_remove_voter(const crm_node_t *peer) { election_remove(writer, peer->uname); if (peer_writer && pcmk__str_eq(peer->uname, peer_writer, pcmk__str_casei)) { free(peer_writer); peer_writer = NULL; crm_notice("Lost attribute writer %s", peer->uname); /* Clear any election dampening in effect. Otherwise, if the lost writer * had just won, the election could fizzle out with no new writer. */ election_clear_dampening(writer); /* If the writer received attribute updates during its shutdown, it will * not have written them to the CIB. Ensure we get a new writer so they * are written out. This means that every node that sees the writer * leave will start a new election, but that's better than losing * attributes. */ attrd_start_election_if_needed(); /* If an election is in progress, we need to call election_check(), in case * this lost peer is the only one that hasn't voted, otherwise the election * would be pending until it's timed out. */ } else if (election_state(writer) == election_in_progress) { crm_debug("Checking election status upon loss of voter %s", peer->uname); election_check(writer); } } void attrd_xml_add_writer(xmlNode *xml) { crm_xml_add_int(xml, PCMK__XA_ATTR_WRITER, election_state(writer)); } diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 4be789de7f..05c4a696a1 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -1,629 +1,629 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static qb_ipcs_service_t *ipcs = NULL; /*! * \internal * \brief Build the XML reply to a client query * * param[in] attr Name of requested attribute * param[in] host Name of requested host (or NULL for all hosts) * * \return New XML reply * \note Caller is responsible for freeing the resulting XML */ static xmlNode *build_query_reply(const char *attr, const char *host) { xmlNode *reply = create_xml_node(NULL, __func__); attribute_t *a; if (reply == NULL) { return NULL; } crm_xml_add(reply, F_TYPE, T_ATTRD); crm_xml_add(reply, F_SUBTYPE, PCMK__ATTRD_CMD_QUERY); crm_xml_add(reply, PCMK__XA_ATTR_VERSION, ATTRD_PROTOCOL_VERSION); /* If desired attribute exists, add its value(s) to the reply */ a = g_hash_table_lookup(attributes, attr); if (a) { attribute_value_t *v; xmlNode *host_value; crm_xml_add(reply, PCMK__XA_ATTR_NAME, attr); /* Allow caller to use "localhost" to refer to local node */ if (pcmk__str_eq(host, "localhost", pcmk__str_casei)) { host = attrd_cluster->uname; crm_trace("Mapped localhost to %s", host); } /* If a specific node was requested, add its value */ if (host) { v = g_hash_table_lookup(a->values, host); host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, host, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, (v? v->current : NULL)); /* Otherwise, add all nodes' values */ } else { GHashTableIter iter; g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { host_value = create_xml_node(reply, XML_CIB_TAG_NODE); if (host_value == NULL) { free_xml(reply); return NULL; } pcmk__xe_add_node(host_value, v->nodename, 0); crm_xml_add(host_value, PCMK__XA_ATTR_VALUE, v->current); } } } return reply; } xmlNode * attrd_client_clear_failure(pcmk__request_t *request) { xmlNode *xml = request->xml; const char *rsc, *op, *interval_spec; if (minimum_protocol_version >= 2) { /* Propagate to all peers (including ourselves). * This ends up at attrd_peer_message(). */ attrd_send_message(NULL, xml, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } rsc = crm_element_value(xml, PCMK__XA_ATTR_RESOURCE); op = crm_element_value(xml, PCMK__XA_ATTR_OPERATION); interval_spec = crm_element_value(xml, PCMK__XA_ATTR_INTERVAL); /* Map this to an update */ crm_xml_add(xml, PCMK__XA_TASK, PCMK__ATTRD_CMD_UPDATE); /* Add regular expression matching desired attributes */ if (rsc) { char *pattern; if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { guint interval_ms = crm_parse_interval_spec(interval_spec); pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, pattern); free(pattern); } else { crm_xml_add(xml, PCMK__XA_ATTR_PATTERN, ATTRD_RE_CLEAR_ALL); } /* Make sure attribute and value are not set, so we delete via regex */ xml_remove_prop(xml, PCMK__XA_ATTR_NAME); xml_remove_prop(xml, PCMK__XA_ATTR_VALUE); return attrd_client_update(request); } xmlNode * attrd_client_peer_remove(pcmk__request_t *request) { xmlNode *xml = request->xml; // Host and ID are not used in combination, rather host has precedence const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); char *host_alloc = NULL; attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); if (host == NULL) { int nodeid = 0; crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); if (nodeid > 0) { crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL, NULL); char *host_alloc = NULL; if (node && node->uname) { // Use cached name if available host = node->uname; } else { // Otherwise ask cluster layer host_alloc = get_node_name(nodeid); host = host_alloc; } pcmk__xe_add_node(xml, host, 0); } } if (host) { crm_info("Client %s is requesting all values for %s be removed", pcmk__client_name(request->ipc_client), host); attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ free(host_alloc); } else { crm_info("Ignoring request by client %s to remove all peer values without specifying peer", pcmk__client_name(request->ipc_client)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } xmlNode * attrd_client_query(pcmk__request_t *request) { xmlNode *query = request->xml; xmlNode *reply = NULL; const char *attr = NULL; crm_debug("Query arrived from %s", pcmk__client_name(request->ipc_client)); /* Request must specify attribute name to query */ attr = crm_element_value(query, PCMK__XA_ATTR_NAME); if (attr == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Ignoring malformed query from %s (no attribute name given)", pcmk__client_name(request->ipc_client)); return NULL; } /* Build the XML reply */ reply = build_query_reply(attr, crm_element_value(query, PCMK__XA_ATTR_NODE_NAME)); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Could not respond to query from %s: could not create XML reply", pcmk__client_name(request->ipc_client)); return NULL; } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } request->ipc_client->request_id = 0; return reply; } xmlNode * attrd_client_refresh(pcmk__request_t *request) { crm_info("Updating all attributes"); attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags); - attrd_write_attributes(true, true); + attrd_write_attributes(attrd_write_all|attrd_write_no_delay); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void handle_missing_host(xmlNode *xml) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); if (host == NULL) { crm_trace("Inferring host"); pcmk__xe_add_node(xml, attrd_cluster->uname, attrd_cluster->nodeid); } } /* Convert a single IPC message with a regex into one with multiple children, one * for each regex match. */ static int expand_regexes(xmlNode *xml, const char *attr, const char *value, const char *regex) { if (attr == NULL && regex) { bool matched = false; GHashTableIter aIter; regex_t r_patt; crm_debug("Setting %s to %s", regex, value); if (regcomp(&r_patt, regex, REG_EXTENDED|REG_NOSUB)) { return EINVAL; } g_hash_table_iter_init(&aIter, attributes); while (g_hash_table_iter_next(&aIter, (gpointer *) & attr, NULL)) { int status = regexec(&r_patt, attr, 0, NULL, 0); if (status == 0) { xmlNode *child = create_xml_node(xml, XML_ATTR_OP); crm_trace("Matched %s with %s", attr, regex); matched = true; /* Copy all the attributes from the parent over, but remove the * regex and replace it with the name. */ attrd_copy_xml_attributes(xml, child); xml_remove_prop(child, PCMK__XA_ATTR_PATTERN); crm_xml_add(child, PCMK__XA_ATTR_NAME, attr); } } regfree(&r_patt); /* Return a code if we never matched anything. This should not be treated * as an error. It indicates there was a regex, and it was a valid regex, * but simply did not match anything and the caller should not continue * doing any regex-related processing. */ if (!matched) { return pcmk_rc_op_unsatisfied; } } else if (attr == NULL) { return pcmk_rc_bad_nvpair; } return pcmk_rc_ok; } static int handle_regexes(pcmk__request_t *request) { xmlNode *xml = request->xml; int rc = pcmk_rc_ok; const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); const char *regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); rc = expand_regexes(xml, attr, value, regex); if (rc == EINVAL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Bad regex '%s' for update from client %s", regex, pcmk__client_name(request->ipc_client)); } else if (rc == pcmk_rc_bad_nvpair) { crm_err("Update request did not specify attribute or regular expression"); pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Client %s update request did not specify attribute or regular expression", pcmk__client_name(request->ipc_client)); } return rc; } static int handle_value_expansion(const char **value, xmlNode *xml, const char *op, const char *attr) { attribute_t *a = g_hash_table_lookup(attributes, attr); if (a == NULL && pcmk__str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY, pcmk__str_none)) { return EINVAL; } if (*value && attrd_value_needs_expansion(*value)) { int int_value; attribute_value_t *v = NULL; if (a) { const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); v = g_hash_table_lookup(a->values, host); } int_value = attrd_expand_value(*value, (v? v->current : NULL)); crm_info("Expanded %s=%s to %d", attr, *value, int_value); crm_xml_add_int(xml, PCMK__XA_ATTR_VALUE, int_value); /* Replacing the value frees the previous memory, so re-query it */ *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); } return pcmk_rc_ok; } static void send_update_msg_to_cluster(pcmk__request_t *request, xmlNode *xml) { if (pcmk__str_eq(attrd_request_sync_point(xml), PCMK__VALUE_CLUSTER, pcmk__str_none)) { /* The client is waiting on the cluster-wide sync point. In this case, * the response ACK is not sent until this attrd broadcasts the update * and receives its own confirmation back from all peers. */ attrd_expect_confirmations(request, attrd_cluster_sync_point_update); attrd_send_message(NULL, xml, true); /* ends up at attrd_peer_message() */ } else { /* The client is either waiting on the local sync point or was not * waiting on any sync point at all. For the local sync point, the * response ACK is sent in attrd_peer_update. For clients not * waiting on any sync point, the response ACK is sent in * handle_update_request immediately before this function was called. */ attrd_send_message(NULL, xml, false); /* ends up at attrd_peer_message() */ } } static int send_child_update(xmlNode *child, void *data) { pcmk__request_t *request = (pcmk__request_t *) data; /* Calling pcmk__set_result is handled by one of these calls to * attrd_client_update, so no need to do it again here. */ request->xml = child; attrd_client_update(request); return pcmk_rc_ok; } xmlNode * attrd_client_update(pcmk__request_t *request) { xmlNode *xml = NULL; const char *attr, *value, *regex; CRM_CHECK((request != NULL) && (request->xml != NULL), return NULL); xml = request->xml; /* If the message has children, that means it is a message from a newer * client that supports sending multiple operations at a time. There are * two ways we can handle that. */ if (xml->children != NULL) { if (ATTRD_SUPPORTS_MULTI_MESSAGE(minimum_protocol_version)) { /* First, if all peers support a certain protocol version, we can * just broadcast the big message and they'll handle it. However, * we also need to apply all the transformations in this function * to the children since they don't happen anywhere else. */ for (xmlNode *child = first_named_child(xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { attr = crm_element_value(child, PCMK__XA_ATTR_NAME); value = crm_element_value(child, PCMK__XA_ATTR_VALUE); handle_missing_host(child); if (handle_value_expansion(&value, child, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } } send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { /* Save the original xml node pointer so it can be restored after iterating * over all the children. */ xmlNode *orig_xml = request->xml; /* Second, if they do not support that protocol version, split it * up into individual messages and call attrd_client_update on * each one. */ pcmk__xe_foreach_child(xml, XML_ATTR_OP, send_child_update, request); request->xml = orig_xml; } return NULL; } attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); regex = crm_element_value(xml, PCMK__XA_ATTR_PATTERN); if (handle_regexes(request) != pcmk_rc_ok) { /* Error handling was already dealt with in handle_regexes, so just return. */ return NULL; } else if (regex) { /* Recursively call attrd_client_update on the new message with regexes * expanded. If supported by the attribute daemon, this means that all * matches can also be handled atomically. */ return attrd_client_update(request); } handle_missing_host(xml); if (handle_value_expansion(&value, xml, request->op, attr) == EINVAL) { pcmk__format_result(&request->result, CRM_EX_NOSUCH, PCMK_EXEC_ERROR, "Attribute %s does not exist", attr); return NULL; } crm_debug("Broadcasting %s[%s]=%s%s", attr, crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME), value, (attrd_election_won()? " (writer)" : "")); send_update_msg_to_cluster(request, xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } /*! * \internal * \brief Accept a new client IPC connection * * \param[in,out] c New connection * \param[in] uid Client user id * \param[in] gid Client group id * * \return pcmk_ok on success, -errno otherwise */ static int32_t attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("New client connection %p", c); if (attrd_shutting_down(false)) { crm_info("Ignoring new connection from pid %d during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return pcmk_ok; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in] c Connection to destroy * * \return FALSE (i.e. do not re-run this callback) */ static int32_t attrd_ipc_closed(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { crm_trace("Ignoring request to clean up unknown connection %p", c); } else { crm_trace("Cleaning up closed client connection %p", c); /* Remove the client from the sync point waitlist if it's present. */ attrd_remove_client_from_waitlist(client); /* And no longer wait for confirmations from any peers. */ attrd_do_not_wait_for_client(client); pcmk__free_client(client); } return FALSE; } /*! * \internal * \brief Destroy a client IPC connection * * \param[in,out] c Connection to destroy * * \note We handle a destroyed connection the same as a closed one, * but we need a separate handler because the return type is different. */ static void attrd_ipc_destroy(qb_ipcs_connection_t *c) { crm_trace("Destroying client connection %p", c); attrd_ipc_closed(c); } static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); return 0; } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = xml, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); request.op = crm_element_value_copy(request.xml, PCMK__XA_TASK); CRM_CHECK(request.op != NULL, return 0); attrd_handle_request(&request); pcmk__reset_request(&request); } free_xml(xml); return 0; } static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, .connection_created = NULL, .msg_process = attrd_ipc_dispatch, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy }; void attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } } /*! * \internal * \brief Set up attrd IPC communication */ void attrd_init_ipc(void) { pcmk__serve_attrd_ipc(&ipcs, &ipc_callbacks); } diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index c43eac1695..bfd5136889 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -1,358 +1,358 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" cib_t *the_cib = NULL; static bool requesting_shutdown = false; static bool shutting_down = false; static GMainLoop *mloop = NULL; /* A hash table storing information on the protocol version of each peer attrd. * The key is the peer's uname, and the value is the protocol version number. */ GHashTable *peer_protocol_vers = NULL; /*! * \internal * \brief Set requesting_shutdown state */ void attrd_set_requesting_shutdown(void) { requesting_shutdown = true; } /*! * \internal * \brief Clear requesting_shutdown state */ void attrd_clear_requesting_shutdown(void) { requesting_shutdown = false; } /*! * \internal * \brief Check whether local attribute manager is shutting down * * \param[in] if_requested Also consider presence of "shutdown" attribute * * \return \c true if local attribute manager has begun shutdown sequence * or (if \p if_requested is \c true) whether local node has a nonzero * "shutdown" attribute set, otherwise \c false * \note Most callers should pass \c false for \p if_requested, because the * attribute manager needs to continue performing while the controller is * shutting down, and even needs to be eligible for election in case all * nodes are shutting down. */ bool attrd_shutting_down(bool if_requested) { return shutting_down || (if_requested && requesting_shutdown); } /*! * \internal * \brief Exit (using mainloop or not, as appropriate) * * \param[in] nsig Ignored */ void attrd_shutdown(int nsig) { // Tell various functions not to do anthing shutting_down = true; // Don't respond to signals while shutting down mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGCHLD); mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGUSR2); mainloop_destroy_signal(SIGTRAP); attrd_free_waitlist(); attrd_free_confirmations(); if (peer_protocol_vers != NULL) { g_hash_table_destroy(peer_protocol_vers); peer_protocol_vers = NULL; } if ((mloop == NULL) || !g_main_loop_is_running(mloop)) { /* If there's no main loop active, just exit. This should be possible * only if we get SIGTERM in brief windows at start-up and shutdown. */ crm_exit(CRM_EX_OK); } else { g_main_loop_quit(mloop); g_main_loop_unref(mloop); } } /*! * \internal * \brief Create a main loop for attrd */ void attrd_init_mainloop(void) { mloop = g_main_loop_new(NULL, FALSE); } /*! * \internal * \brief Run attrd main loop */ void attrd_run_mainloop(void) { g_main_loop_run(mloop); } void attrd_cib_disconnect(void) { CRM_CHECK(the_cib != NULL, return); the_cib->cmds->del_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); the_cib->cmds->del_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); cib__clean_up_connection(&the_cib); } void attrd_cib_replaced_cb(const char *event, xmlNode * msg) { int change_section = cib_change_section_nodes | cib_change_section_status | cib_change_section_alerts; if (attrd_shutting_down(true)) { return; } crm_element_value_int(msg, F_CIB_CHANGE_SECTION, &change_section); if (attrd_election_won()) { if (change_section & (cib_change_section_nodes | cib_change_section_status)) { crm_notice("Updating all attributes after %s event", event); - attrd_write_attributes(true, false); + attrd_write_attributes(attrd_write_all); } } if (change_section & cib_change_section_alerts) { // Check for changes in alerts mainloop_set_trigger(attrd_config_read); } } /* strlen("value") */ #define plus_plus_len (5) /*! * \internal * \brief Check whether an attribute value should be expanded * * \param[in] value Attribute value to check * * \return true if value needs expansion, false otherwise */ bool attrd_value_needs_expansion(const char *value) { return ((strlen(value) >= (plus_plus_len + 2)) && (value[plus_plus_len] == '+') && ((value[plus_plus_len + 1] == '+') || (value[plus_plus_len + 1] == '='))); } /*! * \internal * \brief Expand an increment expression into an integer * * \param[in] value Attribute increment expression to expand * \param[in] old_value Previous value of attribute * * \return Expanded value */ int attrd_expand_value(const char *value, const char *old_value) { int offset = 1; int int_value = char2score(old_value); if (value[plus_plus_len + 1] != '+') { const char *offset_s = value + (plus_plus_len + 2); offset = char2score(offset_s); } int_value += offset; if (int_value > INFINITY) { int_value = INFINITY; } return int_value; } /*! * \internal * \brief Create regular expression matching failure-related attributes * * \param[out] regex Where to store created regular expression * \param[in] rsc Name of resource to clear (or NULL for all) * \param[in] op Operation to clear if rsc is specified (or NULL for all) * \param[in] interval_ms Interval of operation to clear if op is specified * * \return pcmk_ok on success, -EINVAL if arguments are invalid * * \note The caller is responsible for freeing the result with regfree(). */ int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms) { char *pattern = NULL; int rc; /* Create a pattern that matches desired attributes */ if (rsc == NULL) { pattern = strdup(ATTRD_RE_CLEAR_ALL); } else if (op == NULL) { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_ONE, rsc); } else { pattern = crm_strdup_printf(ATTRD_RE_CLEAR_OP, rsc, op, interval_ms); } /* Compile pattern into regular expression */ crm_trace("Clearing attributes matching %s", pattern); rc = regcomp(regex, pattern, REG_EXTENDED|REG_NOSUB); free(pattern); return (rc == 0)? pcmk_ok : -EINVAL; } void attrd_free_attribute_value(gpointer data) { attribute_value_t *v = data; free(v->nodename); free(v->current); free(v->requested); free(v); } void attrd_free_attribute(gpointer data) { attribute_t *a = data; if(a) { free(a->id); free(a->set_id); free(a->set_type); free(a->uuid); free(a->user); mainloop_timer_del(a->timer); g_hash_table_destroy(a->values); free(a); } } /*! * \internal * \brief When a peer node leaves the cluster, stop tracking its protocol version. * * \param[in] host The peer node's uname to be removed */ void attrd_remove_peer_protocol_ver(const char *host) { if (peer_protocol_vers != NULL) { g_hash_table_remove(peer_protocol_vers, host); } } /*! * \internal * \brief When a peer node broadcasts a message with its protocol version, keep * track of that information. * * We keep track of each peer's protocol version so we know which peers to * expect confirmation messages from when handling cluster-wide sync points. * We additionally keep track of the lowest protocol version supported by all * peers so we know when we can send IPC messages containing more than one * request. * * \param[in] host The peer node's uname to be tracked * \param[in] value The peer node's protocol version */ void attrd_update_minimum_protocol_ver(const char *host, const char *value) { int ver; if (peer_protocol_vers == NULL) { peer_protocol_vers = pcmk__strkey_table(free, NULL); } pcmk__scan_min_int(value, &ver, 0); if (ver > 0) { char *host_name = strdup(host); /* Record the peer attrd's protocol version. */ CRM_ASSERT(host_name != NULL); g_hash_table_insert(peer_protocol_vers, host_name, GINT_TO_POINTER(ver)); /* If the protocol version is a new minimum, record it as such. */ if (minimum_protocol_version == -1 || ver < minimum_protocol_version) { minimum_protocol_version = ver; crm_trace("Set minimum attrd protocol version to %d", minimum_protocol_version); } } } void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest) { /* Copy attributes from the wrapper parent node into the child node. * We can't just use copy_in_properties because we want to skip any * attributes that are already set on the child. For instance, if * we were told to use a specific node, there will already be a node * attribute on the child. Copying the parent's node attribute over * could result in the wrong value. */ for (xmlAttrPtr a = pcmk__xe_first_attr(src); a != NULL; a = a->next) { const char *p_name = (const char *) a->name; const char *p_value = ((a == NULL) || (a->children == NULL)) ? NULL : (const char *) a->children->content; if (crm_element_value(dest, p_name) == NULL) { crm_xml_add(dest, p_name, p_value); } } } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 41f31d97b3..2e35bd7ec5 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -1,215 +1,222 @@ /* * Copyright 2013-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #ifndef PACEMAKER_ATTRD__H # define PACEMAKER_ATTRD__H #include #include #include #include #include #include #include /* * Legacy attrd (all pre-1.1.11 Pacemaker versions, plus all versions when used * with the no-longer-supported CMAN or corosync-plugin stacks) is unversioned. * * With atomic attrd, each attrd will send ATTRD_PROTOCOL_VERSION with every * peer request and reply. As of Pacemaker 2.0.0, at start-up each attrd will * also set a private attribute for itself with its version, so any attrd can * determine the minimum version supported by all peers. * * Protocol Pacemaker Significant changes * -------- --------- ------------------- * 1 1.1.11 PCMK__ATTRD_CMD_UPDATE (PCMK__XA_ATTR_NAME only), * PCMK__ATTRD_CMD_PEER_REMOVE, PCMK__ATTRD_CMD_REFRESH, * PCMK__ATTRD_CMD_FLUSH, PCMK__ATTRD_CMD_SYNC, * PCMK__ATTRD_CMD_SYNC_RESPONSE * 1 1.1.13 PCMK__ATTRD_CMD_UPDATE (with PCMK__XA_ATTR_PATTERN), * PCMK__ATTRD_CMD_QUERY * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, * PCMK__ATTRD_CMD_UPDATE_DELAY * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes * 4 2.1.5 Multiple attributes can be updated in a single IPC * message * 5 2.1.5 Peers can request confirmation of a sent message */ #define ATTRD_PROTOCOL_VERSION "5" #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) #define attrd_send_ack(client, id, flags) \ pcmk__ipc_send_ack((client), (id), (flags), "ack", ATTRD_PROTOCOL_VERSION, CRM_EX_INDETERMINATE) void attrd_init_mainloop(void); void attrd_run_mainloop(void); void attrd_set_requesting_shutdown(void); void attrd_clear_requesting_shutdown(void); void attrd_free_waitlist(void); bool attrd_shutting_down(bool if_requested); void attrd_shutdown(int nsig); void attrd_init_ipc(void); void attrd_ipc_fini(void); void attrd_cib_disconnect(void); bool attrd_value_needs_expansion(const char *value); int attrd_expand_value(const char *value, const char *old_value); /* regular expression to clear failures of all resources */ #define ATTRD_RE_CLEAR_ALL \ "^(" PCMK__FAIL_COUNT_PREFIX "|" PCMK__LAST_FAILURE_PREFIX ")-" /* regular expression to clear failure of all operations for one resource * (format takes resource name) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_ONE ATTRD_RE_CLEAR_ALL "%s(#.+_[0-9]+)?$" /* regular expression to clear failure of one operation for one resource * (format takes resource name, operation name, and interval) * * @COMPAT attributes set < 1.1.17: * also match older attributes that do not have the operation part */ #define ATTRD_RE_CLEAR_OP ATTRD_RE_CLEAR_ALL "%s(#%s_%u)?$" int attrd_failure_regex(regex_t *regex, const char *rsc, const char *op, guint interval_ms); extern cib_t *the_cib; /* Alerts */ extern lrmd_t *the_lrmd; extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); void attrd_cib_replaced_cb(const char *event, xmlNode * msg); void attrd_cib_updated_cb(const char *event, xmlNode *msg); int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value); // Elections void attrd_election_init(void); void attrd_election_fini(void); void attrd_start_election_if_needed(void); bool attrd_election_won(void); void attrd_handle_election_op(const crm_node_t *peer, xmlNode *xml); bool attrd_check_for_new_writer(const crm_node_t *peer, const xmlNode *xml); void attrd_declare_winner(void); void attrd_remove_voter(const crm_node_t *peer); void attrd_xml_add_writer(xmlNode *xml); typedef struct attribute_s { char *uuid; /* TODO: Remove if at all possible */ char *id; char *set_id; char *set_type; GHashTable *values; int update; int timeout_ms; /* TODO: refactor these three as a bitmask */ bool changed; /* whether attribute value has changed since last write */ bool unknown_peer_uuids; /* whether we know we're missing a peer uuid */ gboolean is_private; /* whether to keep this attribute out of the CIB */ mainloop_timer_t *timer; char *user; gboolean force_write; /* Flag for updating attribute by ignoring delay */ } attribute_t; typedef struct attribute_value_s { uint32_t nodeid; gboolean is_remote; char *nodename; char *current; char *requested; gboolean seen; } attribute_value_t; extern crm_cluster_t *attrd_cluster; extern GHashTable *attributes; extern GHashTable *peer_protocol_vers; #define CIB_OP_TIMEOUT_S 120 int attrd_cluster_connect(void); void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, bool filter); void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); void attrd_peer_remove(const char *host, bool uncache, const char *source); void attrd_peer_clear_failure(pcmk__request_t *request); void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, xmlNode *xml); void attrd_broadcast_protocol(void); xmlNode *attrd_client_peer_remove(pcmk__request_t *request); xmlNode *attrd_client_clear_failure(pcmk__request_t *request); xmlNode *attrd_client_update(pcmk__request_t *request); xmlNode *attrd_client_refresh(pcmk__request_t *request); xmlNode *attrd_client_query(pcmk__request_t *request); gboolean attrd_send_message(crm_node_t *node, xmlNode *data, bool confirm); xmlNode *attrd_add_value_xml(xmlNode *parent, const attribute_t *a, const attribute_value_t *v, bool force_write); void attrd_clear_value_seen(void); void attrd_free_attribute(gpointer data); void attrd_free_attribute_value(gpointer data); attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr); +enum attrd_write_options { + attrd_write_changed = 0, + attrd_write_all = (1 << 0), + attrd_write_no_delay = (1 << 1), + attrd_write_skip_shutdown = (1 << 2), +}; + void attrd_write_attribute(attribute_t *a, bool ignore_delay); -void attrd_write_attributes(bool all, bool ignore_delay); +void attrd_write_attributes(uint32_t options); void attrd_write_or_elect_attribute(attribute_t *a); extern int minimum_protocol_version; void attrd_remove_peer_protocol_ver(const char *host); void attrd_update_minimum_protocol_ver(const char *host, const char *value); mainloop_timer_t *attrd_add_timer(const char *id, int timeout_ms, attribute_t *attr); void attrd_unregister_handlers(void); void attrd_handle_request(pcmk__request_t *request); enum attrd_sync_point { attrd_sync_point_local, attrd_sync_point_cluster, }; typedef int (*attrd_confirmation_action_fn)(xmlNode *); void attrd_add_client_to_waitlist(pcmk__request_t *request); void attrd_ack_waitlist_clients(enum attrd_sync_point sync_point, const xmlNode *xml); int attrd_cluster_sync_point_update(xmlNode *xml); void attrd_do_not_expect_from_peer(const char *host); void attrd_do_not_wait_for_client(pcmk__client_t *client); void attrd_expect_confirmations(pcmk__request_t *request, attrd_confirmation_action_fn fn); void attrd_free_confirmations(void); void attrd_handle_confirmation(int callid, const char *host); void attrd_remove_client_from_waitlist(pcmk__client_t *client); const char *attrd_request_sync_point(xmlNode *xml); bool attrd_request_has_sync_point(xmlNode *xml); void attrd_copy_xml_attributes(xmlNode *src, xmlNode *dest); extern gboolean stand_alone; #endif /* PACEMAKER_ATTRD__H */ diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 0504cea506..07d6298535 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -1,1847 +1,1855 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // uint32_t, uint64_t, UINT64_C() #include #include #include // PRIu64 #include #include #include #include #include #include #include #include #include #define EXIT_ESCALATION_MS 10000 static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; struct digest_data { char *nodes; char *alerts; char *status; }; int next_client_id = 0; gboolean legacy_mode = FALSE; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged); static gboolean cib_common_callback(qb_ipcs_connection_t *c, void *data, size_t size, gboolean privileged); gboolean cib_legacy_mode(void) { return legacy_mode; } /*! * \internal * \brief Free a struct digest_data object's members * * \param[in,out] digests Object whose members to free * * \note This does not free the object itself, which may be stack-allocated. */ static void free_digests(struct digest_data *digests) { if (digests != NULL) { free(digests->nodes); free(digests->alerts); free(digests->status); } } static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (cib_shutdown_flag) { crm_info("Ignoring new IPC client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; /*! * \internal * \brief Create reply XML for a CIB request * * \param[in] op CIB operation type * \param[in] call_id CIB call ID * \param[in] client_id CIB client ID * \param[in] call_options Group of enum cib_call_options flags * \param[in] rc Request return code * \param[in] call_data Request output data * * \return Reply XML * * \note The caller is responsible for freeing the return value using * \p free_xml(). */ static xmlNode * create_cib_reply(const char *op, const char *call_id, const char *client_id, int call_options, int rc, xmlNode *call_data) { xmlNode *reply = create_xml_node(NULL, "cib-reply"); CRM_ASSERT(reply != NULL); crm_xml_add(reply, F_TYPE, T_CIB); crm_xml_add(reply, F_CIB_OPERATION, op); crm_xml_add(reply, F_CIB_CALLID, call_id); crm_xml_add(reply, F_CIB_CLIENTID, client_id); crm_xml_add_int(reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(reply, F_CIB_RC, rc); if (call_data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_CIB_CALLDATA, call_data); } crm_log_xml_explicit(reply, "cib:reply"); return reply; } static void do_local_notify(xmlNode *notify_src, const char *client_id, bool sync_reply, bool from_peer) { int rid = 0; int call_id = 0; pcmk__client_t *client_obj = NULL; CRM_ASSERT(notify_src && client_id); crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); client_obj = pcmk__find_client_by_id(client_id); if (client_obj == NULL) { crm_debug("Could not send response %d: client %s not found", call_id, client_id); return; } if (sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to client %s%s", rid, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } else { crm_trace("Sending response (call %d) to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } } else { crm_trace("Sending event %d to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } switch (PCMK__CLIENT_TYPE(client_obj)) { case pcmk__client_ipc: { int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); if (rc != pcmk_rc_ok) { crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", (sync_reply? "Synchronous" : "Asynchronous"), pcmk__client_name(client_obj), pcmk_rc_str(rc), rc); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: #endif case pcmk__client_tcp: pcmk__remote_send_xml(client_obj->remote, notify_src); break; default: crm_err("Unknown transport for client %s " CRM_XS " flags=%#016" PRIx64, pcmk__client_name(client_obj), client_obj->flags); } } void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, pcmk__client_t *cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); int call_options = cib_none; crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); /* Requests with cib_transaction set should not be sent to based directly * (outside of a commit-transaction request) */ if (pcmk_is_set(call_options, cib_transaction)) { return; } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, __func__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); pcmk__ipc_send_xml(cib_client, id, ack, flags); cib_client->request_id = 0; free_xml(ack); } return; } else if (pcmk__str_eq(op, T_CIB_NOTIFY, pcmk__str_none)) { /* Update the notify filters for this client */ int on_off = 0; crm_exit_t status = CRM_EX_OK; uint64_t bit = UINT64_C(0); const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks %s for client %s", type, (on_off? "on" : "off"), pcmk__client_name(cib_client)); if (pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) { bit = cib_notify_post; } else if (pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) { bit = cib_notify_pre; } else if (pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) { bit = cib_notify_confirm; } else if (pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) { bit = cib_notify_diff; } else if (pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { bit = cib_notify_replace; } else { status = CRM_EX_INVALID_PARAM; } if (bit != 0) { if (on_off) { pcmk__set_client_flags(cib_client, bit); } else { pcmk__clear_client_flags(cib_client, bit); } } pcmk__ipc_send_ack(cib_client, id, flags, "ack", NULL, status); return; } cib_process_request(op_request, privileged, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; pcmk__client_t *cib_client = pcmk__find_client(c); xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); if (op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } if (op_request == NULL) { crm_trace("Invalid message from %p", c); pcmk__ipc_send_ack(cib_client, id, flags, "nack", NULL, CRM_EX_PROTOCOL); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (pcmk_is_set(call_options, cib_sync_call)) { CRM_LOG_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = pcmk__itoa(cib_client->pid); } else { cib_client->name = strdup(value); if (crm_is_daemon_name(value)) { pcmk__set_client_flags(cib_client, cib_is_daemon); } } } /* Allow cluster daemons more leeway before being evicted */ if (pcmk_is_set(cib_client->flags, cib_is_daemon)) { const char *qmax = cib_config_lookup("cluster-ipc-limit"); if (pcmk__set_client_queue_max(cib_client, qmax)) { crm_trace("IPC threshold for client %s[%u] is now %u", pcmk__client_name(cib_client), cib_client->pid, cib_client->queue_max); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); CRM_LOG_ASSERT(cib_client->user != NULL); pcmk__update_acl_user(op_request, F_CIB_USER, cib_client->user); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; static gboolean cib_digester_cb(gpointer data) { if (based_is_primary) { char buffer[32]; xmlNode *ping = create_xml_node(NULL, "ping"); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, "%" PRIu64, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, F_TYPE, "cib"); crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING); crm_xml_add(ping, F_CIB_PING_ID, buffer); crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); send_cluster_message(NULL, crm_msg_cib, ping, TRUE); free_xml(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = crm_element_value(reply, F_ORIG); xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA); const char *seq_s = crm_element_value(pong, F_CIB_PING_ID); const char *digest = crm_element_value(pong, XML_ATTR_DIGEST); if (seq_s == NULL) { crm_debug("Ignoring ping reply with no " F_CIB_PING_ID); return; } else { long long seq_ll; if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) { return; } seq = (uint64_t) seq_ll; } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION); if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) { xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA); crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(the_cib, XML_ATTR_GENERATION), crm_element_value(the_cib, XML_ATTR_NUMUPDATES), ping_digest, host, remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_", digest, remote_cib); if(remote_cib && remote_cib->children) { // Additional debug xml_calculate_changes(the_cib, remote_cib); pcmk__output_set_log_level(logger_out, LOG_INFO); pcmk__xml_show_changes(logger_out, remote_cib); crm_trace("End of differences"); } free_xml(remote_cib); sync_our_cib(reply, FALSE); } } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); pcmk__intkey_table_remove(local_notify_queue, bcast_id); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = strdup(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback); } pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify); // cppcheck doesn't know notify will get freed when hash table is destroyed // cppcheck-suppress memleak } static void parse_local_options_v1(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if (pcmk_is_set(operation->flags, cib__op_attr_modifies) && !pcmk_is_set(call_options, cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if ((host == NULL) && based_is_primary) { crm_trace("Processing %s op locally from client %s as primary", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing locally addressed %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to client %s", op, pcmk__client_name(cib_client), pcmk__s(host, "the primary instance")); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options_v2(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { // Process locally and notify local client *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (pcmk_is_set(operation->flags, cib__op_attr_local)) { /* Always process locally if cib__op_attr_local is set. * * @COMPAT: Currently host is ignored. At a compatibility break, throw * an error (from cib_process_request() or earlier) if host is not NULL or * OUR_NODENAME. */ crm_trace("Processing always-local %s op from client %s", op, pcmk__client_name(cib_client)); if (!pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { crm_warn("Operation '%s' is always local but its target host is " "set to '%s'", op, host); } return; } if (pcmk_is_set(operation->flags, cib__op_attr_modifies) || !pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { // Forward modifying and non-local requests via cluster *process = FALSE; *needs_reply = FALSE; *local_notify = FALSE; *needs_forward = TRUE; crm_trace("%s op from %s needs to be forwarded to %s", op, pcmk__client_name(cib_client), pcmk__s(host, "all nodes")); return; } if (stand_alone) { crm_trace("Processing %s op from client %s (stand-alone)", op, pcmk__client_name(cib_client)); } else { crm_trace("Processing %saddressed %s op from client %s", ((host != NULL)? "locally " : "un"), op, pcmk__client_name(cib_client)); } } static void parse_local_options(const pcmk__client_t *cib_client, const cib__operation_t *operation, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if(cib_legacy_mode()) { parse_local_options_v1(cib_client, operation, call_options, host, op, local_notify, needs_reply, process, needs_forward); } else { parse_local_options_v2(cib_client, operation, call_options, host, op, local_notify, needs_reply, process, needs_forward); } } static gboolean parse_peer_options_v1(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } op = crm_element_value(request, F_CIB_OPERATION); - crm_trace("Processing %s request sent by %s", op, originator); + crm_trace("Processing legacy %s request sent by %s", op, originator); + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { /* Always process these */ *local_notify = FALSE; if (reply_to == NULL || is_reply) { *process = TRUE; } if (is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); return TRUE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); *needs_reply = TRUE; return TRUE; } else if ((host == NULL) && based_is_primary) { crm_trace("Processing %s request sent to primary instance from %s", op, originator); return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring message for primary instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", host); } else if ((reply_to == NULL) && !based_is_primary) { // This is for the primary instance, and we're not it crm_trace("Ignoring reply for primary instance"); } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, originator); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, originator); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static gboolean parse_peer_options_v2(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { const char *host = NULL; const char *delegated = crm_element_value(request, F_CIB_DELEGATED); const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); gboolean is_reply = pcmk__str_eq(reply_to, OUR_NODENAME, pcmk__str_casei); + if (originator == NULL) { // Shouldn't be possible + originator = "peer"; + } + if (pcmk__str_eq(op, PCMK__CIB_REQUEST_REPLACE, pcmk__str_none)) { /* sync_our_cib() sets F_CIB_ISREPLY */ if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SYNC_TO_ALL, pcmk__str_none)) { // Nothing to do } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_UPGRADE, pcmk__str_none)) { /* Only the DC (node with the oldest software) should process * this operation if F_CIB_SCHEMA_MAX is unset * * If the DC is happy it will then send out another * PCMK__CIB_REQUEST_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time F_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); - crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", - op, (is_reply? " reply" : ""), + crm_trace("Parsing upgrade %s for %s with max=%s and upgrade_rc=%s", + (is_reply? "reply" : "request"), (based_is_primary? "primary" : "secondary"), - (max? max : "none"), (upgrade_rc? upgrade_rc : "none")); + pcmk__s(max, "none"), pcmk__s(upgrade_rc, "none")); if (upgrade_rc != NULL) { // Our upgrade request was rejected by DC, notify clients of result crm_xml_add(request, F_CIB_RC, upgrade_rc); } else if ((max == NULL) && based_is_primary) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { - // Ignore broadcast client requests when we're not DC + // Ignore broadcast client requests when we're not primary return FALSE; } } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); legacy_mode = TRUE; return FALSE; } else if (is_reply && pcmk_is_set(operation->flags, cib__op_attr_modifies)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (pcmk__str_eq(op, PCMK__CIB_REQUEST_SHUTDOWN, pcmk__str_none)) { - // @COMPAT: Legacy handling - crm_debug("Legacy handling of %s message from %s", op, originator); *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; + } else { // Not possible? + crm_debug("Ignoring shutdown request from %s because reply_to=%s", + originator, reply_to); } return *process; } - if(is_reply) { - crm_trace("Handling %s reply sent from %s to local clients", op, originator); + if (is_reply) { + crm_trace("Will notify local clients for %s reply from %s", + op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; *local_notify = pcmk__str_eq(delegated, OUR_NODENAME, pcmk__str_casei); host = crm_element_value(request, F_CIB_HOST); if (pcmk__str_eq(host, OUR_NODENAME, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { - /* this is for a specific instance and we're not it */ - crm_trace("Ignoring %s operation for instance on %s", op, host); + crm_trace("Ignoring %s request intended for CIB manager on %s", + op, host); return FALSE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { *needs_reply = TRUE; } - crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, - crm_element_value(request, F_CIB_CLIENTNAME), - crm_element_value(request, F_CIB_CALLID), - originator, (*local_notify)?"(notify)":""); + crm_trace("Processing %s request broadcast by %s call %s on %s " + "(local clients will%s be notified)", op, + pcmk__s(crm_element_value(request, F_CIB_CLIENTNAME), "client"), + pcmk__s(crm_element_value(request, F_CIB_CALLID), "without ID"), + originator, (*local_notify? "" : "not")); return TRUE; } static gboolean parse_peer_options(const cib__operation_t *operation, xmlNode *request, gboolean *local_notify, gboolean *needs_reply, gboolean *process) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { return parse_peer_options_v1(operation, request, local_notify, needs_reply, process); } else { return parse_peer_options_v2(operation, request, local_notify, needs_reply, process); } } /*! * \internal * \brief Forward a CIB request to the appropriate target host(s) * * \param[in] request CIB request to forward */ static void forward_request(xmlNode *request) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *section = crm_element_value(request, F_CIB_SECTION); const char *host = crm_element_value(request, F_CIB_HOST); const char *originator = crm_element_value(request, F_ORIG); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *call_id = crm_element_value(request, F_CIB_CALLID); int log_level = LOG_INFO; if (pcmk__str_eq(op, PCMK__CIB_REQUEST_NOOP, pcmk__str_none)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", pcmk__s(op, "invalid"), pcmk__s(section, "all"), pcmk__s(host, (cib_legacy_mode()? "primary" : "all")), pcmk__s(originator, "local"), pcmk__s(client_name, "unspecified"), pcmk__s(call_id, "unspecified")); crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); send_cluster_message(((host != NULL)? crm_get_peer(0, host) : NULL), crm_msg_cib, request, FALSE); // Return the request to its original state xml_remove_prop(request, F_CIB_DELEGATED); } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* @COMPAT: Legacy code * * This successful call modified the CIB, and the change needs to be * broadcast (sent via cluster to all nodes). */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; int format = 1; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); crm_element_value_int(result_diff, "format", &format); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, F_CIB_ISREPLY, originator); pcmk__xe_set_bool_attr(msg, F_CIB_GLOBAL_UPDATE, true); crm_xml_add(msg, F_CIB_OPERATION, PCMK__CIB_REQUEST_APPLY_PATCH); crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER); if (format == 1) { CRM_ASSERT(digest != NULL); } add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); } return FALSE; } /*! * \internal * \brief Handle an IPC or CPG message containing a request * * \param[in,out] request Request XML * \param[in] privileged Whether privileged commands may be run * (see cib_server_ops[] definition) * \param[in] cib_client IPC client that sent request (or NULL if CPG) * * \return Legacy Pacemaker return code */ int cib_process_request(xmlNode *request, gboolean privileged, const pcmk__client_t *cib_client) { // @TODO: Break into multiple smaller functions int call_options = 0; gboolean process = TRUE; // Whether to process request locally now gboolean is_update = TRUE; // Whether request would modify CIB gboolean needs_reply = TRUE; // Whether to build a reply gboolean local_notify = FALSE; // Whether to notify (local) requester gboolean needs_forward = FALSE; // Whether to forward request somewhere else xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *target = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const cib__operation_t *operation = NULL; cib__op_fn_t op_function = NULL; crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if ((host != NULL) && (*host == '\0')) { host = NULL; } // @TODO: Improve trace messages. Target is accurate only for legacy mode. if (host) { target = host; } else if (call_options & cib_scope_local) { target = "local host"; } else { target = "primary"; } if (cib_client == NULL) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, client_name, call_id, originator, target, reply_to); } else { crm_xml_add(request, F_ORIG, OUR_NODENAME); crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } rc = cib__get_operation(op, &operation); rc = pcmk_rc2legacy(rc); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return rc; } op_function = based_get_op_function(operation); if (op_function == NULL) { crm_err("Operation %s not supported by CIB manager", op); return -EOPNOTSUPP; } if (cib_client != NULL) { parse_local_options(cib_client, operation, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (!parse_peer_options(operation, request, &local_notify, &needs_reply, &process)) { return rc; } if (pcmk_is_set(call_options, cib_transaction)) { /* All requests in a transaction are processed locally against a working * CIB copy, and we don't notify for individual requests because the * entire transaction is atomic. * * We still call the option parser functions above, for the sake of log * messages and checking whether we're the target for peer requests. */ process = TRUE; needs_reply = FALSE; local_notify = FALSE; needs_forward = FALSE; } is_update = pcmk_is_set(operation->flags, cib__op_attr_modifies); if (pcmk_is_set(call_options, cib_discard_reply)) { /* If the request will modify the CIB, and we are in legacy mode, we * need to build a reply so we can broadcast a diff, even if the * requester doesn't want one. */ needs_reply = is_update && cib_legacy_mode(); local_notify = FALSE; crm_trace("Client is not interested in the reply"); } if (needs_forward) { forward_request(request); return rc; } if (cib_status != pcmk_ok) { rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = create_cib_reply(op, call_id, client_id, call_options, rc, the_cib); } else if (process) { time_t finished = 0; time_t now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); rc = cib_process_command(request, operation, op_function, &op_reply, &result_diff, privileged); if (!is_update) { level = LOG_TRACE; } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok) { level = LOG_WARNING; } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", client_name, call_id, the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); finished = time(NULL); if ((finished - now) > 3) { crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } if (is_update && !cib_legacy_mode()) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", client_name, call_id, local_notify?" with local notification":""); } else if (!needs_reply || stand_alone) { // This was a non-originating secondary update crm_trace("Completed update as secondary"); } else if (cib_legacy_mode() && rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; crm_trace("Queuing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); queue_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if ((cib_client == NULL) && !pcmk_is_set(call_options, cib_discard_reply)) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (pcmk_is_set(call_options, cib_sync_call)? "" : "a"), client_id); if (process == FALSE) { do_local_notify(request, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } else { do_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } } free_xml(op_reply); free_xml(result_diff); return rc; } /*! * \internal * \brief Get a CIB operation's input from the request XML * * \param[in] request CIB request XML * \param[in] type CIB operation type * \param[out] section Where to store CIB section name * * \return Input XML for CIB operation * * \note If not \c NULL, the return value is a non-const pointer to part of * \p request. The caller should not free it directly. */ static xmlNode * prepare_input(const xmlNode *request, enum cib__op_type type, const char **section) { xmlNode *input = NULL; *section = NULL; switch (type) { case cib__op_apply_patch: if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { input = get_message_xml(request, F_CIB_UPDATE_DIFF); } else { input = get_message_xml(request, F_CIB_CALLDATA); } break; default: input = get_message_xml(request, F_CIB_CALLDATA); *section = crm_element_value(request, F_CIB_SECTION); break; } // Grab the specified section if ((*section != NULL) && pcmk__xe_is(input, XML_TAG_CIB)) { input = pcmk_find_cib_element(input, *section); } return input; } static char * calculate_section_digest(const char *xpath, xmlNode * xml_obj) { xmlNode *xml_section = NULL; if (xml_obj == NULL) { return NULL; } xml_section = get_xpath_object(xpath, xml_obj, LOG_TRACE); if (xml_section == NULL) { return NULL; } return calculate_xml_versioned_digest(xml_section, FALSE, TRUE, CRM_FEATURE_SET); } #define XPATH_CONFIG "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION #define XPATH_NODES XPATH_CONFIG "/" XML_CIB_TAG_NODES #define XPATH_ALERTS XPATH_CONFIG "/" XML_CIB_TAG_ALERTS #define XPATH_STATUS "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS /*! * \internal * \brief Calculate digests of CIB sections * * \param[in] cib CIB to get digests from * \param[out] digests Where to store digests * * \note The caller is responsible for freeing the output argument's members * using \p free_digests(). */ static void get_digests(xmlNode *cib, struct digest_data *digests) { digests->nodes = calculate_section_digest(XPATH_NODES, cib); digests->alerts = calculate_section_digest(XPATH_ALERTS, cib); digests->status = calculate_section_digest(XPATH_STATUS, cib); } /*! * \internal * \brief Determine which CIB sections were changed by an operation * * \param[in] before Digests before the operation * \param[in] after Digests after the operation * * \return Group of enum cib_change_section_info flags indicating which * sections have changed */ static uint32_t get_change_sections(const struct digest_data *before, const struct digest_data *after) { uint32_t change_sections = cib_change_section_none; if (!pcmk__str_eq(before->nodes, after->nodes, pcmk__str_none)) { pcmk__set_change_section(change_sections, cib_change_section_nodes); } if (!pcmk__str_eq(before->alerts, after->alerts, pcmk__str_none)) { pcmk__set_change_section(change_sections, cib_change_section_alerts); } if (!pcmk__str_eq(before->status, after->status, pcmk__str_none)) { pcmk__set_change_section(change_sections, cib_change_section_status); } return change_sections; } /*! * \internal * \brief Check whether a CIB replace notification should be sent * * A CIB replace notification should be sent after an operation if the operation * is of an appropriate type, notifications are not disabled, and any of certain * CIB sections has changed. * * \param[in] before_digests Digests before operation * \param[in] after_cib Result CIB after operation * \param[in] operation CIB operation * \param[in] call_options Group of enum cib_call_options flags * \param[out] change_sections Group of enum cib_change_section_info * flags indicating which sections have changed * * \return \p true if a replace notification should be sent, or \p false * otherwise */ static bool should_replace_notify(const struct digest_data *before_digests, xmlNode *after_cib, const cib__operation_t *operation, int call_options, uint32_t *change_sections) { struct digest_data after_digests = { 0, }; *change_sections = cib_change_section_none; if (!pcmk_is_set(operation->flags, cib__op_attr_replaces) || pcmk_is_set(call_options, cib_inhibit_notify)) { return false; } get_digests(after_cib, &after_digests); crm_trace("after-digest %s:%s:%s", pcmk__s(after_digests.nodes, "(null)"), pcmk__s(after_digests.alerts, "(null)"), pcmk__s(after_digests.status, "(null)")); *change_sections = get_change_sections(before_digests, &after_digests); free_digests(&after_digests); return (*change_sections != cib_change_section_none); } // v1 and v2 patch formats #define XPATH_CONFIG_CHANGE \ "//" XML_CIB_TAG_CRMCONFIG " | " \ "//" XML_DIFF_CHANGE \ "[contains(@" XML_DIFF_PATH ",'/" XML_CIB_TAG_CRMCONFIG "/')]" static bool contains_config_change(xmlNode *diff) { bool changed = false; if (diff) { xmlXPathObject *xpathObj = xpath_search(diff, XPATH_CONFIG_CHANGE); if (numXpathResults(xpathObj) > 0) { changed = true; } freeXpathObject(xpathObj); } return changed; } static int cib_process_command(xmlNode *request, const cib__operation_t *operation, cib__op_fn_t op_function, xmlNode **reply, xmlNode **cib_diff, bool privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; int call_options = 0; const char *op = NULL; const char *section = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *origin = crm_element_value(request, F_ORIG); int rc = pcmk_ok; bool config_changed = false; bool manage_counters = true; static mainloop_timer_t *digest_timer = NULL; struct digest_data before_digests = { 0, }; CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if (!privileged && pcmk_is_set(operation->flags, cib__op_attr_privileged)) { rc = -EACCES; crm_trace("Failed due to lack of privileges: %s", pcmk_strerror(rc)); goto done; } input = prepare_input(request, operation->type, §ion); if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { rc = cib_perform_op(op, call_options, op_function, true, section, request, input, false, &config_changed, &the_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* @COMPAT: Handle a valid write action (legacy) * * @TODO: Re-evaluate whether this is all truly legacy. The cib_force_diff * portion is. However, F_CIB_GLOBAL_UPDATE may be set by a sync operation * even in non-legacy mode, and manage_counters tells xml_create_patchset() * whether to update version/epoch info. */ if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { manage_counters = false; cib__set_call_options(call_options, "call", cib_force_diff); crm_trace("Global update detected"); CRM_LOG_ASSERT(pcmk__str_any_of(op, PCMK__CIB_REQUEST_APPLY_PATCH, PCMK__CIB_REQUEST_REPLACE, NULL)); } ping_modified_since = TRUE; if (pcmk_is_set(call_options, cib_inhibit_bcast)) { crm_trace("Skipping update: inhibit broadcast"); manage_counters = false; } // Calculate the digests of relevant sections before the operation if (pcmk_is_set(operation->flags, cib__op_attr_replaces) && !pcmk_any_flags_set(call_options, cib_dryrun|cib_inhibit_notify|cib_transaction)) { get_digests(the_cib, &before_digests); crm_trace("before-digest %s:%s:%s", pcmk__s(before_digests.nodes, "(null)"), pcmk__s(before_digests.alerts, "(null)"), pcmk__s(before_digests.status, "(null)")); } // result_cib must not be modified after cib_perform_op() returns rc = cib_perform_op(op, call_options, op_function, false, section, request, input, manage_counters, &config_changed, &the_cib, &result_cib, cib_diff, &output); // @COMPAT: Legacy code if (!manage_counters) { int format = 1; // If the diff is NULL at this point, it's because nothing changed if (*cib_diff != NULL) { crm_element_value_int(*cib_diff, "format", &format); } if (format == 1) { config_changed = cib__config_changed_v1(NULL, NULL, cib_diff); } } /* Always write to disk for successful ops with the flag set. This also * negates the need to detect ordering changes. */ if ((rc == pcmk_ok) && pcmk_is_set(operation->flags, cib__op_attr_writes_through)) { config_changed = true; } if ((rc == pcmk_ok) && !pcmk_any_flags_set(call_options, cib_dryrun|cib_transaction)) { uint32_t change_sections = cib_change_section_none; if (result_cib != the_cib) { crm_trace("Activating %s->%s%s", crm_element_value(the_cib, XML_ATTR_NUMUPDATES), crm_element_value(result_cib, XML_ATTR_NUMUPDATES), (config_changed? " changed" : "")); rc = activateCibXml(result_cib, config_changed, op); if (rc != pcmk_ok) { crm_err("Failed to activate new CIB: %s", pcmk_strerror(rc)); } } if ((rc == pcmk_ok) && contains_config_change(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (should_replace_notify(&before_digests, result_cib, operation, call_options, &change_sections)) { // @TODO: Should update argument be result_cib instead of the_cib? cib_replace_notify(op, rc, call_id, client_id, client_name, origin, the_cib, *cib_diff, change_sections); } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { CRM_ASSERT(result_cib != the_cib); if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } output = result_cib; } else { crm_trace("Not activating %d %d %s", rc, pcmk_is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES)); if (result_cib != the_cib) { free_xml(result_cib); } } if (!pcmk_any_flags_set(call_options, cib_dryrun|cib_inhibit_notify|cib_transaction)) { crm_trace("Sending notifications %d", pcmk_is_set(call_options, cib_dryrun)); cib_diff_notify(op, rc, call_id, client_id, client_name, origin, input, *cib_diff); } pcmk__output_set_log_level(logger_out, LOG_TRACE); logger_out->message(logger_out, "xml-patchset", *cib_diff); done: if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) { *reply = create_cib_reply(op, call_id, client_id, call_options, rc, output); } if (output != the_cib) { free_xml(output); } free_digests(&before_digests); crm_trace("done"); return rc; } void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (cib_legacy_mode() && pcmk__str_eq(originator, OUR_NODENAME, pcmk__str_casei|pcmk__str_null_matches)) { /* message is from ourselves */ int bcast_id = 0; if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace(msg, "Peer[inbound]"); */ cib_process_request(msg, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__func__, CRM_EX_ERROR); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *a_client = value; crm_err("Can't disconnect client %s: Not implemented", pcmk__client_name(a_client)); } static void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); - if (active < 2) { + if (active < 2) { // This is the last active node terminate_cib(__func__, 0); return; } - crm_info("Sending disconnect notification to %d peers...", active); + crm_info("Sending shutdown request to %d peers", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, PCMK__CIB_REQUEST_SHUTDOWN); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += pcmk__ipc_client_count(); crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count()); pcmk__foreach_ipc_client(disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (pcmk__ipc_client_count() == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", pcmk__ipc_client_count(), srv_stats.active_connections); } } extern int remote_fd; extern int remote_tls_fd; /*! * \internal * \brief Close remote sockets, free the global CIB and quit * * \param[in] caller Name of calling function (for log message) * \param[in] fast If -1, skip disconnect; if positive, exit that */ void terminate_cib(const char *caller, int fast) { crm_info("%s: Exiting%s...", caller, (fast > 0)? " fast" : mainloop ? " from mainloop" : ""); if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } uninitializeCib(); if (logger_out != NULL) { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); logger_out = NULL; } if (fast > 0) { /* Quit fast on error */ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(fast); } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) { /* Quit via returning from the main loop. If fast == -1, we skip the * disconnect here, and it will be done when the main loop returns * (this allows the peer status callback to avoid messing with the * peer caches). */ if (fast == 0) { crm_cluster_disconnect(crm_cluster); } g_main_loop_quit(mainloop); } else { /* Quit via clean exit. Even the peer status callback can disconnect * here, because we're not returning control to the caller. */ crm_cluster_disconnect(crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(CRM_EX_OK); } } diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c index 18dee9f617..825f86165e 100644 --- a/daemons/controld/controld_cib.c +++ b/daemons/controld/controld_cib.c @@ -1,1166 +1,1164 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* sleep */ #include #include #include #include #include #include // Call ID of the most recent in-progress CIB resource update (or 0 if none) static int pending_rsc_update = 0; // Call IDs of requested CIB replacements that won't trigger a new election // (used as a set of gint values) static GHashTable *cib_replacements = NULL; /*! * \internal * \brief Store the call ID of a CIB replacement that the controller requested * * The \p do_cib_replaced() callback function will avoid triggering a new * election when we're notified of one of these expected replacements. * * \param[in] call_id CIB call ID (or 0 for a synchronous call) * * \note This function should be called after making any asynchronous CIB * request (or before making any synchronous CIB request) that may replace * part of the nodes or status section. This may include CIB sync calls. */ void controld_record_cib_replace_call(int call_id) { CRM_CHECK(call_id >= 0, return); if (cib_replacements == NULL) { cib_replacements = g_hash_table_new(NULL, NULL); } /* If the call ID is already present in the table, then it's old. We may not * be removing them properly, and we could improperly ignore replacement * notifications if cib_t:call_id wraps around. */ CRM_LOG_ASSERT(g_hash_table_add(cib_replacements, GINT_TO_POINTER((gint) call_id))); } /*! * \internal * \brief Remove the call ID of a CIB replacement from the replacements table * * \param[in] call_id CIB call ID (or 0 for a synchronous call) * * \return \p true if \p call_id was found in the table, or \p false otherwise * * \note CIB notifications run before CIB callbacks. If this function is called * from within a callback, \p do_cib_replaced() will have removed * \p call_id from the table first if relevant changes triggered a * notification. */ bool controld_forget_cib_replace_call(int call_id) { CRM_CHECK(call_id >= 0, return false); if (cib_replacements == NULL) { return false; } return g_hash_table_remove(cib_replacements, GINT_TO_POINTER((gint) call_id)); } /*! * \internal * \brief Empty the hash table containing call IDs of CIB replacement requests */ void controld_forget_all_cib_replace_calls(void) { if (cib_replacements != NULL) { g_hash_table_remove_all(cib_replacements); } } /*! * \internal * \brief Free the hash table containing call IDs of CIB replacement requests */ void controld_destroy_cib_replacements_table(void) { if (cib_replacements != NULL) { g_hash_table_destroy(cib_replacements); cib_replacements = NULL; } } /*! * \internal * \brief Respond to a dropped CIB connection * * \param[in] user_data CIB connection that dropped */ static void handle_cib_disconnect(gpointer user_data) { CRM_LOG_ASSERT(user_data == controld_globals.cib_conn); controld_trigger_fsa(); controld_globals.cib_conn->state = cib_disconnected; if (pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { // @TODO This should trigger a reconnect, not a shutdown crm_crit("Lost connection to the CIB manager, shutting down"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_CIB_CONNECTED); } else { // Expected - crm_info("Connection to the CIB manager terminated"); + crm_info("Disconnected from the CIB manager"); } } static void do_cib_updated(const char *event, xmlNode * msg) { if (pcmk__alert_in_patchset(msg, TRUE)) { controld_trigger_config(); } } static void do_cib_replaced(const char *event, xmlNode * msg) { int call_id = 0; const char *client_id = crm_element_value(msg, F_CIB_CLIENTID); uint32_t change_section = cib_change_section_nodes |cib_change_section_status; long long value = 0; crm_debug("Updating the CIB after a replace: DC=%s", pcmk__btoa(AM_I_DC)); if (!AM_I_DC) { return; } if ((crm_element_value_int(msg, F_CIB_CALLID, &call_id) == 0) && pcmk__str_eq(client_id, controld_globals.cib_client_id, pcmk__str_none) && controld_forget_cib_replace_call(call_id)) { // We requested this replace op. No need to restart the join. return; } if ((crm_element_value_ll(msg, F_CIB_CHANGE_SECTION, &value) < 0) || (value < 0) || (value > UINT32_MAX)) { crm_trace("Couldn't parse '%s' from message", F_CIB_CHANGE_SECTION); } else { change_section = (uint32_t) value; } if (pcmk_any_flags_set(change_section, cib_change_section_nodes |cib_change_section_status)) { /* start the join process again so we get everyone's LRM status */ populate_cib_nodes(node_update_quick|node_update_all, __func__); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } void controld_disconnect_cib_manager(void) { cib_t *cib_conn = controld_globals.cib_conn; CRM_ASSERT(cib_conn != NULL); - crm_info("Disconnecting from the CIB manager"); + crm_debug("Disconnecting from the CIB manager"); controld_clear_fsa_input_flags(R_CIB_CONNECTED); cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, do_cib_replaced); cib_conn->cmds->del_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, do_cib_updated); cib_free_callbacks(cib_conn); if (cib_conn->state != cib_disconnected) { cib_conn->cmds->set_secondary(cib_conn, cib_scope_local|cib_discard_reply); cib_conn->cmds->signoff(cib_conn); } - - crm_notice("Disconnected from the CIB manager"); } /* A_CIB_STOP, A_CIB_START, O_CIB_RESTART */ void do_cib_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static int cib_retries = 0; cib_t *cib_conn = controld_globals.cib_conn; void (*dnotify_fn) (gpointer user_data) = handle_cib_disconnect; void (*replace_cb) (const char *event, xmlNodePtr msg) = do_cib_replaced; void (*update_cb) (const char *event, xmlNodePtr msg) = do_cib_updated; int rc = pcmk_ok; CRM_ASSERT(cib_conn != NULL); if (pcmk_is_set(action, A_CIB_STOP)) { if ((cib_conn->state != cib_disconnected) && (pending_rsc_update != 0)) { crm_info("Waiting for resource update %d to complete", pending_rsc_update); crmd_fsa_stall(FALSE); return; } controld_disconnect_cib_manager(); } if (!pcmk_is_set(action, A_CIB_START)) { return; } if (cur_state == S_STOPPING) { crm_err("Ignoring request to connect to the CIB manager after " "shutdown"); return; } rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); if (rc != pcmk_ok) { // A short wait that usually avoids stalling the FSA sleep(1); rc = cib_conn->cmds->signon(cib_conn, CRM_SYSTEM_CRMD, cib_command_nonblocking); } if (rc != pcmk_ok) { crm_info("Could not connect to the CIB manager: %s", pcmk_strerror(rc)); } else if (cib_conn->cmds->set_connection_dnotify(cib_conn, dnotify_fn) != pcmk_ok) { crm_err("Could not set dnotify callback"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_REPLACE_NOTIFY, replace_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (replace)"); } else if (cib_conn->cmds->add_notify_callback(cib_conn, T_CIB_DIFF_NOTIFY, update_cb) != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); } else { controld_set_fsa_input_flags(R_CIB_CONNECTED); cib_retries = 0; cib_conn->cmds->client_id(cib_conn, &controld_globals.cib_client_id, NULL); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { cib_retries++; if (cib_retries < 30) { crm_warn("Couldn't complete CIB registration %d times... " "pause and retry", cib_retries); controld_start_wait_timer(); crmd_fsa_stall(FALSE); } else { crm_err("Could not complete CIB registration %d times... " "hard error", cib_retries); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } } #define MIN_CIB_OP_TIMEOUT (30) /*! * \internal * \brief Get the timeout (in seconds) that should be used with CIB operations * * \return The maximum of 30 seconds, the value of the PCMK_cib_timeout * environment variable, or 10 seconds times one more than the number of * nodes in the cluster. */ unsigned int cib_op_timeout(void) { static int env_timeout = -1; unsigned int calculated_timeout = 0; if (env_timeout == -1) { const char *env = getenv("PCMK_cib_timeout"); pcmk__scan_min_int(env, &env_timeout, MIN_CIB_OP_TIMEOUT); crm_trace("Minimum CIB op timeout: %ds (environment: %s)", env_timeout, (env? env : "none")); } calculated_timeout = 1 + crm_active_peers(); if (crm_remote_peer_cache) { calculated_timeout += g_hash_table_size(crm_remote_peer_cache); } calculated_timeout *= 10; calculated_timeout = QB_MAX(calculated_timeout, env_timeout); crm_trace("Calculated timeout: %us", calculated_timeout); if (controld_globals.cib_conn) { controld_globals.cib_conn->call_timeout = calculated_timeout; } return calculated_timeout; } /*! * \internal * \brief Get CIB call options to use local scope if primary is unavailable * * \return CIB call options */ int crmd_cib_smart_opt(void) { int call_opt = cib_none; if ((controld_globals.fsa_state == S_ELECTION) || (controld_globals.fsa_state == S_PENDING)) { crm_info("Sending update to local CIB in state: %s", fsa_state2string(controld_globals.fsa_state)); cib__set_call_options(call_opt, "update", cib_scope_local); } return call_opt; } static void cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *desc = user_data; if (rc == 0) { crm_debug("Deletion of %s (via CIB call %d) succeeded", desc, call_id); } else { crm_warn("Deletion of %s (via CIB call %d) failed: %s " CRM_XS " rc=%d", desc, call_id, pcmk_strerror(rc), rc); } } // Searches for various portions of node_state to delete // Match a particular node's node_state (takes node name 1x) #define XPATH_NODE_STATE "//" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" // Node's lrm section (name 1x) #define XPATH_NODE_LRM XPATH_NODE_STATE "/" XML_CIB_TAG_LRM /* Node's lrm_rsc_op entries and lrm_resource entries without unexpired lock * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x) */ #define XPATH_NODE_LRM_UNLOCKED XPATH_NODE_STATE "//" XML_LRM_TAG_RSC_OP \ "|" XPATH_NODE_STATE \ "//" XML_LRM_TAG_RESOURCE \ "[not(@" XML_CONFIG_ATTR_SHUTDOWN_LOCK ") " \ "or " XML_CONFIG_ATTR_SHUTDOWN_LOCK "<%lld]" // Node's transient_attributes section (name 1x) #define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" XML_TAG_TRANSIENT_NODEATTRS // Everything under node_state (name 1x) #define XPATH_NODE_ALL XPATH_NODE_STATE "/*" /* Unlocked history + transient attributes * (name 2x, (seconds_since_epoch - XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT) 1x, * name 1x) */ #define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS /*! * \internal * \brief Get the XPath and description of a node state section to be deleted * * \param[in] uname Desired node * \param[in] section Subsection of node_state to be deleted * \param[out] xpath Where to store XPath of \p section * \param[out] desc If not \c NULL, where to store description of \p section */ void controld_node_state_deletion_strings(const char *uname, enum controld_section_e section, char **xpath, char **desc) { const char *desc_pre = NULL; // Shutdown locks that started before this time are expired long long expire = (long long) time(NULL) - controld_globals.shutdown_lock_limit; switch (section) { case controld_section_lrm: *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname); desc_pre = "resource history"; break; case controld_section_lrm_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED, uname, uname, expire); desc_pre = "resource history (other than shutdown locks)"; break; case controld_section_attrs: *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname); desc_pre = "transient attributes"; break; case controld_section_all: *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname); desc_pre = "all state"; break; case controld_section_all_unlocked: *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED, uname, uname, expire, uname); desc_pre = "all state (other than shutdown locks)"; break; default: // We called this function incorrectly CRM_ASSERT(false); break; } if (desc != NULL) { *desc = crm_strdup_printf("%s for node %s", desc_pre, uname); } } /*! * \internal * \brief Delete subsection of a node's CIB node_state * * \param[in] uname Desired node * \param[in] section Subsection of node_state to delete * \param[in] options CIB call options to use */ void controld_delete_node_state(const char *uname, enum controld_section_e section, int options) { cib_t *cib = controld_globals.cib_conn; char *xpath = NULL; char *desc = NULL; int cib_rc = pcmk_ok; CRM_ASSERT((uname != NULL) && (cib != NULL)); controld_node_state_deletion_strings(uname, section, &xpath, &desc); cib__set_call_options(options, "node state deletion", cib_xpath|cib_multiple); cib_rc = cib->cmds->remove(cib, xpath, NULL, options); fsa_register_cib_callback(cib_rc, desc, cib_delete_callback); crm_info("Deleting %s (via CIB call %d) " CRM_XS " xpath=%s", desc, cib_rc, xpath); // CIB library handles freeing desc free(xpath); } // Takes node name and resource ID #define XPATH_RESOURCE_HISTORY "//" XML_CIB_TAG_STATE \ "[@" XML_ATTR_UNAME "='%s']/" \ XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE \ "[@" XML_ATTR_ID "='%s']" // @TODO could add "and @XML_CONFIG_ATTR_SHUTDOWN_LOCK" to limit to locks /*! * \internal * \brief Clear resource history from CIB for a given resource and node * * \param[in] rsc_id ID of resource to be cleared * \param[in] node Node whose resource history should be cleared * \param[in] user_name ACL user name to use * \param[in] call_options CIB call options * * \return Standard Pacemaker return code */ int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options) { char *desc = NULL; char *xpath = NULL; int rc = pcmk_rc_ok; CRM_CHECK((rsc_id != NULL) && (node != NULL), return EINVAL); desc = crm_strdup_printf("resource history for %s on %s", rsc_id, node); if (controld_globals.cib_conn == NULL) { crm_err("Unable to clear %s: no CIB connection", desc); free(desc); return ENOTCONN; } // Ask CIB to delete the entry xpath = crm_strdup_printf(XPATH_RESOURCE_HISTORY, node, rsc_id); rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_DELETE, NULL, xpath, NULL, NULL, call_options|cib_xpath, user_name); if (rc < 0) { rc = pcmk_legacy2rc(rc); crm_err("Could not delete resource status of %s on %s%s%s: %s " CRM_XS " rc=%d", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : ""), pcmk_rc_str(rc), rc); free(desc); free(xpath); return rc; } if (pcmk_is_set(call_options, cib_sync_call)) { if (pcmk_is_set(call_options, cib_dryrun)) { crm_debug("Deletion of %s would succeed", desc); } else { crm_debug("Deletion of %s succeeded", desc); } free(desc); } else { crm_info("Clearing %s (via CIB call %d) " CRM_XS " xpath=%s", desc, rc, xpath); fsa_register_cib_callback(rc, desc, cib_delete_callback); // CIB library handles freeing desc } free(xpath); return pcmk_rc_ok; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * \p g_string_free() and the XML result with \p free_xml(). */ static GString * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { GString *list = NULL; *result = create_xml_node(NULL, XML_TAG_PARAMS); /* Consider all parameters only except private ones to be consistent with * what scheduler does with calculate_secure_digest(). */ if (param_type == ra_param_private && compare_version(controld_globals.dc_version, "3.16.0") >= 0) { g_hash_table_foreach(op->params, hash2field, *result); pcmk__filter_op_for_digest(*result); } for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, 256, " "); } pcmk__add_word(&list, 0, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } else { crm_trace("Removing attr %s from the xml result", param->rap_name); xml_remove_prop(*result, param->rap_name); } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, 0, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { // Add parameters not marked reloadable to the "op-force-restart" list list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * "op-force-restart" list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, (list == NULL)? "" : (const char *) list->str); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); if ((list != NULL) && (list->len > 0)) { crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); } else { crm_trace("%s: %s", op->rsc_id, digest); } if (list != NULL) { g_string_free(list, TRUE); } free_xml(restart); free(digest); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { GString *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, (const char *) list->str); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, (const char *) list->str); g_string_free(list, TRUE); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); } /*! * \internal * \brief Create XML for a resource history entry * * \param[in] func Function name of caller * \param[in,out] parent XML to add entry to * \param[in] rsc Affected resource * \param[in,out] op Action to add an entry for (or NULL to do nothing) * \param[in] node_name Node where action occurred */ void controld_add_resource_history_xml_as(const char *func, xmlNode *parent, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *node_name) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return; } target_rc = rsc_op_expected_rc(op); caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_CHECK(caller_version != NULL, caller_version = CRM_FEATURE_SET); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, controld_globals.our_nodename, func); if (xml_op == NULL) { return; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return; } /* Ideally the metadata is cached, and the agent is just a fallback. * * @TODO Go through all callers and ensure they get metadata asynchronously * first. */ metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_agent |controld_metadata_from_cache); if (metadata == NULL) { return; } crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return; } /*! * \internal * \brief Record an action as pending in the CIB, if appropriate * * \param[in] node_name Node where the action is pending * \param[in] rsc Resource that action is for * \param[in,out] op Pending action * * \return true if action was recorded in CIB, otherwise false */ bool controld_record_pending_op(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK((node_name != NULL) && (rsc != NULL) && (op != NULL), return false); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return false; } // Check action's record-pending meta-attribute (defaults to true) record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if ((record_pending != NULL) && !crm_is_true(record_pending)) { return false; } op->call_id = -1; op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); crm_debug("Recording pending %s-interval %s for %s on %s in the CIB", pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id, node_name); controld_update_resource_history(node_name, rsc, op, 0); return true; } static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource history update completed (call=%d rc=%d)", call_id, rc); break; default: if (call_id > 0) { crm_warn("Resource history update %d failed: %s " CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc); } else { crm_warn("Resource history update failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } if (call_id == pending_rsc_update) { pending_rsc_update = 0; controld_trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) { return false; } if (!strcmp(op->op_type, PCMK_ACTION_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, PCMK_ACTION_MONITOR) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } /*! * \internal * \brief Request a CIB update * * \param[in] section Section of CIB to update * \param[in] data New XML of CIB section to update * \param[in] options CIB call options * \param[in] callback If not \c NULL, set this as the operation callback * * \return Standard Pacemaker return code * * \note If \p callback is \p cib_rsc_callback(), the CIB update's call ID is * stored in \p pending_rsc_update on success. */ int controld_update_cib(const char *section, xmlNode *data, int options, void (*callback)(xmlNode *, int, int, xmlNode *, void *)) { int cib_rc = -ENOTCONN; CRM_ASSERT(data != NULL); if (controld_globals.cib_conn != NULL) { cib_rc = cib_internal_op(controld_globals.cib_conn, PCMK__CIB_REQUEST_MODIFY, NULL, section, data, NULL, options, NULL); if (cib_rc >= 0) { crm_debug("Submitted CIB update %d for %s section", cib_rc, section); } } if (callback == NULL) { if (cib_rc < 0) { crm_err("Failed to update CIB %s section: %s", section, pcmk_rc_str(pcmk_legacy2rc(cib_rc))); } } else { if ((cib_rc >= 0) && (callback == cib_rsc_callback)) { /* Checking for a particular callback is a little hacky, but it * didn't seem worth adding an output argument for cib_rc for just * one use case. */ pending_rsc_update = cib_rc; } fsa_register_cib_callback(cib_rc, NULL, callback); } return (cib_rc >= 0)? pcmk_rc_ok : pcmk_legacy2rc(cib_rc); } /*! * \internal * \brief Update resource history entry in CIB * * \param[in] node_name Node where action occurred * \param[in] rsc Resource that action is for * \param[in,out] op Action to record * \param[in] lock_time If nonzero, when resource was locked to node * * \note On success, the CIB update's call ID will be stored in * pending_rsc_update. */ void controld_update_resource_history(const char *node_name, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { xmlNode *update = NULL; xmlNode *xml = NULL; int call_opt = crmd_cib_smart_opt(); const char *node_id = NULL; const char *container = NULL; CRM_CHECK((node_name != NULL) && (op != NULL), return); if (rsc == NULL) { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); return; } // update = create_xml_node(NULL, XML_CIB_TAG_STATUS); // xml = create_xml_node(update, XML_CIB_TAG_STATE); if (pcmk__str_eq(node_name, controld_globals.our_nodename, pcmk__str_casei)) { node_id = controld_globals.our_uuid; } else { node_id = node_name; pcmk__xe_set_bool_attr(xml, XML_NODE_IS_REMOTE, true); } crm_xml_add(xml, XML_ATTR_ID, node_id); crm_xml_add(xml, XML_ATTR_UNAME, node_name); crm_xml_add(xml, XML_ATTR_ORIGIN, __func__); // xml = create_xml_node(xml, XML_CIB_TAG_LRM); crm_xml_add(xml, XML_ATTR_ID, node_id); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCES); // xml = create_xml_node(xml, XML_LRM_TAG_RESOURCE); crm_xml_add(xml, XML_ATTR_ID, op->rsc_id); crm_xml_add(xml, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, rsc->provider); crm_xml_add(xml, XML_ATTR_TYPE, rsc->type); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params != NULL) { container = g_hash_table_lookup(op->params, CRM_META "_" XML_RSC_ATTR_CONTAINER); if (container != NULL) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(xml, XML_RSC_ATTR_CONTAINER, container); } } // (possibly more than one) controld_add_resource_history_xml(xml, rsc, op, node_name); /* Update CIB asynchronously. Even if it fails, the resource state should be * discovered during the next election. Worst case, the node is wrongly * fenced for running a resource it isn't. */ crm_log_xml_trace(update, __func__); controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, cib_rsc_callback); free_xml(update); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] op Operation whose history should be deleted */ void controld_delete_action_history(const lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_none); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Delete a last_failure resource history entry from the CIB * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] action If specified, delete only if this was failed action * \param[in] interval_ms If \p action is specified, it has this interval */ void controld_cib_delete_last_failure(const char *rsc_id, const char *node, const char *action, guint interval_ms) { char *xpath = NULL; char *last_failure_key = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL), return); // Generate XPath to match desired entry last_failure_key = pcmk__op_key(rsc_id, "last_failure", 0); if (action == NULL) { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, last_failure_key); } else { char *action_key = pcmk__op_key(rsc_id, action, interval_ms); xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, node, rsc_id, last_failure_key, action_key); free(action_key); } free(last_failure_key); controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } /*! * \internal * \brief Delete resource history entry from the CIB, given operation key * * \param[in] rsc_id Name of resource to clear history for * \param[in] node Name of node to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] call_id If specified, delete entry only if it has this call ID */ void controld_delete_action_history_by_key(const char *rsc_id, const char *node, const char *key, int call_id) { char *xpath = NULL; CRM_CHECK((rsc_id != NULL) && (node != NULL) && (key != NULL), return); if (call_id > 0) { xpath = crm_strdup_printf(XPATH_HISTORY_CALL, node, rsc_id, key, call_id); } else { xpath = crm_strdup_printf(XPATH_HISTORY_ID, node, rsc_id, key); } controld_globals.cib_conn->cmds->remove(controld_globals.cib_conn, xpath, NULL, cib_xpath); free(xpath); } diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c index e7818a5e51..b69e821bea 100644 --- a/daemons/controld/controld_corosync.c +++ b/daemons/controld/controld_corosync.c @@ -1,165 +1,162 @@ /* - * Copyright 2004-2022 the Pacemaker project contributors + * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #if SUPPORT_COROSYNC extern void post_cache_update(int seq); /* A_HA_CONNECT */ static void crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { crm_node_t *peer = NULL; xmlNode *xml = string2xml(data); if (xml == NULL) { crm_err("Could not parse message content (%d): %.100s", kind, data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */ peer = crm_get_peer(0, from); if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { /* If we can still talk to our peer process on that node, * then it must be part of the corosync membership */ crm_warn("Receiving messages from a node we think is dead: %s[%d]", peer->uname, peer->id); crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); } crmd_ha_msg_filter(xml); free_xml(xml); } else { crm_err("Invalid message class (%d): %.100s", kind, data); } free(data); } static gboolean crmd_quorum_callback(unsigned long long seq, gboolean quorate) { crm_update_quorum(quorate, FALSE); post_cache_update(seq); return TRUE; } static void crmd_cs_destroy(gpointer user_data) { if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { crm_crit("Lost connection to cluster layer, shutting down"); crmd_exit(CRM_EX_DISCONNECT); - - } else { - crm_info("Corosync connection closed"); } } /*! * \brief Handle a Corosync notification of a CPG configuration change * * \param[in] handle CPG connection * \param[in] cpg_name CPG group name * \param[in] member_list List of current CPG members * \param[in] member_list_entries Number of entries in \p member_list * \param[in] left_list List of CPG members that left * \param[in] left_list_entries Number of entries in \p left_list * \param[in] joined_list List of CPG members that joined * \param[in] joined_list_entries Number of entries in \p joined_list */ static void cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* When nodes leave CPG, the DC clears their transient node attributes. * * However if there is no DC, or the DC is among the nodes that left, each * remaining node needs to do the clearing, to ensure it gets done. * Otherwise, the attributes would persist when the nodes rejoin, which * could have serious consequences for unfencing, agents that use attributes * for internal logic, etc. * * Here, we set a global boolean if the DC is among the nodes that left, for * use by the peer callback. */ if (controld_globals.dc_name != NULL) { crm_node_t *peer = NULL; peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name, NULL); if (peer != NULL) { for (int i = 0; i < left_list_entries; ++i) { if (left_list[i].nodeid == peer->id) { controld_set_global_flags(controld_dc_left); break; } } } } // Process the change normally, which will call the peer callback as needed pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); controld_clear_global_flags(controld_dc_left); } extern gboolean crm_connect_corosync(crm_cluster_t * cluster); gboolean crm_connect_corosync(crm_cluster_t * cluster) { if (is_corosync_cluster()) { crm_set_status_callback(&peer_update_callback); cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; cluster->cpg.cpg_confchg_fn = cpg_membership_callback; cluster->destroy = crmd_cs_destroy; if (crm_cluster_connect(cluster)) { pcmk__corosync_quorum_connect(crmd_quorum_callback, crmd_cs_destroy); return TRUE; } } return FALSE; } #endif diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index a90e8d833e..480d37d743 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2447 +1,2441 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static void lrm_connection_destroy(void) { if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { - crm_crit("Connection to executor failed"); + crm_crit("Lost connection to local executor"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); - - } else { - crm_info("Disconnected from executor"); } - } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); pcmk__str_update(&entry->rsc.provider, rsc->provider); } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, PCMK_ACTION_MONITOR, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if ((entry->recurring_op_list != NULL) && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input, const char *rsc_id, const lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return pcmk__s(op->remote_nodename, controld_globals.our_nodename); } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_wait_timer(); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if (controld_globals.our_nodename == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(controld_globals.our_nodename); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); - crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); - crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) { guint removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->active_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->active_ops != NULL) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(controld_globals.fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->active_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->active_ops != NULL) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->active_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if ((entry->last->rc == PCMK_OCF_OK) && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name); controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name); } } return FALSE; } xmlNode * controld_query_executor_state(void) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename); if (!lrm_state) { crm_err("Could not find executor state for node %s", controld_globals.our_nodename); return NULL; } peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, node_update_cluster|node_update_peer, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); cib__update_node_attr(controld_globals.logger_out, controld_globals.cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, GHashTableIter *rsc_iter, int rc, const char *user_name, bool from_cib) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_iter) { g_hash_table_iter_remove(rsc_iter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } if (from_cib) { controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); } g_hash_table_foreach_remove(lrm_state->active_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } if (lrm_state->resource_history != NULL) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->active_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the active operations list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from active operations will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->active_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in,out] lrm_state Executor connection state to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource if not already * \param[out] rsc_info Where to store information obtained from executor * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc, GHashTableIter *iter, const char *sys, const char *user, ha_msg_input_t *request, bool unregister, bool from_cib) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " CRM_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ bool unregister = true; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { unregister = false; if (reprobe_all_nodes) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state != NULL) { /* If reprobing all nodes, be sure to reprobe the remote * node before clearing its connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE, reprobe_all_nodes); } } } /* Don't delete from the CIB, since we'll delete the whole node's LRM * state from the CIB soon */ delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister, false); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_scope_local); // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false) update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in,out] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation (replacing \p NULL with local node * name) * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(const xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = controld_globals.our_nodename; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node, bool reprobe_all_nodes) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node, reprobe_all_nodes); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, controld_globals.our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } controld_delete_action_history_by_key(rsc->id, lrm_state->node_name, op_key, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ if (lrm_state->active_ops != NULL) { g_hash_table_remove(lrm_state->active_ops, op_id); } free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { bool unregister = true; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = false; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister, true); } // User data for asynchronous metadata execution struct metadata_cb_data { lrmd_rsc_info_t *rsc; // Copy of resource information xmlNode *input_xml; // Copy of FSA input XML }; static struct metadata_cb_data * new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml) { struct metadata_cb_data *data = NULL; data = calloc(1, sizeof(struct metadata_cb_data)); CRM_ASSERT(data != NULL); data->input_xml = copy_xml(input_xml); data->rsc = lrmd_copy_rsc_info(rsc); return data; } static void free_metadata_cb_data(struct metadata_cb_data *data) { lrmd_free_rsc_info(data->rsc); free_xml(data->input_xml); free(data); } /*! * \internal * \brief Execute an action after metadata has been retrieved * * \param[in] pid Ignored * \param[in] result Result of metadata action * \param[in] user_data Metadata callback data */ static void metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data) { struct metadata_cb_data *data = (struct metadata_cb_data *) user_data; struct ra_metadata_s *md = NULL; lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml)); if ((lrm_state != NULL) && pcmk__result_ok(result)) { md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc, result->action_stdout); } if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) { do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md); } free_metadata_cb_data(data); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = lrm_op_target(input->xml); gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; // Message routed to the local node is targeting a specific, non-local node is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename, pcmk__str_casei); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } CRM_ASSERT(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { crm_rsc_delete = TRUE; // from crm_resource } operation = PCMK_ACTION_DELETE; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return); crm_trace("'%s' execution request from %s as %s user", pcmk__s(crm_op, operation), pcmk__s(from_sys, "unknown subsystem"), pcmk__s(user_name, "current")); if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) { /* @COMPAT This can only be sent by crm_resource --refresh on a * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely * unlikely. It previously would cause the controller to re-write its * resource history to the CIB. Just ignore it. */ crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node"); // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { const char *raw_target = NULL; if (input->xml != NULL) { // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET); } handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node, (raw_target == NULL)); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name, true); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE, pcmk__str_none)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { struct ra_metadata_s *md = NULL; /* Getting metadata from cache is OK except for start actions -- * always refresh from the agent for those, in case the resource * agent was updated. * * @TODO Only refresh metadata for starts if the agent actually * changed (using something like inotify, or a hash or modification * time of the agent executable). */ if (strcmp(operation, PCMK_ACTION_START) != 0) { md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); } if ((md == NULL) && crm_op_needs_metadata(rsc->standard, operation)) { /* Most likely, we'll need the agent metadata to record the * pending operation and the operation result. Get it now rather * than wait until then, so the metadata action doesn't eat into * the real action's timeout. * * @TODO Metadata is retrieved via direct execution of the * agent, which has a couple of related issues: the executor * should execute agents, not the controller; and metadata for * Pacemaker Remote nodes should be collected on those nodes, * not locally. */ struct metadata_cb_data *data = NULL; data = new_metadata_cb_data(rsc, input->xml); crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously", rsc->id, rsc->standard, ((rsc->provider == NULL)? "" : ":"), ((rsc->provider == NULL)? "" : rsc->provider), rsc->type); (void) lrmd__metadata_async(rsc, metadata_complete, (void *) data); } else { do_lrm_rsc_op(lrm_state, rsc, input->xml, md); } } lrmd_free_rsc_info(rsc); } else { crm_err("Invalid execution request: unknown command '%s' (bug?)", crm_op); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { op->timeout = crm_get_msec(op_timeout); } } if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in,out] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, controld_globals.our_nodename); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); controld_add_resource_history_xml(iter, rsc, op, controld_globals.our_nodename); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } /*! * \internal * \brief Check whether recurring actions should be cancelled before an action * * \param[in] rsc_id Resource that action is for * \param[in] action Action being performed * \param[in] interval_ms Operation interval of \p action (in milliseconds) * * \return true if recurring actions should be cancelled, otherwise false */ static bool should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms) { if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0) && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) { /* Don't stop monitoring a migrating Pacemaker Remote connection * resource until the entire migration has completed. We must detect if * the connection is unexpectedly severed, even during a migration. */ return false; } // Cancel recurring actions before changing resource state return (interval_ms == 0) && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR, PCMK_ACTION_NOTIFY, NULL); } /*! * \internal * \brief Check whether an action should not be performed at this time * * \param[in] operation Action to be performed * * \return Readable description of why action should not be performed, * or NULL if it should be performed */ static const char * should_nack_action(const char *action) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); return "Not attempting start due to shutdown in progress"; } switch (controld_globals.fsa_state) { case S_NOT_DC: case S_POLICY_ENGINE: // Recalculating case S_TRANSITION_ENGINE: break; default: if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) { return "Controller cannot attempt actions at this time"; } break; } return NULL; } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg, struct ra_metadata_s *md) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; const char *operation = NULL; const char *nack_reason = NULL; CRM_CHECK((rsc != NULL) && (msg != NULL), return); operation = crm_element_value(msg, XML_LRM_ATTR_TASK); CRM_CHECK(!pcmk__str_empty(operation), return); transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (pcmk__str_empty(transition)) { crm_log_xml_err(msg, "Missing transition number"); } if (lrm_state == NULL) { // This shouldn't be possible, but provide a failsafe just in case crm_err("Cannot execute %s of %s: No executor connection " CRM_XS " transition_key=%s", operation, rsc->id, pcmk__s(transition, "")); synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID, PCMK_OCF_UNKNOWN_ERROR, "No executor connection"); return; } if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { operation = PCMK_ACTION_RELOAD; } else { operation = PCMK_ACTION_RELOAD_AGENT; } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->active_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, pcmk__readable_action(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, pcmk__s(transition, ""), rsc->id, operation, op->interval_ms); nack_reason = should_nack_action(operation); if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s " "(shutdown=%s)", operation, rsc->id, fsa_state2string(controld_globals.fsa_state), pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN))); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } controld_record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, op->params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = calloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pcmk__str_update(&pending->user_data, op->user_data); if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->active_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " CRM_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(const lrmd_event_data_t *op, const char *op_key, const char *node_name, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size pcmk__g_strcat(str, "Result of ", pcmk__readable_action(op->op_type, op->interval_ms), " operation for ", op->rsc_id, NULL); if (node_name != NULL) { pcmk__g_strcat(str, " on ", node_name, NULL); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL); break; case PCMK_EXEC_TIMEOUT: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), " after ", pcmk__readable_interval(op->timeout), NULL); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; /* order of __attribute__ and Fall through comment is IMPORTANT! * do not change it without proper testing with both clang and gcc * in multiple versions. * the clang check allows to build with all versions of clang. * the has_c_attribute check is to workaround a bug in clang version * in rhel7. has_attribute would happily return "YES SIR WE GOT IT" * and fail the build the next line. */ #ifdef __clang__ #ifdef __has_c_attribute #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif #endif #endif // Fall through default: pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status), NULL); } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL); } g_string_append(str, " " CRM_XS); g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, const xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(controld_globals.dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml, XML_ATTR_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->active_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can time_t lock_time = (pending == NULL)? 0 : pending->lock_time; controld_update_resource_history(node_name, rsc, op, lock_time); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { controld_delete_action_history(op); } /* Directly acknowledge failed recurring actions here. The above call to * controld_delete_action_history() will not erase any corresponding * last_failure entry, which means that the DC won't confirm the * cancellation via process_op_deletion(), and the transition would * otherwise wait for the action timer to pop. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->active_ops)); } } log_executor_event(op, op_key, node_name, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL, true); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ controld_trigger_fsa(); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 3c7e7db0eb..a235e34e7b 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -1,1113 +1,1113 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event); /* * stonith failure counting * * We don't want to get stuck in a permanent fencing loop. Keep track of the * number of fencing failures for each target node, and the most we'll restart a * transition for. */ struct st_fail_rec { int count; }; static bool fence_reaction_panic = false; static unsigned long int stonith_max_attempts = 10; static GHashTable *stonith_failures = NULL; /*! * \internal * \brief Update max fencing attempts before giving up * * \param[in] value New max fencing attempts */ static void update_stonith_max_attempts(const char *value) { stonith_max_attempts = char2score(value); if (stonith_max_attempts < 1UL) { stonith_max_attempts = 10UL; } } /*! * \internal * \brief Configure reaction to notification of local node being fenced * * \param[in] reaction_s Reaction type */ static void set_fence_reaction(const char *reaction_s) { if (pcmk__str_eq(reaction_s, "panic", pcmk__str_casei)) { fence_reaction_panic = true; } else { if (!pcmk__str_eq(reaction_s, "stop", pcmk__str_casei)) { crm_warn("Invalid value '%s' for %s, using 'stop'", reaction_s, XML_CONFIG_ATTR_FENCE_REACTION); } fence_reaction_panic = false; } } /*! * \internal * \brief Configure fencing options based on the CIB * * \param[in,out] options Name/value pairs for configured options */ void controld_configure_fencing(GHashTable *options) { const char *value = NULL; value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FENCE_REACTION); set_fence_reaction(value); value = g_hash_table_lookup(options, "stonith-max-attempts"); update_stonith_max_attempts(value); } static gboolean too_many_st_failures(const char *target) { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *value = NULL; if (stonith_failures == NULL) { return FALSE; } if (target == NULL) { g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { if (value->count >= stonith_max_attempts) { target = (const char*)key; goto too_many; } } } else { value = g_hash_table_lookup(stonith_failures, target); if ((value != NULL) && (value->count >= stonith_max_attempts)) { goto too_many; } } return FALSE; too_many: crm_warn("Too many failures (%d) to fence %s, giving up", value->count, target); return TRUE; } /*! * \internal * \brief Reset a stonith fail count * * \param[in] target Name of node to reset, or NULL for all */ void st_fail_count_reset(const char *target) { if (stonith_failures == NULL) { return; } if (target) { struct st_fail_rec *rec = NULL; rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count = 0; } } else { GHashTableIter iter; const char *key = NULL; struct st_fail_rec *rec = NULL; g_hash_table_iter_init(&iter, stonith_failures); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rec)) { rec->count = 0; } } } static void st_fail_count_increment(const char *target) { struct st_fail_rec *rec = NULL; if (stonith_failures == NULL) { stonith_failures = pcmk__strkey_table(free, free); } rec = g_hash_table_lookup(stonith_failures, target); if (rec) { rec->count++; } else { rec = malloc(sizeof(struct st_fail_rec)); if(rec == NULL) { return; } rec->count = 1; g_hash_table_insert(stonith_failures, strdup(target), rec); } } /* end stonith fail count functions */ static void cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { if (rc < pcmk_ok) { crm_err("Fencing update %d for %s: failed - %s (%d)", call_id, (char *)user_data, pcmk_strerror(rc), rc); crm_log_xml_warn(msg, "Failed update"); abort_transition(INFINITY, pcmk__graph_shutdown, "CIB update failed", NULL); } else { crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data); } } static void send_stonith_update(pcmk__graph_action_t *action, const char *target, const char *uuid) { int rc = pcmk_ok; crm_node_t *peer = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node * might have already rejoined before we get the stonith result here. */ int flags = node_update_join | node_update_expected; /* zero out the node-status & remove all LRM status info */ xmlNode *node_state = NULL; CRM_CHECK(target != NULL, return); CRM_CHECK(uuid != NULL, return); /* Make sure the membership and join caches are accurate. * Try getting any existing node cache entry also by node uuid in case it * doesn't have an uname yet. */ peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { /* Usually, we rely on the membership layer to update the cluster state * in the CIB. However, if the node has never been seen, do it here, so * the node is not considered unclean. */ flags |= node_update_cluster; } if (peer->uuid == NULL) { crm_info("Recording uuid '%s' for node '%s'", uuid, target); peer->uuid = strdup(uuid); } crmd_peer_down(peer, TRUE); /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); /* we have to mark whether or not remote nodes have already been fenced */ if (peer->flags & crm_remote_node) { char *now_s = pcmk__ttoa(time(NULL)); crm_xml_add(node_state, XML_NODE_IS_FENCED, now_s); free(now_s); } /* Force our known ID */ crm_xml_add(node_state, XML_ATTR_ID, uuid); rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, XML_CIB_TAG_STATUS, node_state, cib_scope_local |cib_can_create); /* Delay processing the trigger until the update completes */ crm_debug("Sending fencing update %d for %s", rc, target); fsa_register_cib_callback(rc, strdup(target), cib_fencing_updated); // Make sure it sticks /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, * cib_scope_local); */ controld_delete_node_state(peer->uname, controld_section_all, cib_scope_local); free_xml(node_state); return; } /*! * \internal * \brief Abort transition due to stonith failure * * \param[in] abort_action Whether to restart or stop transition * \param[in] target Don't restart if this (NULL for any) has too many failures * \param[in] reason Log this stonith action XML as abort reason (or NULL) */ static void abort_for_stonith_failure(enum pcmk__graph_next abort_action, const char *target, const xmlNode *reason) { /* If stonith repeatedly fails, we eventually give up on starting a new * transition for that reason. */ if ((abort_action != pcmk__graph_wait) && too_many_st_failures(target)) { abort_action = pcmk__graph_wait; } abort_transition(INFINITY, abort_action, "Stonith failed", reason); } /* * stonith cleanup list * * If the DC is shot, proper notifications might not go out. * The stonith cleanup list allows the cluster to (re-)send * notifications once a new DC is elected. */ static GList *stonith_cleanup_list = NULL; /*! * \internal * \brief Add a node to the stonith cleanup list * * \param[in] target Name of node to add */ void add_stonith_cleanup(const char *target) { stonith_cleanup_list = g_list_append(stonith_cleanup_list, strdup(target)); } /*! * \internal * \brief Remove a node from the stonith cleanup list * * \param[in] Name of node to remove */ void remove_stonith_cleanup(const char *target) { GList *iter = stonith_cleanup_list; while (iter != NULL) { GList *tmp = iter; char *iter_name = tmp->data; iter = iter->next; if (pcmk__str_eq(target, iter_name, pcmk__str_casei)) { crm_trace("Removing %s from the cleanup list", iter_name); stonith_cleanup_list = g_list_delete_link(stonith_cleanup_list, tmp); free(iter_name); } } } /*! * \internal * \brief Purge all entries from the stonith cleanup list */ void purge_stonith_cleanup(void) { if (stonith_cleanup_list) { GList *iter = NULL; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_info("Purging %s from stonith cleanup list", target); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } } /*! * \internal * \brief Send stonith updates for all entries in cleanup list, then purge it */ void execute_stonith_cleanup(void) { GList *iter; for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { char *target = iter->data; crm_node_t *target_node = crm_get_peer(0, target); const char *uuid = crm_peer_uuid(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); free(target); } g_list_free(stonith_cleanup_list); stonith_cleanup_list = NULL; } /* end stonith cleanup list functions */ /* stonith API client * * Functions that need to interact directly with the fencer via its API */ static stonith_t *stonith_api = NULL; static crm_trigger_t *stonith_reconnect = NULL; static char *te_client_id = NULL; static gboolean fail_incompletable_stonith(pcmk__graph_t *graph) { GList *lpc = NULL; const char *task = NULL; xmlNode *last_action = NULL; if (graph == NULL) { return FALSE; } for (lpc = graph->synapses; lpc != NULL; lpc = lpc->next) { GList *lpc2 = NULL; pcmk__graph_synapse_t *synapse = (pcmk__graph_synapse_t *) lpc->data; if (pcmk_is_set(synapse->flags, pcmk__synapse_confirmed)) { continue; } for (lpc2 = synapse->actions; lpc2 != NULL; lpc2 = lpc2->next) { pcmk__graph_action_t *action = (pcmk__graph_action_t *) lpc2->data; if ((action->type != pcmk__cluster_graph_action) || pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) { continue; } task = crm_element_value(action->xml, XML_LRM_ATTR_TASK); if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) { pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); last_action = action->xml; pcmk__update_graph(graph, action); crm_notice("Failing action %d (%s): fencer terminated", action->id, ID(action->xml)); } } } if (last_action != NULL) { crm_warn("Fencer failure resulted in unrunnable actions"); abort_for_stonith_failure(pcmk__graph_restart, NULL, last_action); return TRUE; } return FALSE; } static void tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e) { te_cleanup_stonith_history_sync(st, FALSE); if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { - crm_crit("Fencing daemon connection failed"); + crm_err("Lost fencer connection (will attempt to reconnect)"); mainloop_set_trigger(stonith_reconnect); } else { - crm_info("Fencing daemon disconnected"); + crm_info("Disconnected from fencer"); } if (stonith_api) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(st); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (AM_I_DC) { fail_incompletable_stonith(controld_globals.transition_graph); trigger_graph(); } } /*! * \internal * \brief Handle an event notification from the fencing API * * \param[in] st Fencing API connection (ignored) * \param[in] event Fencing API event notification */ static void handle_fence_notification(stonith_t *st, stonith_event_t *event) { bool succeeded = true; const char *executioner = "the cluster"; const char *client = "a client"; const char *reason = NULL; int exec_status; if (te_client_id == NULL) { te_client_id = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } if (event == NULL) { crm_err("Notify data not found"); return; } if (event->executioner != NULL) { executioner = event->executioner; } if (event->client_origin != NULL) { client = event->client_origin; } exec_status = stonith__event_execution_status(event); if ((stonith__event_exit_status(event) != CRM_EX_OK) || (exec_status != PCMK_EXEC_DONE)) { succeeded = false; if (exec_status == PCMK_EXEC_DONE) { exec_status = PCMK_EXEC_ERROR; } } reason = stonith__event_exit_reason(event); crmd_alert_fencing_op(event); if (pcmk__str_eq(PCMK_ACTION_ON, event->action, pcmk__str_none)) { // Unfencing doesn't need special handling, just a log message if (succeeded) { crm_notice("%s was unfenced by %s at the request of %s@%s", event->target, executioner, client, event->origin); } else { crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d", event->target, executioner, pcmk_exec_status_str(exec_status), ((reason == NULL)? "" : ": "), ((reason == NULL)? "" : reason), stonith__event_exit_status(event)); } return; } if (succeeded && pcmk__str_eq(event->target, controld_globals.our_nodename, pcmk__str_casei)) { /* We were notified of our own fencing. Most likely, either fencing was * misconfigured, or fabric fencing that doesn't cut cluster * communication is in use. * * Either way, shutting down the local host is a good idea, to require * administrator intervention. Also, other nodes would otherwise likely * set our status to lost because of the fencing callback and discard * our subsequent election votes as "not part of our cluster". */ crm_crit("We were allegedly just fenced by %s for %s!", executioner, event->origin); // Dumps blackbox if enabled if (fence_reaction_panic) { pcmk__panic(__func__); } else { crm_exit(CRM_EX_FATAL); } return; // Should never get here } /* Update the count of fencing failures for this target, in case we become * DC later. The current DC has already updated its fail count in * tengine_stonith_callback(). */ if (!AM_I_DC) { if (succeeded) { st_fail_count_reset(event->target); } else { st_fail_count_increment(event->target); } } crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: " "%s%s%s%s " CRM_XS " event=%s", event->target, (succeeded? "" : " not"), event->action, executioner, client, event->origin, (succeeded? "OK" : pcmk_exec_status_str(exec_status)), ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")"), event->id); if (succeeded) { crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, CRM_GET_PEER_ANY); const char *uuid = NULL; if (peer == NULL) { return; } uuid = crm_peer_uuid(peer); if (AM_I_DC) { /* The DC always sends updates */ send_stonith_update(NULL, event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. * Unfortunately, the controller doesn't have a simple, reliable way * to map hosts to guests. It might be possible to track this in the * peer cache via crm_remote_peer_cache_refresh(). For now, we rely * on the scheduler creating fence pseudo-events for the guests. */ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) { /* Abort the current transition if it wasn't the cluster that * initiated fencing. */ crm_info("External fencing operation from %s fenced %s", client, event->target); abort_transition(INFINITY, pcmk__graph_restart, "External Fencing Operation", NULL); } } else if (pcmk__str_eq(controld_globals.dc_name, event->target, pcmk__str_null_matches|pcmk__str_casei) && !pcmk_is_set(peer->flags, crm_remote_node)) { // Assume the target was our DC if we don't currently have one if (controld_globals.dc_name != NULL) { crm_notice("Fencing target %s was our DC", event->target); } else { crm_notice("Fencing target %s may have been our DC", event->target); } /* Given the CIB resyncing that occurs around elections, * have one node update the CIB now and, if the new DC is different, * have them do so too after the election */ if (pcmk__str_eq(event->executioner, controld_globals.our_nodename, pcmk__str_casei)) { send_stonith_update(NULL, event->target, uuid); } add_stonith_cleanup(event->target); } /* If the target is a remote node, and we host its connection, * immediately fail all monitors so it can be recovered quickly. * The connection won't necessarily drop when a remote node is fenced, * so the failure might not otherwise be detected until the next poke. */ if (pcmk_is_set(peer->flags, crm_remote_node)) { remote_ra_fail(event->target); } crmd_peer_down(peer, TRUE); } } /*! * \brief Connect to fencer * * \param[in] user_data If NULL, retry failures now, otherwise retry in main loop * * \return TRUE * \note If user_data is NULL, this will wait 2s between attempts, for up to * 30 attempts, meaning the controller could be blocked as long as 58s. */ static gboolean te_connect_stonith(gpointer user_data) { int rc = pcmk_ok; if (stonith_api == NULL) { stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return TRUE; } } if (stonith_api->state != stonith_disconnected) { crm_trace("Already connected to fencer, no need to retry"); return TRUE; } if (user_data == NULL) { // Blocking (retry failures now until successful) rc = stonith_api_connect_retry(stonith_api, crm_system_name, 30); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 30 attempts: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } } else { // Non-blocking (retry failures later in main loop) rc = stonith_api->cmds->connect(stonith_api, crm_system_name, NULL); if (rc != pcmk_ok) { if (pcmk_is_set(controld_globals.fsa_input_register, R_ST_REQUIRED)) { crm_notice("Fencer connection failed (will retry): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); mainloop_set_trigger(stonith_reconnect); } else { crm_info("Fencer connection failed (ignoring because no longer required): %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); } return TRUE; } } if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, tengine_stonith_connection_destroy); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_FENCE, handle_fence_notification); stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_HISTORY_SYNCED, tengine_stonith_history_synced); te_trigger_stonith_history_sync(TRUE); crm_notice("Fencer successfully connected"); } return TRUE; } /*! \internal \brief Schedule fencer connection attempt in main loop */ void controld_trigger_fencer_connect(void) { if (stonith_reconnect == NULL) { stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, GINT_TO_POINTER(TRUE)); } controld_set_fsa_input_flags(R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); } void controld_disconnect_fencer(bool destroy) { if (stonith_api) { // Prevent fencer connection from coming up again controld_clear_fsa_input_flags(R_ST_REQUIRED); if (stonith_api->state != stonith_disconnected) { stonith_api->cmds->disconnect(stonith_api); } stonith_api->cmds->remove_notification(stonith_api, NULL); } if (destroy) { if (stonith_api) { stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if (stonith_reconnect) { mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; } if (te_client_id) { free(te_client_id); te_client_id = NULL; } } } static gboolean do_stonith_history_sync(gpointer user_data) { if (stonith_api && (stonith_api->state != stonith_disconnected)) { stonith_history_t *history = NULL; te_cleanup_stonith_history_sync(stonith_api, FALSE); stonith_api->cmds->history(stonith_api, st_opt_sync_call | st_opt_broadcast, NULL, &history, 5); stonith_history_free(history); return TRUE; } else { crm_info("Skip triggering stonith history-sync as stonith is disconnected"); return FALSE; } } static void tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) { char *uuid = NULL; int stonith_id = -1; int transition_id = -1; pcmk__graph_action_t *action = NULL; const char *target = NULL; if ((data == NULL) || (data->userdata == NULL)) { crm_err("Ignoring fence operation %d result: " "No transition key given (bug?)", ((data == NULL)? -1 : data->call_id)); return; } if (!AM_I_DC) { const char *reason = stonith__exit_reason(data); if (reason == NULL) { reason = pcmk_exec_status_str(stonith__execution_status(data)); } crm_notice("Result of fence operation %d: %d (%s) " CRM_XS " key=%s", data->call_id, stonith__exit_status(data), reason, (const char *) data->userdata); return; } CRM_CHECK(decode_transition_key(data->userdata, &uuid, &transition_id, &stonith_id, NULL), goto bail); if (controld_globals.transition_graph->complete || (stonith_id < 0) || !pcmk__str_eq(uuid, controld_globals.te_uuid, pcmk__str_none) || (controld_globals.transition_graph->id != transition_id)) { crm_info("Ignoring fence operation %d result: " "Not from current transition " CRM_XS " complete=%s action=%d uuid=%s (vs %s) transition=%d (vs %d)", data->call_id, pcmk__btoa(controld_globals.transition_graph->complete), stonith_id, uuid, controld_globals.te_uuid, transition_id, controld_globals.transition_graph->id); goto bail; } action = controld_get_action(stonith_id); if (action == NULL) { crm_err("Ignoring fence operation %d result: " "Action %d not found in transition graph (bug?) " CRM_XS " uuid=%s transition=%d", data->call_id, stonith_id, uuid, transition_id); goto bail; } target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); if (target == NULL) { crm_err("Ignoring fence operation %d result: No target given (bug?)", data->call_id); goto bail; } stop_te_timer(action); if (stonith__exit_status(data) == CRM_EX_OK) { const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *op = crm_meta_value(action->params, "stonith_action"); crm_info("Fence operation %d for %s succeeded", data->call_id, target); if (!(pcmk_is_set(action->flags, pcmk__graph_action_confirmed))) { te_action_confirmed(action, NULL); if (pcmk__str_eq(PCMK_ACTION_ON, op, pcmk__str_casei)) { const char *value = NULL; char *now = pcmk__ttoa(time(NULL)); gboolean is_remote_node = FALSE; /* This check is not 100% reliable, since this node is not * guaranteed to have the remote node cached. However, it * doesn't have to be reliable, since the attribute manager can * learn a node's "remoteness" by other means sooner or later. * This allows it to learn more quickly if this node does have * the information. */ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { is_remote_node = TRUE; } update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, is_remote_node); free(now); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, is_remote_node); value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { send_stonith_update(action, target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } } st_fail_count_reset(target); } else { enum pcmk__graph_next abort_action = pcmk__graph_restart; int status = stonith__execution_status(data); const char *reason = stonith__exit_reason(data); if (reason == NULL) { if (status == PCMK_EXEC_DONE) { reason = "Agent returned error"; } else { reason = pcmk_exec_status_str(status); } } pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); /* If no fence devices were available, there's no use in immediately * checking again, so don't start a new transition in that case. */ if (status == PCMK_EXEC_NO_FENCE_DEVICE) { crm_warn("Fence operation %d for %s failed: %s " "(aborting transition and giving up for now)", data->call_id, target, reason); abort_action = pcmk__graph_wait; } else { crm_notice("Fence operation %d for %s failed: %s " "(aborting transition)", data->call_id, target, reason); } /* Increment the fail count now, so abort_for_stonith_failure() can * check it. Non-DC nodes will increment it in * handle_fence_notification(). */ st_fail_count_increment(target); abort_for_stonith_failure(abort_action, target, NULL); } pcmk__update_graph(controld_globals.transition_graph, action); trigger_graph(); bail: free(data->userdata); free(uuid); return; } static int fence_with_delay(const char *target, const char *type, int delay) { uint32_t options = st_opt_none; // Group of enum stonith_call_options int timeout_sec = (int) (controld_globals.transition_graph->stonith_timeout / 1000); if (crmd_join_phase_count(crm_join_confirmed) == 1) { stonith__set_call_options(options, target, st_opt_allow_suicide); } return stonith_api->cmds->fence_with_delay(stonith_api, options, target, type, timeout_sec, 0, delay); } /*! * \internal * \brief Execute a fencing action from a transition graph * * \param[in] graph Transition graph being executed (ignored) * \param[in] action Fencing action to execute * * \return Standard Pacemaker return code */ int controld_execute_fence_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc = 0; const char *id = ID(action->xml); const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET); const char *type = crm_meta_value(action->params, "stonith_action"); char *transition_key = NULL; const char *priority_delay = NULL; int delay_i = 0; gboolean invalid_action = FALSE; int stonith_timeout = (int) (controld_globals.transition_graph->stonith_timeout / 1000); CRM_CHECK(id != NULL, invalid_action = TRUE); CRM_CHECK(uuid != NULL, invalid_action = TRUE); CRM_CHECK(type != NULL, invalid_action = TRUE); CRM_CHECK(target != NULL, invalid_action = TRUE); if (invalid_action) { crm_log_xml_warn(action->xml, "BadAction"); return EPROTO; } priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); crm_notice("Requesting fencing (%s) targeting node %s " CRM_XS " action=%s timeout=%i%s%s", type, target, id, stonith_timeout, priority_delay ? " priority_delay=" : "", priority_delay ? priority_delay : ""); /* Passing NULL means block until we can connect... */ te_connect_stonith(NULL); pcmk__scan_min_int(priority_delay, &delay_i, 0); rc = fence_with_delay(target, type, delay_i); transition_key = pcmk__transition_key(controld_globals.transition_graph->id, action->id, 0, controld_globals.te_uuid), stonith_api->cmds->register_callback(stonith_api, rc, (stonith_timeout + (delay_i > 0 ? delay_i : 0)), st_opt_timeout_updates, transition_key, "tengine_stonith_callback", tengine_stonith_callback); return pcmk_rc_ok; } bool controld_verify_stonith_watchdog_timeout(const char *value) { long st_timeout = value? crm_get_msec(value) : 0; const char *our_nodename = controld_globals.our_nodename; gboolean rv = TRUE; if (st_timeout == 0 || (stonith_api && (stonith_api->state != stonith_disconnected) && stonith__watchdog_fencing_enabled_for_node_api(stonith_api, our_nodename))) { rv = pcmk__valid_sbd_timeout(value); } return rv; } /* end stonith API client functions */ /* * stonith history synchronization * * Each node's fencer keeps track of a cluster-wide fencing history. When a node * joins or leaves, we need to synchronize the history across all nodes. */ static crm_trigger_t *stonith_history_sync_trigger = NULL; static mainloop_timer_t *stonith_history_sync_timer_short = NULL; static mainloop_timer_t *stonith_history_sync_timer_long = NULL; void te_cleanup_stonith_history_sync(stonith_t *st, bool free_timers) { if (free_timers) { mainloop_timer_del(stonith_history_sync_timer_short); stonith_history_sync_timer_short = NULL; mainloop_timer_del(stonith_history_sync_timer_long); stonith_history_sync_timer_long = NULL; } else { mainloop_timer_stop(stonith_history_sync_timer_short); mainloop_timer_stop(stonith_history_sync_timer_long); } if (st) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY_SYNCED); } } static void tengine_stonith_history_synced(stonith_t *st, stonith_event_t *st_event) { te_cleanup_stonith_history_sync(st, FALSE); crm_debug("Fence-history synced - cancel all timers"); } static gboolean stonith_history_sync_set_trigger(gpointer user_data) { mainloop_set_trigger(stonith_history_sync_trigger); return FALSE; } void te_trigger_stonith_history_sync(bool long_timeout) { /* trigger a sync in 5s to give more nodes the * chance to show up so that we don't create * unnecessary stonith-history-sync traffic * * the long timeout of 30s is there as a fallback * so that after a successful connection to fenced * we will wait for 30s for the DC to trigger a * history-sync * if this doesn't happen we trigger a sync locally * (e.g. fenced segfaults and is restarted by pacemakerd) */ /* as we are finally checking the stonith-connection * in do_stonith_history_sync we should be fine * leaving stonith_history_sync_time & stonith_history_sync_trigger * around */ if (stonith_history_sync_trigger == NULL) { stonith_history_sync_trigger = mainloop_add_trigger(G_PRIORITY_LOW, do_stonith_history_sync, NULL); } if (long_timeout) { if(stonith_history_sync_timer_long == NULL) { stonith_history_sync_timer_long = mainloop_timer_add("history_sync_long", 30000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 30 seconds"); mainloop_timer_start(stonith_history_sync_timer_long); } else { if(stonith_history_sync_timer_short == NULL) { stonith_history_sync_timer_short = mainloop_timer_add("history_sync_short", 5000, FALSE, stonith_history_sync_set_trigger, NULL); } crm_info("Fence history will be synchronized cluster-wide within 5 seconds"); mainloop_timer_start(stonith_history_sync_timer_short); } } /* end stonith history synchronization functions */ diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index d3a74891e5..9222ad0253 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,507 +1,505 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include #include #include #include static void handle_disconnect(void); static pcmk_ipc_api_t *schedulerd_api = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void controld_shutdown_schedulerd_ipc(void) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); pcmk_free_ipc_api(schedulerd_api); schedulerd_api = NULL; } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in,out] output Result of CIB query * \param[in] user_data Unique identifier for filename * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { const char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure */ static void handle_disconnect(void) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (pcmk_is_set(controld_globals.fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); - crm_crit("Connection to the scheduler failed " - CRM_XS " uuid=%s", uuid_str); + crm_crit("Lost connection to the scheduler " + CRM_XS " CIB will be saved to " PE_STATE_DIR "/pe-core-%s.bz2", + uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = controld_globals.cib_conn->cmds->query(controld_globals.cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, uuid_str, save_cib_contents); - - } else { - crm_info("Connection to the scheduler released"); } controld_clear_fsa_input_flags(R_PE_CONNECTED); controld_trigger_fsa(); return; } static void handle_reply(pcmk_schedulerd_api_reply_t *reply) { const char *msg_ref = NULL; if (!AM_I_DC) { return; } msg_ref = reply->data.graph.reference; if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; xmlNode *crm_data_node; controld_stop_sched_timer(); /* do_te_invoke (which will eventually process the fsa_input we are constructing * here) requires that fsa_input.xml be non-NULL. That will only happen if * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function * gave us the values we need, we just need to put them into XML. * * The name of the top level element here is irrelevant. Nothing checks it. */ fsa_input.msg = create_xml_node(NULL, "dummy-reply"); crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref); crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input); crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA); add_node_copy(crm_data_node, reply->data.graph.tgraph); register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); free_xml(fsa_input.msg); } else { crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); } } static void scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_schedulerd_api_reply_t *reply = event_data; switch (event_type) { case pcmk_ipc_event_disconnect: handle_disconnect(); break; case pcmk_ipc_event_reply: handle_reply(reply); break; default: break; } } static bool new_schedulerd_ipc_connection(void) { int rc; controld_set_fsa_input_flags(R_PE_REQUIRED); if (schedulerd_api == NULL) { rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); if (rc != pcmk_rc_ok) { crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); return false; } } pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3); if (rc != pcmk_rc_ok) { crm_err("Error connecting to %s: %s", pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc)); return false; } controld_set_fsa_input_flags(R_PE_CONNECTED); return true; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (pcmk_is_set(action, A_PE_STOP)) { controld_clear_fsa_input_flags(R_PE_REQUIRED); pcmk_disconnect_ipc(schedulerd_api); handle_disconnect(); } if (pcmk_is_set(action, A_PE_START) && !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); } else if (!new_schedulerd_ipc_connection()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } static int fsa_pe_query = 0; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void controld_stop_sched_timer(void) { if ((controld_sched_timer != NULL) && (controld_globals.fsa_pe_ref != NULL)) { crm_trace("Stopping timer for scheduler reply %s", controld_globals.fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * * \param[in] ref Request to expect reply to (or NULL for none) * * \note This function takes ownership of \p ref. */ void controld_expect_sched_reply(char *ref) { if (ref) { if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(controld_globals.fsa_pe_ref); controld_globals.fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void controld_free_sched_timer(void) { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { cib_t *cib_conn = controld_globals.cib_conn; if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); controld_set_fsa_action_flags(A_PE_START); controld_trigger_fsa(); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = cib_conn->cmds->query(cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(controld_globals.fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; const char *xpath_base = NULL; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_base = pcmk_cib_xpath_for(XML_CIB_TAG_CRMCONFIG); if (xpath_base == NULL) { crm_err(XML_CIB_TAG_CRMCONFIG " CIB element not known (bug?)"); return; } xpath_string = crm_strdup_printf("%s//%s//nvpair[@name='%s']", xpath_base, XML_CIB_TAG_PROPSET, attr_name); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET, NULL, NULL); if (cluster_property_set == NULL) { cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); } freeXpathObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *ref = NULL; pid_t watchdog = pcmk__locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (!AM_I_DC || !pcmk_is_set(controld_globals.fsa_input_register, R_PE_CONNECTED)) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (controld_globals.fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(controld_globals.fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); controld_set_fsa_action_flags(A_PE_INVOKE); controld_trigger_fsa(); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ pcmk__refresh_node_caches_from_cib(output); crm_xml_add(output, XML_ATTR_DC_UUID, controld_globals.our_uuid); pcmk__xe_set_bool_attr(output, XML_ATTR_HAVE_QUORUM, pcmk_is_set(controld_globals.flags, controld_has_quorum)); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); if (pcmk_is_set(controld_globals.flags, controld_ever_had_quorum) && !crm_have_quorum) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref)); if (rc < 0) { crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else { CRM_ASSERT(ref != NULL); controld_expect_sched_reply(ref); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, " "quorate=%s", fsa_pe_query, controld_globals.fsa_pe_ref, crm_peer_seq, pcmk__btoa(pcmk_is_set(controld_globals.flags, controld_has_quorum))); } } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 5d3a46c358..843b3d4654 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2509 +1,2512 @@ /* * Copyright 2009-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a peer_device_info_t, with each device's * information kept in a device_properties_t. */ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; /* Action-specific base delay for each phase */ int delay_base[st_phase_max]; /* Group of enum st_device_flags */ uint32_t device_support_flags; } device_properties_t; typedef struct { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } peer_device_info_t; GHashTable *stonith_remote_op_list = NULL; extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer); static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data != NULL) { peer_device_info_t *peer = data; g_hash_table_destroy(peer->devices); free(peer->host); free(peer); } } void free_stonith_remote_op_list(void) { if (stonith_remote_op_list != NULL) { g_hash_table_destroy(stonith_remote_op_list); stonith_remote_op_list = NULL; } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; uint32_t support_action_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in,out] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified) && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * \param[in] support_action_only Whether to count only devices that support action * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.support_action_only = support_on_action_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, uint32_t support_action_only) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) { return NULL; } return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! * \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device, fenced_support_flag(op->action)); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); g_list_free(op->duplicates); pcmk__reset_result(&op->result); free(op); } void init_stonith_remote_op_hash_table(GHashTable **table) { if (*table == NULL) { *table = pcmk__strkey_table(NULL, free_remote_op); } } /*! * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot targeting %s to 'off' " CRM_XS " id=%.8s", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, PCMK_ACTION_OFF); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GList *iter = NULL; crm_info("Remapped 'off' targeting %s complete, " "remapping to 'on' for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GList *match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot targeting %s for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, PCMK_ACTION_REBOOT); } } /*! * \internal * \brief Create notification data XML for a fencing operation result * * \param[in] op Fencer operation that completed * * \return Newly created XML to add as notification data * \note The caller is responsible for freeing the result. */ static xmlNode * fencing_result2xml(const remote_fencing_op_t *op) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } /*! * \internal * \brief Broadcast a fence result notification to all CPG peers * * \param[in] op Fencer operation that completed * \param[in] op_merged Whether this operation is a duplicate of another */ void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = fencing_result2xml(op); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); if (op_merged) { pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true); } stonith__xe_set_result(notify_data, &op->result); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } /*! * \internal * \brief Reply to a local request originator and notify all subscribed clients * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; pcmk__client_t *client = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = fenced_construct_reply(op->request, data, &op->result); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ client = pcmk__find_client_by_id(op->client_id); if (client == NULL) { crm_trace("Skipping reply to %s: no longer a client", op->client_id); } else { do_local_reply(reply, client, op->call_options); } /* bcast to all local clients that the fencing operation happend */ notify_data = fencing_result2xml(op); fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data); free_xml(notify_data); fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); } /*! * \internal * \brief Finalize all duplicates of a given fencer operation * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) { for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " CRM_XS " id=%.8s", other->client_name, other->originator, pcmk_exec_status_str(op->result.execution_status), other->id); pcmk__copy_result(&op->result, &other->result); finalize_op(other, data, true); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " CRM_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER); if (match == NULL) { return crm_element_value_copy(xml, F_ORIG); } else { return crm_element_value_copy(match, F_STONITH_DELEGATE); } } /*! * \internal * \brief Finalize a peer fencing operation * * Clean up after a fencing operation completes. This function has two code * paths: the executioner uses it to broadcast the result to CPG peers, and then * each peer (including the executioner) uses it to process that broadcast and * notify its IPC clients of the result. * * \param[in,out] op Fencer operation that completed * \param[in,out] data If not NULL, XML reply of last delegated operation * \param[in] dup Whether this operation is a duplicate of another * (in which case, do not broadcast the result) * * \note The operation result should be set before calling this function. */ static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; gboolean op_merged = FALSE; CRM_CHECK((op != NULL), return); // This is a no-op if timers have already been cleared clear_remote_op_timers(op); if (op->notify_sent) { // Most likely, this is a timed-out action that eventually completed crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " "Result arrived too late " CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), op->id); return; } set_fencing_completed(op); undo_op_remap(op); if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } else if (op->delegate == NULL) { switch (op->result.execution_status) { case PCMK_EXEC_NO_FENCE_DEVICE: break; case PCMK_EXEC_INVALID: if (op->result.exit_status != CRM_EX_EXPIRED) { op->delegate = delegate_from_xml(data); } break; default: op->delegate = delegate_from_xml(data); break; } } if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) { op_merged = true; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) { /* Defer notification until the bcast message arrives */ fenced_broadcast_op_result(op, op_merged); free_xml(local_data); return; } if (pcmk__result_ok(&op->result) || dup || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { level = LOG_NOTICE; } do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), crm_exit_str(op->result.exit_status), pcmk_exec_status_str(op->result.execution_status), ((op->result.exit_reason == NULL)? "" : ": "), ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), op->id); handle_local_reply_and_notify(op, data); if (!dup) { finalize_op_duplicates(op, data); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } free_xml(local_data); } /*! * \internal * \brief Finalize a watchdog fencer op after the waiting time expires * * \param[in,out] userdata Fencer operation that completed * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Self-fencing (%s) by %s for %s assumed complete " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, NULL, false); return G_SOURCE_REMOVE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } // Try another device, if appropriate request_peer_fencing(op, NULL); return G_SOURCE_REMOVE; } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] op Fencer operation that timed out * \param[in] reason Readable description of what step timed out */ static void finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) { crm_debug("Action '%s' targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. */ op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { op->state = st_failed; pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); } finalize_op(op, NULL, false); } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] userdata Fencer operation that timed out * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action '%s' targeting %s for client %s already completed " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } else { finalize_timed_out_op(userdata, "Fencing did not complete within a " "total timeout based on the " "configured timeout and retries for " "any devices attempted"); } return G_SOURCE_REMOVE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %.8s targeting %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %.8s targeting %s already in progress", op->id, op->target); } else if (op->query_results) { // Query succeeded, so attempt the actual fencing crm_debug("Query %.8s targeting %s complete (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); request_peer_fencing(op, NULL); } else { crm_debug("Query %.8s targeting %s timed out (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); finalize_timed_out_op(op, "No capable peers replied to device query " "within timeout"); } return G_SOURCE_REMOVE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /*! * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); } } /*! * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GList *devices) { GList *lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } op->devices = op->devices_list; } /*! * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch (tp->kind) { case fenced_target_by_attribute: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case fenced_target_by_pattern: /* This level targets node names matching a pattern, so tp->target * (and tp->target_pattern) is a regular expression. */ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case fenced_target_by_name: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); default: break; } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? pcmk_rc_ok : ENODEV; } CRM_ASSERT(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level - if (op->level > 1 && op->delay > 0) { - op->delay = 0; + if ((op->level > 1) && (op->client_delay > 0)) { + op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_info("All %sfencing options targeting %s for client %s@%s failed " CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! * \internal * \brief If fencing operation is a duplicate, merge it into the other one * * \param[in,out] op Fencing operation to check */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: suicide for %s", op->id, other->id, other->target); continue; } if (!fencing_peer_active(crm_get_peer(0, other->originator))) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " CRM_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)", op->id, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %d", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } /*! * \internal * \brief Process a manual confirmation of a pending fence action * * \param[in] client IPC client that sent confirmation * \param[in,out] msg Request XML with manual confirmation * * \return Standard Pacemaker return code */ int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); CRM_CHECK(dev != NULL, return EPROTO); crm_notice("Received manual confirmation that %s has been fenced", pcmk__s(crm_element_value(dev, F_STONITH_TARGET), "unknown target")); op = initiate_remote_stonith_op(client, msg, TRUE); if (op == NULL) { return EPROTO; } op->state = st_done; set_fencing_completed(op); op->delegate = strdup("a human"); // For the fencer's purposes, the fencing operation is done pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, msg, false); /* For the requester's purposes, the operation is still pending. The * actual result will be sent asynchronously via the operation's done_cb(). */ return EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER); int call_options = 0; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); CRM_ASSERT(op != NULL); crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays - crm_element_value_int(request, F_STONITH_DELAY, &(op->delay)); + crm_element_value_int(request, F_STONITH_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } /* For a RELAY operation, set fenced on the client. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); } op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %d, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; crm_node_t *node; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if (node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } /*! * \internal * \brief Create a peer fencing operation from a request, and initiate it * * \param[in] client IPC client that made request (NULL to get from request) * \param[in] request Request XML * \param[in] manual_ack Whether this is a manual action confirmation * * \return Newly created operation on success, otherwise NULL */ remote_fencing_op_t * initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: // advance_topology_level() exhausted levels pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "All topology levels failed"); crm_warn("Could not request peer fencing (%s) targeting %s " CRM_XS " id=%.8s", op->action, op->target, op->id); finalize_op(op, NULL, false); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " CRM_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " CRM_XS " id=%.8s state=%s base_timeout=%d", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); if (relay_op_id) { crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id); } } send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static peer_device_info_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { peer_device_info_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if (!peer->tried && count_peer_devices(op, peer, verified_devices_only, fenced_support_flag(op->action))) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static peer_device_info_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; peer_device_info_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); if ((stonith_watchdog_timeout_ms > 0) && pcmk__is_fencing_action(op->action) && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) && node_does_watchdog_fencing(op->target)) { crm_info("Couldn't contact watchdog-fencing target-node (%s)", op->target); /* check_watchdog_fencing_and_wait will log additional info */ } else { crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); } return NULL; } static int get_device_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, bool with_delay) { device_properties_t *props; int delay = 0; if (!peer || !device) { return op->base_timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return op->base_timeout; } - // op->delay < 0 means disable any static/random fencing delays - if (with_delay && op->delay >= 0) { + // op->client_delay < 0 means disable any static/random fencing delays + if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max delay = (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); } return (props->custom_action_timeout[op->phase]? props->custom_action_timeout[op->phase] : op->base_timeout) + delay; } struct timeout_data { const remote_fencing_op_t *op; const peer_device_info_t *peer; int total_timeout; }; /*! * \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in,out] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id, true); } } static int get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer) { int total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; GList *auto_list = NULL; if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { /* in case of watchdog-device we add the timeout to the budget regardless of if we got a reply or not */ if ((stonith_watchdog_timeout_ms > 0) && pcmk__is_fencing_action(op->action) && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID, pcmk__str_none) && node_does_watchdog_fencing(op->target)) { total_timeout += stonith_watchdog_timeout_ms / 1000; continue; } for (iter = op->query_results; iter != NULL; iter = iter->next) { const peer_device_info_t *peer = iter->data; if (auto_list) { GList *match = g_list_find_custom(auto_list, device_list->data, sort_strings); if (match) { auto_list = g_list_remove(auto_list, match->data); } } if (find_peer_device(op, peer, device_list->data, fenced_support_flag(op->action))) { total_timeout += get_device_timeout(op, peer, device_list->data, true); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ //Add only exists automatic_list device timeout if (auto_list) { for (iter = auto_list; iter != NULL; iter = iter->next) { GList *iter2 = NULL; for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) { peer_device_info_t *peer = iter2->data; if (find_peer_device(op, peer, iter->data, st_device_supports_on)) { total_timeout += get_device_timeout(op, peer, iter->data, true); break; } } } } g_list_free(auto_list); } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = op->base_timeout; } /* Take any requested fencing delay into account to prevent it from eating * up the total timeout. */ return ((total_timeout ? total_timeout : op->base_timeout) - + (op->delay > 0 ? op->delay : 0)); + + ((op->client_delay > 0)? op->client_delay : 0)); } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { // Client is connected to this node, so send update directly to them do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Fencer operation to advance * \param[in] device ID of device that just completed * \param[in,out] msg If not NULL, XML reply of last delegated operation */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. */ op_phase_on(op); } // This function is only called if the previous device succeeded pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s", op->target, op->client_name, op->originator); // The requested delay has been applied for the first device - if (op->delay > 0) { - op->delay = 0; + if (op->client_delay > 0) { + op->client_delay = 0; } request_peer_fencing(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; finalize_op(op, msg, false); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { crm_notice("Waiting %lds for %s to self-fence (%s) for " "client %s " CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), op->target, op->action, op->client_name, op->id); if (op->op_timer_one) { g_source_remove(op->op_timer_one); } op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } /*! * \internal * \brief Ask a peer to execute a fencing operation * * \param[in,out] op Fencing operation to be executed * \param[in,out] peer If NULL or topology is in use, choose best peer to * execute the fencing, otherwise use this peer */ static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) { const char *device = NULL; int timeout; CRM_CHECK(op != NULL, return); crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((op->phase == st_phase_on) && (op->devices != NULL)) { /* We are in the "on" phase of a remapped topology reboot. If this * device has pcmk_reboot_action="off", or doesn't support the "on" * action, skip it. * * We can't check device properties at this point because we haven't * chosen a peer for this stage yet. Instead, we check the local node's * knowledge about the device. If different versions of the fence agent * are installed on different nodes, there's a chance this could be * mistaken, but the worst that could happen is we don't try turning the * node back on when we should. */ device = op->devices->data; if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } if (!fenced_device_supports_on(device)) { crm_info("Not turning %s back on using %s because the agent " "doesn't support 'on'", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } } timeout = op->base_timeout; if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %d for peer's fencing targeting %s for %s" CRM_XS "id=%.8s", op->total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore the caller's peer preference if topology is in use, because * that peer might not have access to the required device. With * topology, stonith_choose_peer() removes the device from further * consideration, so the timeout must be calculated beforehand. * * @TODO Basing the total timeout on the caller's preferred peer (above) * is less than ideal. */ peer = stonith_choose_peer(op); device = op->devices->data; /* Fencing timeout sent to peer takes no delay into account. * The peer will add a dedicated timer for any delay upon * schedule_stonith_command(). */ timeout = get_device_timeout(op, peer, device, false); } if (peer) { - /* Take any requested fencing delay into account to prevent it from eating - * up the timeout. - */ - int timeout_one = (op->delay > 0 ? - TIMEOUT_MULTIPLY_FACTOR * op->delay : 0); + int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); + if (op->client_delay > 0) { + /* Take requested fencing delay into account to prevent it from + * eating up the timeout. + */ + timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; + } + crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); - crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay); + crm_xml_add_int(remote_op, F_STONITH_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device, true); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " CRM_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); } else { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " CRM_XS " for client %s (%ds, %lds)", peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } if (!((stonith_watchdog_timeout_ms > 0) && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) && pcmk__is_fencing_action(op->action))) && check_watchdog_fencing_and_wait(op))) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for suicide. Local pe-execution in sbd may detect the node as unclean and lead to timely suicide. Otherwise the selection of stonith-watchdog-timeout at least is questionable. */ /* coming here we're not waiting for watchdog timeout - thus engage timer with timout evaluated before */ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " CRM_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_CHECK(op->state < st_done, return); finalize_timed_out_op(op, "All nodes failed, or are unable, to " "fence target"); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { if (check_watchdog_fencing_and_wait(op)) { return; } } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } /* ... else use existing result from previous failed attempt * (topology is not in use, and no devices remain to be attempted). * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would * prevent finalize_op() from setting the correct delegate if * needed. */ crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; finalize_op(op, NULL, false); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " CRM_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const peer_device_info_t *peer_a = a; const peer_device_info_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not * * \param[in] op Fencing operation with topology to check */ static gboolean all_topology_devices_found(const remote_fencing_op_t *op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__is_fencing_action(op->action)) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { peer_device_info_t *peer = iter->data; if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } match = find_peer_device(op, peer, device->data, st_device_supports_none); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] xml XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in,out] op Fencing operation that properties are being parsed for * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(const xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %d", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", peer, device, props->delay_max[phase], action); } props->delay_base[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]); if (props->delay_base[phase]) { crm_trace("Peer %s with device %s returned base delay %d for %s", peer, device, props->delay_base[phase], action); } /* Handle devices with automatic unfencing */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] peer Peer's device information * \param[in] device ID of device being parsed */ static void add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, peer_device_info_t *peer, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = calloc(1, sizeof(device_properties_t)); int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */ /* Add a new entry to this peer's devices list */ CRM_ASSERT(props != NULL); g_hash_table_insert(peer->devices, strdup(device), props); /* Peers with verified (monitored) access will be preferred */ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", peer->host, device); props->verified = TRUE; } crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags); props->device_support_flags = flags; /* Parse action-specific device properties */ parse_action_specific(xml, peer->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. */ if (pcmk__str_eq(ID(child), PCMK_ACTION_OFF, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); } else if (pcmk__str_eq(ID(child), PCMK_ACTION_ON, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static peer_device_info_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, const xmlNode *xml) { peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t)); xmlNode *child; // cppcheck seems not to understand the abort logic in CRM_CHECK // cppcheck-suppress memleak CRM_CHECK(peer != NULL, return NULL); peer->host = strdup(host); peer->devices = pcmk__strkey_table(free, free); /* Each child element describes one capable device available to the peer */ for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { const char *device = ID(child); if (device) { add_device_properties(child, op, peer, device); } } peer->ndevices = g_hash_table_size(peer->devices); CRM_CHECK(ndevices == peer->ndevices, crm_err("Query claimed to have %d device%s but %d found", ndevices, pcmk__plural_s(ndevices), peer->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); return peer; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode *msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; peer_device_info_t *peer = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(stonith_remote_op_list, id); if (op == NULL) { crm_debug("Received query reply for unknown or expired operation %s", id); return -EOPNOTSUPP; } replies_expected = fencing_active_peers(); if (op->replies_expected < replies_expected) { replies_expected = op->replies_expected; } if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, F_ORIG); host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); if (ndevices > 0) { peer = add_result(op, host, ndevices, dev); } pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (pcmk_is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, peer, TRUE, fenced_support_flag(op->action)); /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if ((peer != NULL) && !host_is_target && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified device%s", nverified, pcmk__plural_s(nverified)); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if ((peer != NULL) && (op->state == st_done)) { crm_info("Discarding query result from %s (%d device%s): " "Operation is %s", peer->host, peer->ndevices, pcmk__plural_s(peer->ndevices), stonith_op_state_str(op->state)); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received */ void fenced_process_fencing_reply(xmlNode *msg) { const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(dev != NULL, return); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return); dev = stonith__find_xe_with_result(msg); CRM_CHECK(dev != NULL, return); stonith__xe_get_result(dev, &result); device = crm_element_value(dev, F_STONITH_DEVICE); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if ((op == NULL) && pcmk__result_ok(&result)) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); pcmk__reset_result(&result); return; } pcmk__reset_result(&op->result); op->result = result; // The operation takes ownership of the result if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return; } if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) { if (pcmk__result_ok(&op->result)) { op->state = st_done; } else { op->state = st_failed; } finalize_op(op, msg, false); return; } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = NULL; const char *reason = op->result.exit_reason; /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { finalize_op(op, msg, false); return; } device = crm_element_value(msg, F_STONITH_DEVICE); if ((op->phase == 2) && !pcmk__result_ok(&op->result)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s " "after successful 'off'", device, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : ": ", (reason == NULL)? "" : reason, op->target); pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: " "%s%s%s%s", op->action, op->target, ((device == NULL)? "" : " using "), ((device == NULL)? "" : device), op->client_name, op->originator, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } if (pcmk__result_ok(&op->result)) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg); return; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; finalize_op(op, msg, false); return; } } } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) { op->state = st_done; finalize_op(op, msg, false); return; } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT) && (op->devices == NULL)) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; finalize_op(op, msg, false); return; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (result was: %s)", op->target, op->originator, op->client_name, pcmk_exec_status_str(op->result.execution_status)); request_peer_fencing(op, NULL); } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index 8d50af97a5..f3d9073390 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,315 +1,318 @@ /* * Copyright 2009-2023 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include // uint32_t, uint64_t #include /*! * \internal * \brief Check whether target has already been fenced recently * * \param[in] tolerance Number of seconds to look back in time * \param[in] target Name of node to search for * \param[in] action Action we want to match * * \return TRUE if an equivalent fencing operation took place in the last * \p tolerance seconds, FALSE otherwise */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; /*! list of actions that must execute on the target node. Used for unfencing */ GString *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; // Whether target's nodeid should be passed as a parameter to the agent gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; - /*! Requested fencing delay. - * Value -1 means disable any static/random fencing delays. */ - int delay; + /*! + * Fencing delay (in seconds) requested by API client (used by controller to + * implement priority-fencing-delay). A value of -1 means disable all + * configured delays. + */ + int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; /*! The (potentially intermediate) result of the operation */ pcmk__action_result_t result; } remote_fencing_op_t; void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; // How the user specified the target of a topology level enum fenced_target_by { fenced_target_by_unknown = -1, // Invalid or not yet parsed fenced_target_by_name, // By target name fenced_target_by_pattern, // By a pattern matching target names fenced_target_by_attribute, // By a node attribute/value on target }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { enum fenced_target_by kind; // How target was specified /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! Names of fencing devices at each topology level */ GList *levels[ST_LEVEL_MAX]; } stonith_topology_t; void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); void fenced_unregister_handlers(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode *msg, gboolean from_cib); void stonith_device_remove(const char *id, bool from_cib); char *stonith_level_key(const xmlNode *msg, enum fenced_target_by); void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data); void fenced_send_device_notification(const char *op, const pcmk__action_result_t *result, const char *desc); void fenced_send_level_notification(const char *op, const pcmk__action_result_t *result, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack); void fenced_process_fencing_reply(xmlNode *msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(crm_node_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg); void fencer_metadata(void); const char *fenced_device_reboot_action(const char *device_id); bool fenced_device_supports_on(const char *device_id); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! * \internal * \brief Get the device flag to use with a given action when searching devices * * \param[in] action Action to check * * \return st_device_supports_on if \p action is "on", otherwise * st_device_supports_none */ static inline uint32_t fenced_support_flag(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; } extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; extern GHashTable *stonith_remote_op_list; diff --git a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst index 0f34f84ffc..06c00f0a99 100644 --- a/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst +++ b/doc/sphinx/Pacemaker_Explained/reusing-configuration.rst @@ -1,415 +1,420 @@ Reusing Parts of the Configuration ---------------------------------- Pacemaker provides multiple ways to simplify the configuration XML by reusing parts of it in multiple places. Besides simplifying the XML, this also allows you to manipulate multiple configuration elements with a single reference. Reusing Resource Definitions ############################ If you want to create lots of resources with similar configurations, defining a *resource template* simplifies the task. Once defined, it can be referenced in primitives or in certain types of constraints. Configuring Resources with Templates ____________________________________ The primitives referencing the template will inherit all meta-attributes, instance attributes, utilization attributes and operations defined in the template. And you can define specific attributes and operations for any of the primitives. If any of these are defined in both the template and the primitive, the values defined in the primitive will take precedence over the ones defined in the template. Hence, resource templates help to reduce the amount of configuration work. If any changes are needed, they can be done to the template definition and will take effect globally in all resource definitions referencing that template. Resource templates have a syntax similar to that of primitives. .. topic:: Resource template for a migratable Xen virtual machine .. code-block:: xml Once you define a resource template, you can use it in primitives by specifying the ``template`` property. .. topic:: Xen primitive resource using a resource template .. code-block:: xml In the example above, the new primitive ``vm1`` will inherit everything from ``vm-template``. For example, the equivalent of the above two examples would be: .. topic:: Equivalent Xen primitive resource not using a resource template .. code-block:: xml If you want to overwrite some attributes or operations, add them to the particular primitive's definition. .. topic:: Xen resource overriding template values .. code-block:: xml In the example above, the new primitive ``vm2`` has special attribute values. Its ``monitor`` operation has a longer ``timeout`` and ``interval``, and the primitive has an additional ``stop`` operation. To see the resulting definition of a resource, run: .. code-block:: none # crm_resource --query-xml --resource vm2 To see the raw definition of a resource in the CIB, run: .. code-block:: none # crm_resource --query-xml-raw --resource vm2 Using Templates in Constraints ______________________________ A resource template can be referenced in the following types of constraints: - ``order`` constraints (see :ref:`s-resource-ordering`) - ``colocation`` constraints (see :ref:`s-resource-colocation`) - ``rsc_ticket`` constraints (for multi-site clusters as described in :ref:`ticket-constraints`) Resource templates referenced in constraints stand for all primitives which are derived from that template. This means, the constraint applies to all primitive resources referencing the resource template. Referencing resource templates in constraints is an alternative to resource sets and can simplify the cluster configuration considerably. For example, given the example templates earlier in this chapter: .. code-block:: xml would colocate all VMs with ``base-rsc`` and is the equivalent of the following constraint configuration: .. code-block:: xml .. note:: In a colocation constraint, only one template may be referenced from either ``rsc`` or ``with-rsc``; the other reference must be a regular resource. Using Templates in Resource Sets ________________________________ Resource templates can also be referenced in resource sets. For example, given the example templates earlier in this section, then: .. code-block:: xml is the equivalent of the following constraint using a sequential resource set: .. code-block:: xml Or, if the resources referencing the template can run in parallel, then: .. code-block:: xml is the equivalent of the following constraint configuration: .. code-block:: xml .. _s-reusing-config-elements: Reusing Rules, Options and Sets of Operations ############################################# Sometimes a number of constraints need to use the same set of rules, and resources need to set the same options and parameters. To simplify this situation, you can refer to an existing object using an ``id-ref`` instead of an ``id``. So if for one resource you have .. code-block:: xml Then instead of duplicating the rule for all your other resources, you can instead specify: .. topic:: **Referencing rules from other constraints** .. code-block:: xml .. important:: The cluster will insist that the ``rule`` exists somewhere. Attempting to add a reference to a non-existing rule will cause a validation failure, as will attempting to remove a ``rule`` that is referenced elsewhere. The same principle applies for ``meta_attributes`` and ``instance_attributes`` as illustrated in the example below: .. topic:: Referencing attributes, options, and operations from other resources .. code-block:: xml ``id-ref`` can similarly be used with ``resource_set`` (in any constraint type), ``nvpair``, and ``operations``. Tagging Configuration Elements ############################## Pacemaker allows you to *tag* any configuration element that has an XML ID. The main purpose of tagging is to support higher-level user interface tools; Pacemaker itself only uses tags within constraints. Therefore, what you can do with tags mostly depends on the tools you use. Configuring Tags ________________ A tag is simply a named list of XML IDs. .. topic:: Tag referencing three resources .. code-block:: xml What you can do with this new tag depends on what your higher-level tools support. For example, a tool might allow you to enable or disable all of the tagged resources at once, or show the status of just the tagged resources. A single configuration element can be listed in any number of tags. +.. important:: + + If listing nodes in a tag, you must list the node's ``id``, not name. + + Using Tags in Constraints and Resource Sets ___________________________________________ Pacemaker itself only uses tags in constraints. If you supply a tag name instead of a resource name in any constraint, the constraint will apply to all resources listed in that tag. .. topic:: Constraint using a tag .. code-block:: xml In the example above, assuming the ``all-vms`` tag is defined as in the previous example, the constraint will behave the same as: .. topic:: Equivalent constraints without tags .. code-block:: xml A tag may be used directly in the constraint, or indirectly by being listed in a :ref:`resource set ` used in the constraint. When used in a resource set, an expanded tag will honor the set's ``sequential`` property. Filtering With Tags ___________________ The ``crm_mon`` tool can be used to display lots of information about the state of the cluster. On large or complicated clusters, this can include a lot of information, which makes it difficult to find the one thing you are interested in. The ``--resource=`` and ``--node=`` command line options can be used to filter results. In their most basic usage, these options take a single resource or node name. However, they can also be supplied with a tag name to display several objects at once. For instance, given the following CIB section: .. code-block:: xml The following would be output for ``crm_mon --resource=inactive-rscs -r``: .. code-block:: none Cluster Summary: * Stack: corosync * Current DC: cluster02 (version 2.0.4-1.e97f9675f.git.el7-e97f9675f) - partition with quorum * Last updated: Tue Oct 20 16:09:01 2020 * Last change: Tue May 5 12:04:36 2020 by hacluster via crmd on cluster01 * 5 nodes configured * 27 resource instances configured (4 DISABLED) Node List: * Online: [ cluster01 cluster02 ] Full List of Resources: * Clone Set: inactive-clone [inactive-dhcpd] (disabled): * Stopped (disabled): [ cluster01 cluster02 ] * Resource Group: inactive-group (disabled): * inactive-dummy-1 (ocf::pacemaker:Dummy): Stopped (disabled) * inactive-dummy-2 (ocf::pacemaker:Dummy): Stopped (disabled) diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 5de1706701..6af64be6cc 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,1689 +1,1689 @@ /* * Copyright 2004-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED) # ifdef HAVE_UCRED # ifndef _GNU_SOURCE # define _GNU_SOURCE # endif # endif # include #elif defined(HAVE_GETPEERUCRED) # include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); /*! * \brief Create a new object for using Pacemaker daemon IPC * * \param[out] api Where to store new IPC object * \param[in] server Which Pacemaker daemon the object is for * * \return Standard Pacemaker result code * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only * supports the controller, pacemakerd, and schedulerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { if (api == NULL) { return EINVAL; } *api = calloc(1, sizeof(pcmk_ipc_api_t)); if (*api == NULL) { return errno; } (*api)->server = server; if (pcmk_ipc_name(*api, false) == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return EOPNOTSUPP; } (*api)->ipc_size_max = 0; // Set server methods and max_size (if not default) switch (server) { case pcmk_ipc_attrd: (*api)->cmds = pcmk__attrd_api_methods(); break; case pcmk_ipc_based: (*api)->ipc_size_max = 512 * 1024; // 512KB break; case pcmk_ipc_controld: (*api)->cmds = pcmk__controld_api_methods(); break; case pcmk_ipc_execd: break; case pcmk_ipc_fenced: break; case pcmk_ipc_pacemakerd: (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: (*api)->cmds = pcmk__schedulerd_api_methods(); // @TODO max_size could vary by client, maybe take as argument? (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB break; } if ((*api)->cmds == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), (*api)->ipc_size_max); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } // If daemon API has its own data to track, allocate it if ((*api)->cmds->new_data != NULL) { if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } } crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); return pcmk_rc_ok; } static void free_daemon_specific_data(pcmk_ipc_api_t *api) { if ((api != NULL) && (api->cmds != NULL)) { if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { api->cmds->free_data(api->api_data); api->api_data = NULL; } free(api->cmds); api->cmds = NULL; } } /*! * \internal * \brief Call an IPC API event callback, if one is registed * * \param[in,out] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in,out] event_data Event-specific data */ void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data) { if ((api != NULL) && (api->cb != NULL)) { api->cb(api, event_type, status, event_data, api->user_data); } } /*! * \internal * \brief Clean up after an IPC disconnect * * \param[in,out] user_data IPC API connection that disconnected * * \note This function can be used as a main loop IPC destroy callback. */ static void ipc_post_disconnect(gpointer user_data) { pcmk_ipc_api_t *api = user_data; - crm_info("Disconnected from %s IPC API", pcmk_ipc_name(api, true)); + crm_info("Disconnected from %s", pcmk_ipc_name(api, true)); // Perform any daemon-specific handling needed if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { api->cmds->post_disconnect(api); } // Call client's registered event callback pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, NULL); /* If this is being called from a running main loop, mainloop_gio_destroy() * will free ipc and mainloop_io immediately after calling this function. * If this is called from a stopped main loop, these will leak, so the best * practice is to close the connection before stopping the main loop. */ api->ipc = NULL; api->mainloop_io = NULL; if (api->free_on_disconnect) { /* pcmk_free_ipc_api() has already been called, but did not free api * or api->cmds because this function needed them. Do that now. */ free_daemon_specific_data(api); crm_trace("Freeing IPC API object after disconnect"); free(api); } } /*! * \brief Free the contents of an IPC API object * * \param[in,out] api IPC API object to free */ void pcmk_free_ipc_api(pcmk_ipc_api_t *api) { bool free_on_disconnect = false; if (api == NULL) { return; } crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); if (api->ipc != NULL) { if (api->mainloop_io != NULL) { /* We need to keep the api pointer itself around, because it is the * user data for the IPC client destroy callback. That will be * triggered by the pcmk_disconnect_ipc() call below, but it might * happen later in the main loop (if still running). * * This flag tells the destroy callback to free the object. It can't * do that unconditionally, because the application might call this * function after a disconnect that happened by other means. */ free_on_disconnect = api->free_on_disconnect = true; } pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true } if (!free_on_disconnect) { free_daemon_specific_data(api); crm_trace("Freeing IPC API object"); free(api); } } /*! * \brief Get the IPC name used with an IPC API connection * * \param[in] api IPC API connection * \param[in] for_log If true, return human-friendly name instead of IPC name * * \return IPC API's human-friendly or connection name, or if none is available, * "Pacemaker" if for_log is true and NULL if for_log is false */ const char * pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log) { if (api == NULL) { return for_log? "Pacemaker" : NULL; } switch (api->server) { case pcmk_ipc_attrd: return for_log? "attribute manager" : T_ATTRD; case pcmk_ipc_based: return for_log? "CIB manager" : NULL /* PCMK__SERVER_BASED_RW */; case pcmk_ipc_controld: return for_log? "controller" : CRM_SYSTEM_CRMD; case pcmk_ipc_execd: return for_log? "executor" : NULL /* CRM_SYSTEM_LRMD */; case pcmk_ipc_fenced: return for_log? "fencer" : NULL /* "stonith-ng" */; case pcmk_ipc_pacemakerd: return for_log? "launcher" : CRM_SYSTEM_MCP; case pcmk_ipc_schedulerd: return for_log? "scheduler" : CRM_SYSTEM_PENGINE; default: return for_log? "Pacemaker" : NULL; } } /*! * \brief Check whether an IPC API connection is active * * \param[in,out] api IPC API connection * * \return true if IPC is connected, false otherwise */ bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api) { return (api != NULL) && crm_ipc_connected(api->ipc); } /*! * \internal * \brief Call the daemon-specific API's dispatch function * * Perform daemon-specific handling of IPC reply dispatch. It is the daemon * method's responsibility to call the client's registered event callback, as * well as allocate and free any event data. * * \param[in,out] api IPC API connection * \param[in,out] message IPC reply XML to dispatch */ static bool call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) { crm_log_xml_trace(message, "ipc-received"); if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { return api->cmds->dispatch(api, message); } return false; } /*! * \internal * \brief Dispatch previously read IPC data * * \param[in] buffer Data read from IPC * \param[in,out] api IPC object * * \return Standard Pacemaker return code. In particular: * * pcmk_rc_ok: There are no more messages expected from the server. Quit * reading. * EINPROGRESS: There are more messages expected from the server. Keep reading. * * All other values indicate an error. */ static int dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api) { bool more = false; xmlNode *msg; if (buffer == NULL) { crm_warn("Empty message received from %s IPC", pcmk_ipc_name(api, true)); return ENOMSG; } msg = string2xml(buffer); if (msg == NULL) { crm_warn("Malformed message received from %s IPC", pcmk_ipc_name(api, true)); return EPROTO; } more = call_api_dispatch(api, msg); free_xml(msg); if (more) { return EINPROGRESS; } else { return pcmk_rc_ok; } } /*! * \internal * \brief Dispatch data read from IPC source * * \param[in] buffer Data read from IPC * \param[in] length Number of bytes of data in buffer (ignored) * \param[in,out] user_data IPC object * * \return Always 0 (meaning connection is still required) * * \note This function can be used as a main loop IPC dispatch callback. */ static int dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data) { pcmk_ipc_api_t *api = user_data; CRM_CHECK(api != NULL, return 0); dispatch_ipc_data(buffer, api); return 0; } /*! * \brief Check whether an IPC connection has data available (without main loop) * * \param[in] api IPC API connection * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once * and return immediately; otherwise, poll for up to * this many milliseconds * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function to check whether IPC data is available. Return values of * interest include pcmk_rc_ok meaning data is available, and EAGAIN * meaning no data is available; all other values indicate errors. * \todo This does not allow the caller to poll multiple file descriptors at * once. If there is demand for that, we could add a wrapper for * pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves. */ int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms) { int rc; struct pollfd pollfd = { 0, }; if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { return EINVAL; } rc = pcmk__ipc_fd(api->ipc, &(pollfd.fd)); if (rc != pcmk_rc_ok) { crm_debug("Could not obtain file descriptor for %s IPC: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); return rc; } pollfd.events = POLLIN; rc = poll(&pollfd, 1, timeout_ms); if (rc < 0) { /* Some UNIX systems return negative and set EAGAIN for failure to * allocate memory; standardize the return code in that case */ return (errno == EAGAIN)? ENOMEM : errno; } else if (rc == 0) { return EAGAIN; } return pcmk_rc_ok; } /*! * \brief Dispatch available messages on an IPC connection (without main loop) * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function when IPC data is available. */ void pcmk_dispatch_ipc(pcmk_ipc_api_t *api) { if (api == NULL) { return; } while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); } } } // \return Standard Pacemaker return code static int connect_with_main_loop(pcmk_ipc_api_t *api) { int rc; struct ipc_client_callbacks callbacks = { .dispatch = dispatch_ipc_source_data, .destroy = ipc_post_disconnect, }; rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, &callbacks, &(api->mainloop_io)); if (rc != pcmk_rc_ok) { return rc; } crm_debug("Connected to %s IPC (attached to main loop)", pcmk_ipc_name(api, true)); /* After this point, api->mainloop_io owns api->ipc, so api->ipc * should not be explicitly freed. */ return pcmk_rc_ok; } // \return Standard Pacemaker return code static int connect_without_main_loop(pcmk_ipc_api_t *api) { int rc = pcmk__connect_generic_ipc(api->ipc); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } else { crm_debug("Connected to %s IPC (without main loop)", pcmk_ipc_name(api, true)); } return rc; } /*! * \internal * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors) * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * \param[in] attempts How many times to try (in case of soft error) * * \return Standard Pacemaker return code */ int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, int attempts) { int rc = pcmk_rc_ok; if ((api == NULL) || (attempts < 1)) { return EINVAL; } if (api->ipc == NULL) { api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), api->ipc_size_max); if (api->ipc == NULL) { return ENOMEM; } } if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } api->dispatch_type = dispatch_type; crm_debug("Attempting connection to %s (up to %d time%s)", pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts)); for (int remaining = attempts - 1; remaining >= 0; --remaining) { switch (dispatch_type) { case pcmk_ipc_dispatch_main: rc = connect_with_main_loop(api); break; case pcmk_ipc_dispatch_sync: case pcmk_ipc_dispatch_poll: rc = connect_without_main_loop(api); break; } if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) { break; // Result is final } // Retry after soft error (interrupted by signal, etc.) pcmk__sleep_ms((attempts - remaining) * 500); crm_debug("Re-attempting connection to %s (%d attempt%s remaining)", pcmk_ipc_name(api, true), remaining, pcmk__plural_s(remaining)); } if (rc != pcmk_rc_ok) { return rc; } if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { rc = api->cmds->post_connect(api); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } } return rc; } /*! * \brief Connect to a Pacemaker daemon via IPC * * \param[in,out] api IPC API instance * \param[in] dispatch_type How IPC replies should be dispatched * * \return Standard Pacemaker return code */ int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk__connect_ipc(api, dispatch_type, 2); if (rc != pcmk_rc_ok) { crm_err("Connection to %s failed: %s", pcmk_ipc_name(api, true), pcmk_rc_str(rc)); } return rc; } /*! * \brief Disconnect an IPC API instance * * \param[in,out] api IPC API connection * * \return Standard Pacemaker return code * * \note If the connection is attached to a main loop, this function should be * called before quitting the main loop, to ensure that all memory is * freed. */ void pcmk_disconnect_ipc(pcmk_ipc_api_t *api) { if ((api == NULL) || (api->ipc == NULL)) { return; } switch (api->dispatch_type) { case pcmk_ipc_dispatch_main: { mainloop_io_t *mainloop_io = api->mainloop_io; // Make sure no code with access to api can use these again api->mainloop_io = NULL; api->ipc = NULL; mainloop_del_ipc_client(mainloop_io); // After this point api might have already been freed } break; case pcmk_ipc_dispatch_poll: case pcmk_ipc_dispatch_sync: { crm_ipc_t *ipc = api->ipc; // Make sure no code with access to api can use ipc again api->ipc = NULL; // This should always be the case already, but to be safe api->free_on_disconnect = false; crm_ipc_close(ipc); crm_ipc_destroy(ipc); ipc_post_disconnect(api); } break; } } /*! * \brief Register a callback for IPC API events * * \param[in,out] api IPC API connection * \param[in] callback Callback to register * \param[in] userdata Caller data to pass to callback * * \note This function may be called multiple times to update the callback * and/or user data. The caller remains responsible for freeing * userdata in any case (after the IPC is disconnected, if the * user data is still registered with the IPC). */ void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data) { if (api == NULL) { return; } api->cb = cb; api->user_data = user_data; } /*! * \internal * \brief Send an XML request across an IPC API connection * * \param[in,out] api IPC API connection * \param[in,out] request XML request to send * * \return Standard Pacemaker return code * * \note Daemon-specific IPC API functions should call this function to send * requests, because it handles different dispatch types appropriately. */ int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request) { int rc; xmlNode *reply = NULL; enum crm_ipc_flags flags = crm_ipc_flags_none; if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { return EINVAL; } crm_log_xml_trace(request, "ipc-sent"); // Synchronous dispatch requires waiting for a reply if ((api->dispatch_type == pcmk_ipc_dispatch_sync) && (api->cmds != NULL) && (api->cmds->reply_expected != NULL) && (api->cmds->reply_expected(api, request))) { flags = crm_ipc_client_response; } // The 0 here means a default timeout of 5 seconds rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { return pcmk_legacy2rc(rc); } else if (rc == 0) { return ENODATA; } // With synchronous dispatch, we dispatch any reply now if (reply != NULL) { bool more = call_api_dispatch(api, reply); free_xml(reply); while (more) { rc = crm_ipc_read(api->ipc); if (rc == -EAGAIN) { continue; } else if (rc == -ENOMSG || rc == pcmk_ok) { return pcmk_rc_ok; } else if (rc < 0) { return -rc; } rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api); if (rc == pcmk_rc_ok) { more = false; } else if (rc == EINPROGRESS) { more = true; } else { continue; } } } return pcmk_rc_ok; } /*! * \internal * \brief Create the XML for an IPC request to purge a node from the peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Newly allocated IPC request XML * * \note The controller, fencer, and pacemakerd use the same request syntax, but * the attribute manager uses a different one. The CIB manager doesn't * have any syntax for it. The executor and scheduler don't connect to the * cluster layer and thus don't have or need any syntax for it. * * \todo Modify the attribute manager to accept the common syntax (as well * as its current one, for compatibility with older clients). Modify * the CIB manager to accept and honor the common syntax. Modify the * executor and scheduler to accept the syntax (immediately returning * success), just for consistency. Modify this function to use the * common syntax with all daemons if their version supports it. */ static xmlNode * create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { xmlNode *request = NULL; const char *client = crm_system_name? crm_system_name : "client"; switch (api->server) { case pcmk_ipc_attrd: request = create_xml_node(NULL, __func__); crm_xml_add(request, F_TYPE, T_ATTRD); crm_xml_add(request, F_ORIG, crm_system_name); crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); pcmk__xe_add_node(request, node_name, nodeid); break; case pcmk_ipc_controld: case pcmk_ipc_fenced: case pcmk_ipc_pacemakerd: request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, pcmk_ipc_name(api, false), client, NULL); if (nodeid > 0) { crm_xml_set_id(request, "%lu", (unsigned long) nodeid); } crm_xml_add(request, XML_ATTR_UNAME, node_name); break; case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_schedulerd: break; } return request; } /*! * \brief Ask a Pacemaker daemon to purge a node from its peer cache * * \param[in,out] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Standard Pacemaker return code * * \note At least one of node_name or nodeid must be specified. */ int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { int rc = 0; xmlNode *request = NULL; if (api == NULL) { return EINVAL; } if ((node_name == NULL) && (nodeid == 0)) { return EINVAL; } request = create_purge_node_request(api, node_name, nodeid); if (request == NULL) { return EOPNOTSUPP; } rc = pcmk__send_ipc_request(api, request); free_xml(request); crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); return rc; } /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ struct crm_ipc_s { struct pollfd pfd; unsigned int max_buf_size; // maximum bytes we can send or receive over IPC unsigned int buf_size; // size of allocated buffer int msg_size; int need_reply; char *buffer; char *server_name; // server IPC name being connected to qb_ipcc_connection_t *ipc; }; /*! * \brief Create a new (legacy) object for using Pacemaker daemon IPC * * \param[in] name IPC system name to connect to * \param[in] max_size Use a maximum IPC buffer size of at least this size * * \return Newly allocated IPC object on success, NULL otherwise * * \note The caller is responsible for freeing the result using * crm_ipc_destroy(). * \note This should be considered deprecated for use with daemons supported by * pcmk_new_ipc_api(). */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } client->buf_size = pcmk__ipc_buffer_size(max_size); client->buffer = malloc(client->buf_size); if (client->buffer == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client->server_name); free(client); return NULL; } /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \internal * \brief Connect a generic (not daemon-specific) IPC object * * \param[in,out] ipc Generic IPC object to connect * * \return Standard Pacemaker return code */ int pcmk__connect_generic_ipc(crm_ipc_t *ipc) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rc = pcmk_rc_ok; if (ipc == NULL) { return EINVAL; } ipc->need_reply = FALSE; ipc->ipc = qb_ipcc_connect(ipc->server_name, ipc->buf_size); if (ipc->ipc == NULL) { return errno; } rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd); if (rc < 0) { // -errno crm_ipc_close(ipc); return -rc; } rc = pcmk_daemon_user(&cl_uid, &cl_gid); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { crm_ipc_close(ipc); return rc; } rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid); if (rc != pcmk_rc_ok) { if (rc == pcmk_rc_ipc_unauthorized) { crm_info("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", ipc->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); } crm_ipc_close(ipc); return rc; } ipc->max_buf_size = qb_ipcc_get_buffer_size(ipc->ipc); if (ipc->max_buf_size > ipc->buf_size) { free(ipc->buffer); ipc->buffer = calloc(ipc->max_buf_size, sizeof(char)); if (ipc->buffer == NULL) { rc = errno; crm_ipc_close(ipc); return rc; } ipc->buf_size = ipc->max_buf_size; } return pcmk_rc_ok; } /*! * \brief Establish an IPC connection to a Pacemaker component * * \param[in,out] client Connection instance obtained from crm_ipc_new() * * \return true on success, false otherwise (in which case errno will be set; * specifically, in case of discovering the remote side is not * authentic, its value is set to ECONNABORTED). */ bool crm_ipc_connect(crm_ipc_t *client) { int rc = pcmk__connect_generic_ipc(client); if (rc == pcmk_rc_ok) { return true; } if ((client != NULL) && (client->ipc == NULL)) { errno = (rc > 0)? rc : ENOTCONN; crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_rc_str(errno), errno); } else if (rc == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed", (client == NULL)? "Pacemaker" : client->server_name); errno = ECONNABORTED; } else { crm_perror(LOG_ERR, "Could not verify authenticity of %s IPC provider", (client == NULL)? "Pacemaker" : client->server_name); errno = ENOTCONN; } return false; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } free(client->buffer); free(client->server_name); free(client); } } /*! * \internal * \brief Get the file descriptor for a generic IPC object * * \param[in,out] ipc Generic IPC object to get file descriptor for * \param[out] fd Where to store file descriptor * * \return Standard Pacemaker return code */ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd) { if ((ipc == NULL) || (fd == NULL)) { return EINVAL; } if ((ipc->ipc == NULL) || (ipc->pfd.fd < 0)) { return ENOTCONN; } *fd = ipc->pfd.fd; return pcmk_rc_ok; } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = -1; if (pcmk__ipc_fd(client, &fd) != pcmk_rc_ok) { crm_err("Could not obtain file descriptor for %s IPC", ((client == NULL)? "unspecified" : client->server_name)); errno = EINVAL; return -EINVAL; } return fd; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in,out] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; CRM_ASSERT(client != NULL); if (!crm_ipc_connected(client)) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } // \return Standard Pacemaker return code static int crm_ipc_decompress(crm_ipc_t * client) { pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); char *uncompressed = calloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); rc = pcmk__bzlib2rc(rc); if (rc != pcmk_rc_ok) { crm_err("Decompression failed: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); free(uncompressed); return rc; } /* * This assert no longer holds true. For an identical msg, some clients may * require compression, and others may not. If that same msg (event) is sent * to multiple clients, it could result in some clients receiving a compressed * msg even though compression was not explicitly required for them. * * CRM_ASSERT((header->size_uncompressed + sizeof(pcmk__ipc_header_t)) >= ipc_buffer_max); */ CRM_ASSERT(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ header = (pcmk__ipc_header_t *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } CRM_ASSERT(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0); return pcmk_rc_ok; } long crm_ipc_read(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); CRM_ASSERT(client->ipc != NULL); CRM_ASSERT(client->buffer != NULL); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } header = (pcmk__ipc_header_t *)(void*)client->buffer; if (!pcmk__valid_ipc_header(header)) { return -EBADMSG; } crm_trace("Received %s IPC event %d size=%u rc=%d text='%.100s'", client->server_name, header->qb.id, header->qb.size, client->msg_size, client->buffer + sizeof(pcmk__ipc_header_t)); } else { crm_trace("No message received from %s IPC: %s", client->server_name, pcmk_strerror(client->msg_size)); if (client->msg_size == -EAGAIN) { return -EAGAIN; } } if (!crm_ipc_connected(client) || client->msg_size == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(pcmk__ipc_header_t); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); int rc = pcmk_rc_ok; /* get the reply */ crm_trace("Waiting on reply to %s IPC message %d", client->server_name, request_id); do { *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); if (*bytes > 0) { pcmk__ipc_header_t *hdr = NULL; rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return rc; } hdr = (pcmk__ipc_header_t *)(void*)client->buffer; if (hdr->qb.id == request_id) { /* Got it */ break; } else if (hdr->qb.id < request_id) { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "OldIpcReply"); } else { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "ImpossibleReply"); CRM_ASSERT(hdr->qb.id <= request_id); } } else if (!crm_ipc_connected(client)) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } } while (time(NULL) < timeout); if (*bytes < 0) { rc = (int) -*bytes; // System errno } return rc; } /*! * \brief Send an IPC XML message * * \param[in,out] client Connection to IPC server * \param[in,out] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply) { int rc = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; pcmk__ipc_header_t *header; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (!crm_ipc_connected(client)) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); return -EALREADY; } else { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare %s IPC request: %s " CRM_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { crm_notice("Compressed message exceeds %d0%% of configured IPC " "limit (%u bytes); consider setting PCMK_ipc_buffer to " "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); if ((ms_timeout > 0) || !pcmk_is_set(flags, crm_ipc_client_response)) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return * an error if it's not connected? */ if (!crm_ipc_connected(client)) { goto send_cleanup; } qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } else if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); if (rc != pcmk_rc_ok) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } rc = (int) bytes; // Negative system errno, or size of reply received } else { // No timeout, and client response needed do { qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1); } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); rc = (int) qb_rc; // Negative system errno, or size of reply received } if (rc > 0) { pcmk__ipc_header_t *hdr = (pcmk__ipc_header_t *)(void*)client->buffer; crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", rc, hdr->qb.id, client->server_name, header->qb.id, crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("No reply to %s IPC %d: rc=%d", client->server_name, header->qb.id, rc); } send_cleanup: if (!crm_ipc_connected(client)) { crm_notice("Couldn't send %s IPC request %d: Connection closed " CRM_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } /*! * \brief Ensure an IPC provider has expected user or group * * \param[in] qb_ipc libqb client connection if available * \param[in] sock Connected Unix socket for IPC * \param[in] refuid Expected user ID * \param[in] refgid Expected group ID * \param[out] gotpid If not NULL, where to store provider's actual process ID * (or 1 on platforms where ID is not available) * \param[out] gotuid If not NULL, where to store provider's actual user ID * \param[out] gotgid If not NULL, where to store provider's actual group ID * * \return Standard Pacemaker return code * \note An actual user ID of 0 (root) will always be considered authorized, * regardless of the expected values provided. The caller can use the * output arguments to be stricter than this function. */ static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int rc = EOPNOTSUPP; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc != NULL) { rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid); rc = -rc; // libqb returns 0 or -errno if (rc == pcmk_rc_ok) { goto found; } } #endif #ifdef HAVE_UCRED { struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) { rc = errno; } else if (ucred_len != sizeof(ucred)) { rc = EOPNOTSUPP; } else { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; goto found; } } #endif #ifdef HAVE_SOCKPEERCRED { struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) < 0) { rc = errno; } else if (sockpeercred_len != sizeof(sockpeercred)) { rc = EOPNOTSUPP; } else { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; goto found; } } #endif #ifdef HAVE_GETPEEREID // For example, FreeBSD if (getpeereid(sock, &found_uid, &found_gid) < 0) { rc = errno; } else { found_pid = PCMK__SPECIAL_PID; goto found; } #endif #ifdef HAVE_GETPEERUCRED { ucred_t *ucred = NULL; if (getpeerucred(sock, &ucred) < 0) { rc = errno; } else { found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ucred_free(ucred); goto found; } } #endif return rc; // If we get here, nothing succeeded found: if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) { return pcmk_rc_ipc_unauthorized; } return pcmk_rc_ok; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = is_ipc_provider_expected(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; #ifdef HAVE_QB_IPCC_CONNECT_ASYNC struct pollfd pollfd = { 0, }; int poll_rc; c = qb_ipcc_connect_async(name, 0, &(pollfd.fd)); #else c = qb_ipcc_connect(name, 0); #endif if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } #ifdef HAVE_QB_IPCC_CONNECT_ASYNC pollfd.events = POLLIN; do { poll_rc = poll(&pollfd, 1, 2000); } while ((poll_rc == -1) && (errno == EINTR)); if ((poll_rc <= 0) || (qb_ipcc_connect_continue(c) != 0)) { crm_info("Could not connect to %s IPC: %s", name, (poll_rc == 0)?"timeout":strerror(errno)); rc = pcmk_rc_ipc_unresponsive; if (poll_rc > 0) { c = NULL; // qb_ipcc_connect_continue cleaned up for us } goto bail; } #endif qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 5d9bbedf6c..44cc52bb92 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2557 +1,2570 @@ /* * Copyright 2012-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * \brief Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(event != NULL); pcmk__str_update((char **) &event->rsc_id, rsc_id); pcmk__str_update((char **) &event->op_type, task); event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); copy->type = event->type; pcmk__str_update((char **) ©->rsc_id, event->rsc_id); pcmk__str_update((char **) ©->op_type, event->op_type); pcmk__str_update((char **) ©->user_data, event->user_data); copy->call_id = event->call_id; copy->timeout = event->timeout; copy->interval_ms = event->interval_ms; copy->start_delay = event->start_delay; copy->rsc_deleted = event->rsc_deleted; copy->rc = event->rc; copy->op_status = event->op_status; pcmk__str_update((char **) ©->output, event->output); copy->t_run = event->t_run; copy->t_rcchange = event->t_rcchange; copy->exec_time = event->exec_time; copy->queue_time = event->queue_time; copy->connection_rc = event->connection_rc; copy->params = pcmk__str_table_dup(event->params); pcmk__str_update((char **) ©->remote_nodename, event->remote_nodename); pcmk__str_update((char **) ©->exit_reason, event->exit_reason); return copy; } /*! * \brief Free an executor event * * \param[in,out] Executor event object to free */ void lrmd_free_event(lrmd_event_data_t *event) { if (event == NULL) { return; } // @TODO Why are these const char *? free((void *) event->rsc_id); free((void *) event->op_type); free((void *) event->user_data); free((void *) event->remote_nodename); lrmd__reset_result(event); if (event->params != NULL) { g_hash_table_destroy(event->params); } free(event); } static void lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { time_t epoch = 0; crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_ms(msg, F_LRMD_RSC_INTERVAL, &event.interval_ms); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_epoch(msg, F_LRMD_RSC_RUN_TIME, &epoch); event.t_run = (unsigned int) epoch; crm_element_value_epoch(msg, F_LRMD_RSC_RCCHANGE_TIME, &epoch); event.t_rcchange = (unsigned int) epoch; crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.type = lrmd_event_exec_complete; /* output and exit_reason may be freed by a callback */ event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT); lrmd__set_result(&event, event.rc, event.op_status, crm_element_value(msg, F_LRMD_RSC_EXIT_REASON)); event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } lrmd__reset_result(&event); } // \return Always 0, to indicate that IPC mainloop source should be kept static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->callback != NULL) { xmlNode *msg = string2xml(buffer); lrmd_dispatch_internal(lrmd, msg); free_xml(msg); } return 0; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static bool remote_executor_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; return (native->remote->tls_session != NULL); } /*! * \internal * \brief TLS dispatch function (for both trigger and file descriptor sources) * * \param[in,out] userdata API connection * * \return Always return a nonnegative value, which as a file descriptor * dispatch function means keep the mainloop source, and as a * trigger dispatch function, 0 means remove the trigger from the * mainloop while 1 means keep it (and job completed) */ static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (!remote_executor_connected(lrmd)) { crm_trace("TLS dispatch triggered after disconnect"); return 0; } crm_trace("TLS dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ switch (pcmk__remote_ready(native->remote, 0)) { case pcmk_rc_ok: rc = pcmk__read_remote_message(native->remote, -1); xml = pcmk__remote_message_xml(native->remote); break; case ETIME: // Nothing to read, check if a full message is already in buffer xml = pcmk__remote_message_xml(native->remote); break; default: rc = ENOTCONN; break; } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(lrmd, xml); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } free_xml(xml); xml = pcmk__remote_message_xml(native->remote); } if (rc == ENOTCONN) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return -EPROTONOSUPPORT; } } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; - crm_info("IPC connection destroyed"); + switch (native->type) { + case pcmk__client_ipc: + crm_info("Disconnected from local executor"); + break; +#ifdef HAVE_GNUTLS_GNUTLS_H + case pcmk__client_tls: + crm_info("Disconnected from remote executor on %s", + native->remote_nodename); + break; +#endif + default: + crm_err("Unsupported executor connection type %d (bug?)", + native->type); + } /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); free(native->remote->start_state); native->remote->buffer = NULL; native->remote->start_state = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } // \return Standard Pacemaker return code int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } // \return Standard Pacemaker return code static int read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, xmlNode **reply) { lrmd_private_t *native = lrmd->lrmd_private; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; int rc = pcmk_rc_ok; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } for (*reply = NULL; *reply == NULL; ) { *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return ETIME; } rc = pcmk__read_remote_message(native->remote, remaining_timeout); if (rc != pcmk_rc_ok) { return rc; } *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { return ENOMSG; } } crm_element_value_int(*reply, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(*reply, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(*reply); *reply = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, *reply); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } *reply = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(*reply); *reply = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(*reply); *reply = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return rc; } // \return Standard Pacemaker return code static int send_remote_message(lrmd_t *lrmd, xmlNode *msg) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd__remote_send_xml(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); } return rc; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; xmlNode *xml = NULL; if (!remote_executor_connected(lrmd)) { return -ENOTCONN; } rc = send_remote_message(lrmd, msg); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &xml); if (rc != pcmk_rc_ok) { crm_err("Disconnecting remote after request %d reply not received: %s " CRM_XS " rc=%d timeout=%dms", global_remote_msg_id, pcmk_rc_str(rc), rc, timeout); lrmd_tls_disconnect(lrmd); } if (reply) { *reply = xml; } else { free_xml(xml); } return pcmk_rc2legacy(rc); } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = send_remote_message(lrmd, msg); if (rc == pcmk_rc_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } rc = pcmk_rc2legacy(rc); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: return remote_executor_connected(lrmd); #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return 0; } } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in,out] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If TRUE, wait for a reply from the server; * must be TRUE for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, gboolean expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return rc < 0 ? rc : pcmk_ok; } // \return Standard Pacemaker return code int lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) { int rc = pcmk_rc_ok; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __func__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); if ((value) && (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { crm_xml_add(data, F_LRMD_WATCHDOG, value); } rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return (rc < 0)? pcmk_legacy2rc(rc) : pcmk_rc_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { pcmk__xe_set_bool_attr(hello, F_LRMD_IS_IPC_PROVIDER, true); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE); long long uptime = -1; crm_element_value_int(reply, F_LRMD_RC, &rc); /* The remote executor may add its uptime to the XML reply, which is * useful in handling transient attributes when the connection to the * remote node unexpectedly drops. If no parameter is given, just * default to -1. */ crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime); native->remote->uptime = uptime; if (start_state) { native->remote->start_state = strdup(start_state); } if (rc == -EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, fd); } if (rc != pcmk_rc_ok) { crm_err("Connection to executor failed: %s", pcmk_rc_str(rc)); rc = -ENOTCONN; } } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { CRM_ASSERT((dest != NULL) && (source != NULL) && (source->data != NULL)); dest->data = gnutls_malloc(source->size); CRM_ASSERT(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 // Chunk size for reading key from file // \return Standard Pacemaker return code static int read_gnutls_key(const char *location, gnutls_datum_t *key) { FILE *stream = NULL; size_t buf_len = KEY_READ_LEN; if ((location == NULL) || (key == NULL)) { return EINVAL; } stream = fopen(location, "r"); if (stream == NULL) { return errno; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_warn("Pacemaker Remote key read was partially successful " "(copy in memory may be corrupted)"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); CRM_ASSERT(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return ENOKEY; } return pcmk_rc_ok; } // Cache the most recently used Pacemaker Remote authentication key struct key_cache_s { time_t updated; // When cached key was read (valid for 1 minute) const char *location; // Where cached key was read from gnutls_datum_t key; // Cached key }; static bool key_is_cached(struct key_cache_s *key_cache) { return key_cache->updated != 0; } static bool key_cache_expired(struct key_cache_s *key_cache) { return (time(NULL) - key_cache->updated) >= 60; } static void clear_key_cache(struct key_cache_s *key_cache) { clear_gnutls_datum(&(key_cache->key)); if ((key_cache->updated != 0) || (key_cache->location != NULL)) { key_cache->updated = 0; key_cache->location = NULL; crm_debug("Cleared Pacemaker Remote key cache"); } } static void get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key) { copy_gnutls_datum(key, &(key_cache->key)); crm_debug("Using cached Pacemaker Remote key from %s", pcmk__s(key_cache->location, "unknown location")); } static void cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key, const char *location) { key_cache->updated = time(NULL); key_cache->location = location; copy_gnutls_datum(&(key_cache->key), key); crm_debug("Using (and cacheing) Pacemaker Remote key from %s", pcmk__s(location, "unknown location")); } /*! * \internal * \brief Get Pacemaker Remote authentication key from file or cache * * \param[in] location Path to key file to try (this memory must * persist across all calls of this function) * \param[out] key Key from location or cache * * \return Standard Pacemaker return code */ static int get_remote_key(const char *location, gnutls_datum_t *key) { static struct key_cache_s key_cache = { 0, }; int rc = pcmk_rc_ok; if ((location == NULL) || (key == NULL)) { return EINVAL; } if (key_is_cached(&key_cache)) { if (key_cache_expired(&key_cache)) { clear_key_cache(&key_cache); } else { get_cached_key(&key_cache, key); return pcmk_rc_ok; } } rc = read_gnutls_key(location, key); if (rc != pcmk_rc_ok) { return rc; } cache_key(&key_cache, key, location); return pcmk_rc_ok; } /*! * \internal * \brief Initialize the Pacemaker Remote authentication key * * Try loading the Pacemaker Remote authentication key from cache if available, * otherwise from these locations, in order of preference: the value of the * PCMK_authkey_location environment variable, if set; the Pacemaker default key * file location; or (for historical reasons) /etc/corosync/authkey. * * \param[out] key Where to store key * * \return Standard Pacemaker return code */ int lrmd__init_remote_key(gnutls_datum_t *key) { static const char *env_location = NULL; static bool need_env = true; int env_rc = pcmk_rc_ok; int default_rc = pcmk_rc_ok; int alt_rc = pcmk_rc_ok; bool env_is_default = false; bool env_is_fallback = false; if (need_env) { env_location = getenv("PCMK_authkey_location"); need_env = false; } // Try location in environment variable, if set if (env_location != NULL) { env_rc = get_remote_key(env_location, key); if (env_rc == pcmk_rc_ok) { return pcmk_rc_ok; } env_is_default = !strcmp(env_location, DEFAULT_REMOTE_KEY_LOCATION); env_is_fallback = !strcmp(env_location, ALT_REMOTE_KEY_LOCATION); /* @TODO It would be more secure to fail, rather than fall back to the * default, if an explicitly set key location is not readable, and it * would be better to never use the Corosync location as a fallback. * However, that would break any deployments currently working with the * fallbacks. */ } // Try default location, if environment wasn't explicitly set to it if (env_is_default) { default_rc = env_rc; } else { default_rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key); } // Try fallback location, if environment wasn't set to it and default failed if (env_is_fallback) { alt_rc = env_rc; } else if (default_rc != pcmk_rc_ok) { alt_rc = get_remote_key(ALT_REMOTE_KEY_LOCATION, key); } // We have all results, so log and return if ((env_rc != pcmk_rc_ok) && (default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment set, everything failed crm_warn("Could not read Pacemaker Remote key from %s (%s%s%s%s%s): %s", env_location, env_is_default? "" : "or default location ", env_is_default? "" : DEFAULT_REMOTE_KEY_LOCATION, !env_is_default && !env_is_fallback? " " : "", env_is_fallback? "" : "or fallback location ", env_is_fallback? "" : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return ENOKEY; } if (env_rc != pcmk_rc_ok) { // Environment set but failed, using a default crm_warn("Could not read Pacemaker Remote key from %s " "(using %s location %s instead): %s", env_location, (default_rc == pcmk_rc_ok)? "default" : "fallback", (default_rc == pcmk_rc_ok)? DEFAULT_REMOTE_KEY_LOCATION : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return pcmk_rc_ok; } if ((default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment unset, defaults failed crm_warn("Could not read Pacemaker Remote key from default location %s" " (or fallback location %s): %s", DEFAULT_REMOTE_KEY_LOCATION, ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(default_rc)); return ENOKEY; } return pcmk_rc_ok; // Environment variable unset, a default worked } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static inline int lrmd__tls_client_handshake(pcmk__remote_t *remote) { return pcmk__tls_client_handshake(remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT); } /*! * \internal * \brief Add trigger and file descriptor mainloop sources for TLS * * \param[in,out] lrmd API connection with established TLS session * \param[in] do_handshake Whether to perform executor handshake * * \return Standard Pacemaker return code */ static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_handshake) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_rc_ok; char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); struct mainloop_fd_callbacks tls_fd_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &tls_fd_callbacks); /* Async connections lose the client name provided by the API caller, so we * have to use our generated name here to perform the executor handshake. * * @TODO Keep track of the caller-provided name. Perhaps we should be using * that name in this function instead of generating one anyway. */ if (do_handshake) { rc = lrmd_handshake(lrmd, name); rc = pcmk_legacy2rc(rc); } free(name); return rc; } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. * @TODO make this async to avoid blocking code in client */ native->sock = sock; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_warn("Disconnecting after TLS handshake with Pacemaker Remote server %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EKEYREJECTED); return; } crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); rc = add_tls_to_mainloop(lrmd, true); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } native->async_timer = timer_id; return pcmk_ok; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return pcmk_rc2legacy(rc); } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(native->sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return -EPROTO; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { add_tls_to_mainloop(lrmd, false); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -EINVAL); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_ok; - crm_info("Disconnecting %s %s executor connection", - pcmk__client_type_str(native->type), - (native->remote_nodename? native->remote_nodename : "local")); switch (native->type) { case pcmk__client_ipc: + crm_debug("Disconnecting from local executor"); lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: + crm_debug("Disconnecting from remote executor on %s", + native->remote_nodename); lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return rc; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); CRM_ASSERT(rsc_info); pcmk__str_update(&rsc_info->id, rsc_id); pcmk__str_update(&rsc_info->standard, standard); pcmk__str_update(&rsc_info->provider, provider); pcmk__str_update(&rsc_info->type, type); return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { free_xml(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); free_xml(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, TRUE); if (data) { free_xml(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (xmlNode *rsc_xml = first_named_child(output_xml, F_LRMD_RSC); (rsc_xml != NULL) && (rc == pcmk_ok); rsc_xml = crm_next_same_xml(rsc_xml)) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (xmlNode *op_xml = first_named_child(rsc_xml, T_LRMD_RSC_OP); op_xml != NULL; op_xml = crm_next_same_xml(op_xml)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); if (op_info == NULL) { rc = -ENOMEM; break; } op_info->rsc_id = strdup(rsc_id); op_info->action = crm_element_value_copy(op_xml, F_LRMD_RSC_ACTION); op_info->interval_ms_s = crm_element_value_copy(op_xml, F_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = crm_element_value_copy(op_xml, F_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } free_xml(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { g_hash_table_insert(params_table, strdup(param->key), strdup(param->value)); } action = services__create_resource_action(type, standard, provider, type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_METADATA_TIMEOUT_MS, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { return -ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services_action_free(action); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_ALERT_ID, alert_id); crm_xml_add(data, F_LRMD_ALERT_PATH, alert_path); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return -ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static bool does_provider_have_agent(const char *agent, const char *provider, const char *class) { bool found = false; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = true; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } /*! * \internal * \brief Create an executor API object * * \param[out] api Will be set to newly created API object (it is the * caller's responsibility to free this value with * lrmd_api_delete() if this function succeeds) * \param[in] nodename If the object will be used for a remote connection, * the node name to use in cluster for remote executor * \param[in] server If the object will be used for a remote connection, * the resolvable host name to connect to * \param[in] port If the object will be used for a remote connection, * port number on \p server to connect to * * \return Standard Pacemaker return code * \note If the caller leaves one of \p nodename or \p server NULL, the other's * value will be used for both. If the caller leaves both NULL, an API * object will be created for a local executor connection. */ int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port) { lrmd_private_t *pvt = NULL; if (api == NULL) { return EINVAL; } *api = NULL; // Allocate all memory needed *api = calloc(1, sizeof(lrmd_t)); if (*api == NULL) { return ENOMEM; } pvt = calloc(1, sizeof(lrmd_private_t)); if (pvt == NULL) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } (*api)->lrmd_private = pvt; // @TODO Do we need to do this for local connections? pvt->remote = calloc(1, sizeof(pcmk__remote_t)); (*api)->cmds = calloc(1, sizeof(lrmd_api_operations_t)); if ((pvt->remote == NULL) || ((*api)->cmds == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } // Set methods (*api)->cmds->connect = lrmd_api_connect; (*api)->cmds->connect_async = lrmd_api_connect_async; (*api)->cmds->is_connected = lrmd_api_is_connected; (*api)->cmds->poke_connection = lrmd_api_poke_connection; (*api)->cmds->disconnect = lrmd_api_disconnect; (*api)->cmds->register_rsc = lrmd_api_register_rsc; (*api)->cmds->unregister_rsc = lrmd_api_unregister_rsc; (*api)->cmds->get_rsc_info = lrmd_api_get_rsc_info; (*api)->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; (*api)->cmds->set_callback = lrmd_api_set_callback; (*api)->cmds->get_metadata = lrmd_api_get_metadata; (*api)->cmds->exec = lrmd_api_exec; (*api)->cmds->cancel = lrmd_api_cancel; (*api)->cmds->list_agents = lrmd_api_list_agents; (*api)->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; (*api)->cmds->list_standards = lrmd_api_list_standards; (*api)->cmds->exec_alert = lrmd_api_exec_alert; (*api)->cmds->get_metadata_params = lrmd_api_get_metadata_params; if ((nodename == NULL) && (server == NULL)) { pvt->type = pcmk__client_ipc; } else { #ifdef HAVE_GNUTLS_GNUTLS_H if (nodename == NULL) { nodename = server; } else if (server == NULL) { server = nodename; } pvt->type = pcmk__client_tls; pvt->remote_nodename = strdup(nodename); pvt->server = strdup(server); if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } pvt->port = port; if (pvt->port == 0) { pvt->port = crm_default_remote_port(); } #else crm_err("Cannot communicate with Pacemaker Remote " "because GnuTLS is not enabled for this build"); lrmd_api_delete(*api); *api = NULL; return EOPNOTSUPP; #endif } return pcmk_rc_ok; } lrmd_t * lrmd_api_new(void) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, NULL, NULL, 0) == pcmk_rc_ok); return api; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, nodename, server, port) == pcmk_rc_ok); return api; } void lrmd_api_delete(lrmd_t * lrmd) { if (lrmd == NULL) { return; } if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy if (lrmd->cmds->disconnect != NULL) { // Also never really NULL lrmd->cmds->disconnect(lrmd); // No-op if already disconnected } free(lrmd->cmds); } if (lrmd->lrmd_private != NULL) { lrmd_private_t *native = lrmd->lrmd_private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); free(lrmd->lrmd_private); } free(lrmd); } struct metadata_cb { void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data); void *user_data; }; /*! * \internal * \brief Process asynchronous metadata completion * * \param[in,out] action Metadata action that completed */ static void metadata_complete(svc_action_t *action) { struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, action->rc, action->status, services__exit_reason(action)); pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); metadata_cb->callback(0, &result, metadata_cb->user_data); result.action_stdout = NULL; // Prevent free, because action owns it result.action_stderr = NULL; // Prevent free, because action owns it pcmk__reset_result(&result); free(metadata_cb); } /*! * \internal * \brief Retrieve agent metadata asynchronously * * \param[in] rsc Resource agent specification * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or later * via the main loop, and on success the metadata will * be in its result argument's action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note This function is not a lrmd_api_operations_t method because it does not * need an lrmd_t object and does not go through the executor, but * executes the agent directly. */ int lrmd__metadata_async(const lrmd_rsc_info_t *rsc, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { svc_action_t *action = NULL; struct metadata_cb *metadata_cb = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(callback != NULL, return EINVAL); if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, "Invalid resource specification"); callback(0, &result, user_data); pcmk__reset_result(&result); return EINVAL; } if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { return stonith__metadata_async(rsc->type, PCMK_DEFAULT_METADATA_TIMEOUT_MS / 1000, callback, user_data); } action = services__create_resource_action(pcmk__s(rsc->id, rsc->type), rsc->standard, rsc->provider, rsc->type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_METADATA_TIMEOUT_MS, NULL, 0); if (action == NULL) { pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { pcmk__set_result(&result, action->rc, action->status, services__exit_reason(action)); callback(0, &result, user_data); pcmk__reset_result(&result); services_action_free(action); return EINVAL; } action->cb_data = calloc(1, sizeof(struct metadata_cb)); if (action->cb_data == NULL) { services_action_free(action); pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } metadata_cb = (struct metadata_cb *) action->cb_data; metadata_cb->callback = callback; metadata_cb->user_data = user_data; if (!services_action_async(action, metadata_complete)) { services_action_free(action); return pcmk_rc_error; // @TODO Derive from action->rc and ->status } // The services library has taken responsibility for action return pcmk_rc_ok; } /*! * \internal * \brief Set the result of an executor event * * \param[in,out] event Executor event to set * \param[in] rc OCF exit status of event * \param[in] op_status Executor status of event * \param[in] exit_reason Human-friendly description of event */ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status, const char *exit_reason) { if (event == NULL) { return; } event->rc = rc; event->op_status = op_status; pcmk__str_update((char **) &event->exit_reason, exit_reason); } /*! * \internal * \brief Clear an executor event's exit reason, output, and error output * * \param[in,out] event Executor event to reset */ void lrmd__reset_result(lrmd_event_data_t *event) { if (event == NULL) { return; } free((void *) event->exit_reason); event->exit_reason = NULL; free((void *) event->output); event->output = NULL; } /*! * \internal * \brief Get the uptime of a remote resource connection * * When the cluster connects to a remote resource, part of that resource's * handshake includes the uptime of the remote resource's connection. This * uptime is stored in the lrmd_t object. * * \return The connection's uptime, or -1 if unknown */ time_t lrmd__uptime(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return -1; } else { return native->remote->uptime; } } const char * lrmd__node_start_state(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return NULL; } else { return native->remote->start_state; } }