diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 875d1e9617..3e472136f8 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -1,1358 +1,1358 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause); static void handle_response(xmlNode *stored_msg); static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause); static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); static void send_msg_via_ipc(xmlNode * msg, const char *sys); /* debug only, can wrap all it likes */ static int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (controld_globals.fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, controld_globals.fsa_actions, TRUE, __func__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(controld_globals.fsa_actions, "Drop"); controld_globals.fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } void register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, uint64_t with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(controld_globals.fsa_message_queue); fsa_data_t *fsa_data = NULL; if (raised_from == NULL) { raised_from = ""; } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return; } if (input == I_WAIT_FOR_EVENT) { controld_set_global_flags(controld_fsa_is_stalled); crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { controld_set_fsa_action_flags(with_actions); fsa_dump_actions(with_actions, "Restored"); return; } /* Store everything in the new event and reset * controld_globals.fsa_actions */ with_actions |= controld_globals.fsa_actions; controld_globals.fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) due to %s, %s data", raised_from, (prepend? "prepended" : "appended"), last_data_id, fsa_input2string(input), fsa_cause2string(cause), (data? 
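/* Illustrative sketch: register_fsa_error_adv() above first re-queues any
 * pending FSA actions as a fresh input, and only then registers the error
 * input with no actions attached, so in-flight work is deferred rather than
 * dropped. A minimal standalone model of that save-then-reset step; all
 * names below are hypothetical, not from this codebase:
 *
 *     #include <stdint.h>
 *     #include <stdio.h>
 *
 *     static uint64_t pending_actions = 0x5;  // example action bits
 *
 *     static void queue_input(uint64_t actions) {
 *         printf("re-queued actions %#llx\n", (unsigned long long) actions);
 *     }
 *
 *     static void raise_error_input(void) {
 *         if (pending_actions != 0) {
 *             queue_input(pending_actions);  // park unfinished work first
 *         }
 *         pending_actions = 0;  // reset before registering the error input
 *     }
 *
 *     int main(void) { raise_error_input(); return 0; }
 */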
"with" : "without")); fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", (unsigned long long) with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); crm_trace("Copying %s data from %s as cluster message data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_TIMER_POPPED: case C_SHUTDOWN: case C_UNKNOWN: case C_STARTUP: crm_crit("Copying %s data (from %s) is not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(CRM_EX_SOFTWARE); break; } } /* make sure to free it properly later */ if (prepend) { controld_globals.fsa_message_queue = g_list_prepend(controld_globals.fsa_message_queue, fsa_data); } else { controld_globals.fsa_message_queue = g_list_append(controld_globals.fsa_message_queue, fsa_data); } crm_trace("FSA message queue length is %d", g_list_length(controld_globals.fsa_message_queue)); /* fsa_dump_queue(LOG_TRACE); */ if (old_len == g_list_length(controld_globals.fsa_message_queue)) { crm_err("Couldn't add message to the queue"); } if (input != I_WAIT_FOR_EVENT) { controld_trigger_fsa(); } } void fsa_dump_queue(int log_level) { int offset = 0; for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *data = (fsa_data_t *) iter->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { xmlNode *wrapper = NULL; ha_msg_input_t *copy = pcmk__assert_alloc(1, sizeof(ha_msg_input_t)); copy->msg = (orig != NULL)? 
pcmk__xml_copy(NULL, orig->msg) : NULL; wrapper = pcmk__xe_first_child(copy->msg, PCMK__XE_CRM_XML, NULL, NULL); copy->xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Don't know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(CRM_EX_SOFTWARE); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = (fsa_data_t *) controld_globals.fsa_message_queue->data; controld_globals.fsa_message_queue = g_list_remove(controld_globals.fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? 
*/ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Deferring local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { enum crm_ais_msg_types dest = crm_msg_ais; bool is_for_dc = false; bool is_for_dcib = false; bool is_for_te = false; bool is_for_crm = false; bool is_for_cib = false; bool is_local = false; bool broadcast = false; const char *host_to = NULL; const char *sys_to = NULL; const char *sys_from = NULL; const char *type = NULL; const char *task = NULL; const char *ref = NULL; crm_node_t *node_to = NULL; CRM_CHECK(msg != NULL, return TRUE); host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO); sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM); type = crm_element_value(msg, PCMK__XA_T); task = crm_element_value(msg, PCMK__XA_CRM_TASK); ref = crm_element_value(msg, PCMK_XA_REFERENCE); broadcast = pcmk__str_empty(host_to); if (ref == NULL) { ref = "without reference ID"; } if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { crm_trace("Received hello %s from %s (no processing needed)", ref, pcmk__s(sys_from, "unidentified source")); crm_log_xml_trace(msg, "hello"); return TRUE; } // Require message type (set by create_request()) if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) { crm_warn("Ignoring invalid message %s with type '%s' " "(not '" PCMK__VALUE_CRMD "')", ref, pcmk__s(type, "")); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Require a destination subsystem (also set by create_request()) if (sys_to == NULL) { crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO, ref); crm_log_xml_trace(msg, "ignored"); return TRUE; } // Get the message type appropriate to the destination subsystem if (is_corosync_cluster()) { dest = text2msg_type(sys_to); if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) { /* Unrecognized value, use a sane default * * @TODO Maybe we should bail instead */ dest = crm_msg_crmd; } } is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); // Check whether message should be processed locally is_local = false; if (broadcast) { if (is_for_dc || is_for_te) { is_local = false; } else if (is_for_crm) { if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO, PCMK__CONTROLD_CMD_NODES, NULL)) { /* Node info requests do not specify a host, which is normally * treated as "all hosts", because the whole point is that the * client may not know the local node name. Always handle these * requests locally. 
*/ is_local = true; } else { is_local = !originated_locally; } } else { is_local = true; } } else if (pcmk__str_eq(controld_globals.our_nodename, host_to, pcmk__str_casei)) { is_local = true; } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Local delete of an offline node's resource history is_local = true; } } // Check whether message should be relayed if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC) { if (is_for_te) { crm_trace("Route message %s locally as transition request", ref); crm_log_xml_trace(msg, sys_to); send_msg_via_ipc(msg, sys_to); return TRUE; // No further processing of message is needed } crm_trace("Route message %s locally as DC request", ref); return FALSE; // More to be done by caller } if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { crm_trace("Relay message %s to DC (via %s)", ref, pcmk__s(host_to, "broadcast")); crm_log_xml_trace(msg, "relayed"); if (!broadcast) { node_to = pcmk__get_node(0, host_to, NULL, pcmk__node_search_cluster); } send_cluster_message(node_to, dest, msg, TRUE); return TRUE; } /* Transition engine and scheduler messages are sent only to the DC on * the same node. If we are no longer the DC, discard this message. */ crm_trace("Ignoring message %s because we are no longer DC", ref); crm_log_xml_trace(msg, "ignored"); return TRUE; // No further processing of message is needed } if (is_local) { if (is_for_crm || is_for_cib) { crm_trace("Route message %s locally as controller request", ref); return FALSE; // More to be done by caller } crm_trace("Relay message %s locally to %s", ref, sys_to); crm_log_xml_trace(msg, "IPC-relay"); send_msg_via_ipc(msg, sys_to); return TRUE; } if (!broadcast) { node_to = pcmk__search_node_caches(0, host_to, pcmk__node_search_cluster); if (node_to == NULL) { crm_warn("Ignoring message %s because node %s is unknown", ref, host_to); crm_log_xml_trace(msg, "ignored"); return TRUE; } } crm_trace("Relay message %s to %s", ref, pcmk__s(host_to, "all peers")); crm_log_xml_trace(msg, "relayed"); send_cluster_message(node_to, dest, msg, TRUE); return TRUE; } // Return true if field contains a nonnegative integer static bool authorize_version(xmlNode *message_data, const char *field, const char *client_name, const char *ref, const char *uuid) { const char *version = crm_element_value(message_data, field); long long version_num; if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok) || (version_num < 0LL)) { crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s " CRM_XS " ref=%s uuid=%s", client_name, ((version == NULL)? "" : version), field, (ref? ref : "none"), uuid); return false; } return true; } /*! * \internal * \brief Check whether a client IPC message is acceptable * * If a given client IPC message is a hello, "authorize" it by ensuring it has * valid information such as a protocol version, and return false indicating * that nothing further needs to be done with the message. If the message is not * a hello, just return true to indicate it needs further processing. 
* * \param[in] client_msg XML of IPC message * \param[in,out] curr_client If IPC is not proxied, client that sent message * \param[in] proxy_session If IPC is proxied, the session ID * * \return true if message needs further processing, false if it doesn't */ bool controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client, const char *proxy_session) { xmlNode *wrapper = NULL; xmlNode *message_data = NULL; const char *client_name = NULL; const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK); const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE); const char *uuid = (curr_client? curr_client->id : proxy_session); if (uuid == NULL) { crm_warn("IPC message from client rejected: No client identifier " CRM_XS " ref=%s", (ref? ref : "none")); goto rejected; } if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) { // Only hello messages need to be authorized return true; } wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL); message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); client_name = crm_element_value(message_data, PCMK__XA_CLIENT_NAME); if (pcmk__str_empty(client_name)) { crm_warn("IPC hello from client rejected: No client name " CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); goto rejected; } if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name, ref, uuid)) { goto rejected; } if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name, ref, uuid)) { goto rejected; } crm_trace("Validated IPC hello from client %s", client_name); crm_log_xml_trace(client_msg, "hello"); if (curr_client) { curr_client->userdata = pcmk__str_copy(client_name); } controld_trigger_fsa(); return false; rejected: crm_log_xml_trace(client_msg, "rejected"); if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } return false; } static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, PCMK__XA_SUBT); if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) { return handle_request(msg, cause); } if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) { handle_response(msg); return I_NULL; } crm_warn("Ignoring message with unknown " PCMK__XA_SUBT " '%s'", pcmk__s(type, "")); crm_log_xml_trace(msg, "bad"); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; const char *op = NULL; char *interval_spec = NULL; guint interval_ms = 0; gboolean is_remote_node = FALSE; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); if (xml_op) { xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL, NULL); xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES, NULL, NULL); if (xml_rsc) { rsc = pcmk__xe_id(xml_rsc); } if (xml_attrs) { op = crm_element_value(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_OP); crm_element_value_ms(xml_attrs, CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL, &interval_ms); } } uname = crm_element_value(xml_op, PCMK__META_ON_NODE); if ((rsc == NULL) || (uname == NULL)) { crm_log_xml_warn(stored_msg, "invalid failcount op"); return I_NULL; } if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) { is_remote_node = TRUE; } crm_debug("Clearing failures for %s-interval %s on %s " "from attribute manager, CIB, and executor state", 
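/* Illustrative sketch: authorize_version() above rejects IPC hellos whose
 * protocol version fields do not parse as nonnegative integers. The same
 * validation with plain libc, as a standalone sketch; pcmk__scan_ll() is
 * the real helper, strtoll() merely stands in for it here:
 *
 *     #include <errno.h>
 *     #include <stdbool.h>
 *     #include <stdlib.h>
 *
 *     static bool version_field_ok(const char *version) {
 *         char *end = NULL;
 *         long long v;
 *
 *         if ((version == NULL) || (*version == '\0')) {
 *             return false;  // field missing or empty
 *         }
 *         errno = 0;
 *         v = strtoll(version, &end, 10);
 *         // reject trailing junk, overflow/underflow, and negative values
 *         return (errno == 0) && (*end == '\0') && (v >= 0);
 *     }
 *
 *     int main(void) { return version_field_ok("3") ? 0 : 1; }
 */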
pcmk__readable_interval(interval_ms), rsc, uname); if (interval_ms) { interval_spec = crm_strdup_printf("%ums", interval_ms); } update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node); free(interval_spec); controld_cib_delete_last_failure(rsc, uname, op, interval_ms); lrm_clear_last_failure(rsc, uname, op, interval_ms); return I_NULL; } static enum crmd_fsa_input handle_lrm_delete(xmlNode *stored_msg) { const char *mode = NULL; xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); CRM_CHECK(msg_data != NULL, return I_NULL); /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to * relay the operation to the affected node, which will unregister the * resource from the local executor, clear the resource's history from the * CIB, and do some bookkeeping in the controller. * * However, if the affected node is offline, the client will specify * mode=PCMK__VALUE_CIB which means the controller receiving the operation * should clear the resource's history from the CIB and nothing else. This * is used to clear shutdown locks. */ mode = crm_element_value(msg_data, PCMK__XA_MODE); if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) { // Relay to affected node crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else { // Delete CIB history locally (compare with do_lrm_delete()) const char *from_sys = NULL; const char *user_name = NULL; const char *rsc_id = NULL; const char *node = NULL; xmlNode *rsc_xml = NULL; int rc = pcmk_rc_ok; rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL); CRM_CHECK(rsc_xml != NULL, return I_NULL); rsc_id = pcmk__xe_id(rsc_xml); from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM); node = crm_element_value(msg_data, PCMK__META_ON_NODE); user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL); crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " "(clearing CIB resource history only)", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : "")); rc = controld_delete_resource_history(rsc_id, node, user_name, cib_dryrun|cib_sync_call); if (rc == pcmk_rc_ok) { rc = controld_delete_resource_history(rsc_id, node, user_name, crmd_cib_smart_opt()); } /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and * op=CRM_OP_LRM_DELETE. */ if (from_sys) { lrmd_event_data_t *op = NULL; const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC); const char *transition; if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) { transition = crm_element_value(msg_data, PCMK__XA_TRANSITION_KEY); } else { transition = crm_element_value(stored_msg, PCMK__XA_TRANSITION_KEY); } crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "local node"), rsc_id, ((rc == pcmk_rc_ok)? "" : " not")); op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0); op->type = lrmd_event_exec_complete; op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID)); op->params = pcmk__strkey_table(free, free); pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); controld_rc2event(op, rc); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } return I_NULL; } } /*! 
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_remote_state(const xmlNode *msg) { const char *conn_host = NULL; const char *remote_uname = pcmk__xe_id(msg); crm_node_t *remote_peer; bool remote_is_up = false; int rc = pcmk_rc_ok; rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up); CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL); remote_peer = pcmk__cluster_lookup_remote_node(remote_uname); CRM_CHECK(remote_peer, return I_NULL); pcmk__update_peer_state(__func__, remote_peer, remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); conn_host = crm_element_value(msg, PCMK__XA_CONNECTION_HOST); if (conn_host) { pcmk__str_update(&remote_peer->conn_host, conn_host); } else if (remote_peer->conn_host) { free(remote_peer->conn_host); remote_peer->conn_host = NULL; } return I_NULL; } /*! * \brief Handle a CRM_OP_PING message * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_ping(const xmlNode *msg) { const char *value = NULL; xmlNode *ping = NULL; xmlNode *reply = NULL; // Build reply ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE); value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value); // Add controller state value = fsa_state2string(controld_globals.fsa_state); crm_xml_add(ping, PCMK__XA_CRMD_STATE, value); crm_notice("Current ping state: %s", value); // CTS needs this // Add controller health // @TODO maybe do some checks to determine meaningful status crm_xml_add(ping, PCMK_XA_RESULT, "ok"); // Send reply reply = create_reply(msg, ping); free_xml(ping); if (reply != NULL) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a PCMK__CONTROLD_CMD_NODES message * * \param[in] request Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_list(const xmlNode *request) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Create message data for reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODES); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = pcmk__xe_create(reply_data, PCMK_XE_NODE); crm_xml_add_ll(xml, PCMK_XA_ID, (long long) node->id); // uint32_t crm_xml_add(xml, PCMK_XA_UNAME, node->uname); crm_xml_add(xml, PCMK__XA_IN_CCM, node->state); } // Create and send reply reply = create_reply(request, reply_data); free_xml(reply_data); if (reply) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } /*! 
* \brief Handle a CRM_OP_NODE_INFO request * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_info_request(const xmlNode *msg) { const char *value = NULL; crm_node_t *node = NULL; int node_id = 0; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Build reply reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE); crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD); // Add whether current partition has quorum pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM, pcmk_is_set(controld_globals.flags, controld_has_quorum)); // Check whether client requested node info by ID and/or name crm_element_value_int(msg, PCMK_XA_ID, &node_id); if (node_id < 0) { node_id = 0; } value = crm_element_value(msg, PCMK_XA_UNAME); // Default to local node if none given if ((node_id == 0) && (value == NULL)) { value = controld_globals.our_nodename; } node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any); if (node) { crm_xml_add(reply_data, PCMK_XA_ID, node->uuid); crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname); crm_xml_add(reply_data, PCMK_XA_CRMD, node->state); pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE, pcmk_is_set(node->flags, crm_remote_node)); } // Send reply reply = create_reply(msg, reply_data); free_xml(reply_data); if (reply != NULL) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } static void verify_feature_set(xmlNode *msg) { const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET); if (dc_version == NULL) { /* All we really know is that the DC feature set is older than 3.1.0, * but that's also all that really matters. */ dc_version = "3.0.14"; } if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { crm_trace("Local feature set (%s) is compatible with DC's (%s)", CRM_FEATURE_SET, dc_version); } else { crm_err("Local feature set (%s) is incompatible with DC's (%s)", CRM_FEATURE_SET, dc_version); // Nothing is likely to improve without administrator involvement controld_set_fsa_input_flags(R_STAYDOWN); crmd_exit(CRM_EX_FATAL); } } // DC gets own shutdown all-clear static enum crmd_fsa_input handle_shutdown_self_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { // The expected case -- we initiated own shutdown sequence crm_info("Shutting down controller"); return I_STOP; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) { // Must be logic error -- DC confirming its own unrequested shutdown crm_err("Shutting down controller immediately due to " "unexpected shutdown confirmation"); return I_TERMINATE; } if (controld_globals.fsa_state != S_STOPPING) { // Shouldn't happen -- non-DC confirming unrequested shutdown crm_err("Starting new DC election because %s is " "confirming shutdown we did not request", (host_from? host_from : "another node")); return I_ELECTION; } // Shouldn't happen, but we are already stopping anyway crm_debug("Ignoring unexpected shutdown confirmation from %s", (host_from? 
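/* Illustrative sketch: verify_feature_set() above delegates the actual
 * compatibility decision to feature_set_compatible(), which lives in shared
 * controller code. Purely to illustrate the shape of such a check, here is a
 * hypothetical standalone comparison that treats versions as compatible when
 * their "major.minor" prefixes match; this is an assumption for the sketch,
 * not the real rule:
 *
 *     #include <stdbool.h>
 *     #include <stdio.h>
 *
 *     static bool same_major_minor(const char *a, const char *b) {
 *         int amaj = 0, amin = 0, bmaj = 0, bmin = 0;
 *
 *         if ((sscanf(a, "%d.%d", &amaj, &amin) != 2)
 *             || (sscanf(b, "%d.%d", &bmaj, &bmin) != 2)) {
 *             return false;  // unparseable version string
 *         }
 *         return (amaj == bmaj) && (amin == bmin);
 *     }
 *
 *     int main(void) { return same_major_minor("3.19", "3.19.0") ? 0 : 1; }
 */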
host_from : "another node")); return I_NULL; } // Non-DC gets shutdown all-clear from DC static enum crmd_fsa_input handle_shutdown_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { crm_warn("Ignoring shutdown request without origin specified"); return I_NULL; } if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_null_matches|pcmk__str_casei)) { if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting down controller after confirmation from %s", host_from); } else { crm_err("Shutting down controller after unexpected " "shutdown request from %s", host_from); controld_set_fsa_input_flags(R_STAYDOWN); } return I_STOP; } crm_warn("Ignoring shutdown request from %s because DC is %s", host_from, controld_globals.dc_name); return I_NULL; } static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK); /* Optimize this for the DC - it has the most to do */ crm_log_xml_trace(stored_msg, "request"); if (op == NULL) { crm_warn("Ignoring request without " PCMK__XA_CRM_TASK); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, PCMK__XA_SRC); crm_node_t *node = pcmk__search_node_caches(0, from, pcmk__node_search_cluster); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { return I_NULL; /* Done */ } } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_self_ack(stored_msg); } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { // Another controller wants to shut down its node return handle_shutdown_request(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { /* a remote connection host is letting us know the node state */ return handle_remote_state(stored_msg); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); if (AM_I_DC && (controld_globals.transition_graph != NULL) && !controld_globals.transition_graph->complete) { crm_debug("The throttle changed. 
Trigger a graph."); trigger_graph(); } return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); /* Sometimes we _must_ go into S_ELECTION */ if (controld_globals.fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { verify_feature_set(stored_msg); crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, PCMK__XA_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { return handle_lrm_delete(stored_msg); } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT || (strcmp(op, CRM_OP_REPROBE) == 0)) { crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_PING) == 0) { return handle_ping(stored_msg); } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { return handle_node_info_request(stored_msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; crm_element_value_int(stored_msg, PCMK_XA_ID, &id); name = crm_element_value(stored_msg, PCMK_XA_UNAME); if(cause == C_IPC_MESSAGE) { msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } free_xml(msg); } else { - reap_crm_member(id, name); + pcmk__cluster_forget_cluster_node(id, name); /* If we're forgetting this node, also forget any failures to fence * it, so we don't carry that over to any node added later with the * same name. */ st_fail_count_reset(name); } } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); remote_ra_process_maintenance_nodes(xml); } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { return handle_node_list(stored_msg); /*========== (NOT_DC)-Only Actions ==========*/ } else if (!AM_I_DC) { if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_ack(stored_msg); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? 
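/* Illustrative sketch: handle_request() above routes on the
 * PCMK__XA_CRM_TASK string through a long strcmp() chain. The same routing
 * can be modeled as a lookup table; handler names below are hypothetical and
 * this is only a sketch of the pattern, not a proposed refactoring:
 *
 *     #include <string.h>
 *
 *     typedef int (*op_handler_t)(void *msg);
 *
 *     struct op_entry { const char *op; op_handler_t handler; };
 *
 *     static int handle_noop(void *msg) { (void) msg; return 0; }
 *
 *     static const struct op_entry op_table[] = {
 *         { "noop", handle_noop },
 *         // further { op-string, handler } pairs would go here
 *         { NULL, NULL },
 *     };
 *
 *     static op_handler_t find_handler(const char *op) {
 *         for (const struct op_entry *e = op_table; e->op != NULL; e++) {
 *             if (strcmp(op, e->op) == 0) {
 *                 return e->handler;
 *             }
 *         }
 *         return NULL;  // unknown request
 *     }
 *
 *     int main(void) {
 *         op_handler_t h = find_handler("noop");
 *         return (h != NULL) ? h(NULL) : 1;
 *     }
 */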
"the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } static void handle_response(xmlNode *stored_msg) { const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK); crm_log_xml_trace(stored_msg, "reply"); if (op == NULL) { crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { // Check whether scheduler answer been superseded by subsequent request const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref, pcmk__str_none)) { ha_msg_input_t fsa_input; controld_stop_sched_timer(); fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "controller"); } } static enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/procedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = controld_globals.our_nodename; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(controld_globals.fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = pcmk__ttoa(time(NULL)); update_attrd(host_from, PCMK__NODE_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } static void send_msg_via_ipc(xmlNode * msg, const char *sys) { pcmk__client_t *client_channel = NULL; CRM_CHECK(sys != NULL, return); client_channel = pcmk__find_client_by_id(sys); if (crm_element_value(msg, PCMK__XA_SRC) == NULL) { crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); process_te_message(msg, data); } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; xmlNode *wrapper = NULL; fsa_input.msg = msg; wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __func__; fsa_data.data_type = fsa_dt_ha_msg; do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state, I_MESSAGE, &fsa_data); } else if (crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_info("Received invalid request: unknown subsystem '%s'", sys); } } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } /*! 
* \internal * \brief Notify the cluster of a remote node state change * * \param[in] node_name Node's name * \param[in] node_up true if node is up, false if down */ void broadcast_remote_state_message(const char *node_name, bool node_up) { xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); crm_info("Notifying cluster of Pacemaker Remote node %s %s", node_name, node_up? "coming up" : "going down"); crm_xml_add(msg, PCMK_XA_ID, node_name); pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up); if (node_up) { crm_xml_add(msg, PCMK__XA_CONNECTION_HOST, controld_globals.our_nodename); } send_cluster_message(NULL, crm_msg_crmd, msg, TRUE); free_xml(msg); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 3c035c0930..9e0e647192 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3709 +1,3709 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! 
If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping '%s'", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. 
*/ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(timeout_ms / 1000, INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. 
*/ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! 
* \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? 
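/* Illustrative sketch: stonith_device_execute() above pops the first pending
 * operation that is not still waiting out a start delay, using
 * g_list_remove_link() plus g_list_free_1(). The selection step in
 * isolation, with g_list_delete_link() as the equivalent one-call form
 * (types and names below are hypothetical):
 *
 *     #include <glib.h>
 *
 *     typedef struct { guint delay_id; } op_t;
 *
 *     static op_t *next_ready_op(GList **pending) {
 *         for (GList *i = *pending; i != NULL; i = i->next) {
 *             op_t *op = i->data;
 *             if (op->delay_id != 0) {
 *                 continue;  // start delay still pending; leave it queued
 *             }
 *             *pending = g_list_delete_link(*pending, i);  // unlink only
 *             return op;
 *         }
 *         return NULL;  // nothing runnable right now
 *     }
 *
 *     int main(void) {
 *         op_t a = { 0 }, b = { 7 };
 *         GList *pending = g_list_append(g_list_append(NULL, &b), &a);
 *         op_t *next = next_ready_op(&pending);  // skips b, picks a
 *         g_list_free(pending);
 *         return (next == &a) ? 0 : 1;
 *     }
 */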
"" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster); cmd->target_nodeid = node->id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? 
"" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, 
st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, pcmk__str_copy(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in] dev XML containing device parameters */ static GHashTable * xml2device_params(const char *name, const xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. 
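 *
 * For example (editor's note), action="off" is remapped below to
 * pcmk_reboot_action="off", so a requested reboot powers the node off
 * instead; any other value (except "reboot", which is ignored in favor of
 * the stonith-action cluster property) is mapped to both pcmk_off_action
 * and pcmk_reboot_action. The "action" entry itself is always removed.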
*/ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION " cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { map_action(params, PCMK_ACTION_REBOOT, value); } else { map_action(params, PCMK_ACTION_OFF, value); map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, 
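/* Editor's summary of the pcmk_host_check precedence implemented in
 * target_list_type() above: an explicit pcmk_host_check always wins;
 * otherwise a device with pcmk_host_list or pcmk_host_map is treated as
 * "static-list", an agent that advertises the list action as
 * "dynamic-list", one that advertises only status as "status", and
 * anything else as "none" (assumed able to fence any node).
 */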
stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? 
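/* Editor's note on the fence_status_code values above: exit status 0
 * (active) and 2 (inactive) both mean the device can reach the target and
 * count as "can fence" below; 1 (unknown) means the target is not known to
 * this device; any other code is logged and treated as "cannot fence". */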
"" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! 
* \internal * \brief Check whether any key in \p first is missing from \p second or has a different value there */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if (strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Check whether an identical device already exists in the device list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* Is this a watchdog device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown agent '%s' " "(expected '" STONITH_WATCHDOG_AGENT "')", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device named '%s' " "(expected '" STONITH_WATCHDOG_ID "')", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* This has either an empty list or the targets configured for watchdog fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, stonith_our_uname); /* proceed as with any other stonith-device */ break; } crm_debug("Skipping registration of watchdog fence device on node not in host list"); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices,
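/* Editor's note: device_params_diff() above checks only the keys present
 * in its first argument, so device_has_duplicate() calls it in both
 * directions -- e.g. a re-registration that merely removes a parameter
 * would pass the first call but fail the swapped one. */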
pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! 
* \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. 
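 *
 * For example (editor's illustration), a request carrying
 *
 *     <fencing-level id="fl-node1-1" target="node1" index="1"
 *                    devices="ipmi1,ipmi2"/>
 *
 * appends the devices "ipmi1" and "ipmi2" to level 1 of node1's entry.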
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id <= 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. 
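 *
 * As the code below shows, a level index of 0 removes the target's entire
 * topology entry, while an index in [1, ST_LEVEL_MAX) removes only that
 * one level's device list.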
* * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). 
*/ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! 
* \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? 
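/* Editor's illustration of the remap check above: if a device declares
 * "reboot" as an on-target action, a node other than the target may not
 * run the reboot itself, but it stays eligible overall as long as "off"
 * (or "on") is not also on-target, because the reboot may be remapped to
 * off-then-on and the disallowed action is simply flagged in the reply. */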
NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? 
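/* Editor's summary of the eligibility checks above, by pcmk_host_check
 * value: "none" matches any target; "static-list" matches targets (or
 * their pcmk_host_map aliases) in the configured list; "dynamic-list"
 * matches against the agent's list output, refreshed at most every 60s;
 * "status" asks the agent about the target directly. The last two answer
 * asynchronously via dynamic_list_search_cb()/status_search_cb(). */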
NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! 
* \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { send_cluster_message(pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster), crm_msg_stonith_ng, reply, FALSE); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } /* Pack the results into XML */ list = pcmk__xe_create(NULL, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. 
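 * For example (editor's note), if the query asks about "reboot" but the
 * chosen device advertises only "off" and "on", the timeout and delay
 * attributes added below are the ones configured for "off".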
*/ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { xmlNode *wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); crm_log_xml_trace(list, "Add query results"); xmlAddChild(wrapper, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? 
LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (!stand_alone && pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). 
*/ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); free_xml(reply); if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE); stonith__xe_set_result(notify_data, result); crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op); crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost"); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result, notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? 
"" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. 
*/ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; crm_node_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_known); if (node != NULL) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! 
* \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! 
* \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->uname); return entry->uname; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. 
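 *
 * For example, the configuration handlers below guard changes with a check
 * like this (a minimal sketch of the existing calling pattern):
 * \code
 * if (!is_privileged(request->ipc_client, op)) {
 *     pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV,
 *                      PCMK_EXEC_INVALID,
 *                      "Unprivileged users must make changes via the CIB");
 * }
 * \endcode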
* * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. 
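 * (When this node forwarded its own fencing request to a peer, it
 * registered a duplicate operation locally; the peer's query indicates
 * that operation has been replaced, so delete it here to avoid keeping
 * the duplicate around.)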
*/ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if ((request->peer != NULL) || stand_alone) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); const char *action = 
crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); send_cluster_message(pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster), crm_msg_stonith_ng, request->xml, FALSE); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } free_xml(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? 
NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); - reap_crm_member(node_id, name); + pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { 
STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } free_xml(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? 
remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index abc9afa941..8549db0bb5 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,160 +1,162 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_CLUSTER_INTERNAL__H # define PCMK__CRM_CLUSTER_INTERNAL__H # include // uint32_t, uint64_t # include // gboolean # include /* *INDENT-OFF* */ enum crm_proc_flag { crm_proc_none = 0x00000001, // Cluster layers crm_proc_cpg = 0x04000000, // Daemons crm_proc_execd = 0x00000010, crm_proc_based = 0x00000100, crm_proc_controld = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_schedulerd = 0x00010000, crm_proc_fenced = 0x00100000, }; /* *INDENT-ON* */ // Used with node cache search functions enum pcmk__node_search_flags { pcmk__node_search_none = 0, pcmk__node_search_cluster = (1 << 0), // Search for cluster nodes pcmk__node_search_remote = (1 << 1), // Search for remote nodes pcmk__node_search_any = pcmk__node_search_cluster |pcmk__node_search_remote, /* @COMPAT The values before this must stay the same until we can drop * support for enum crm_get_peer_flags */ pcmk__node_search_known = (1 << 2), // Search previously known nodes }; /*! * \internal * \brief Return the process bit corresponding to the current cluster stack * * \return Process flag if detectable, otherwise 0 */ static inline uint32_t crm_get_cluster_proc(void) { switch (get_cluster_type()) { case pcmk_cluster_corosync: return crm_proc_cpg; default: break; } return crm_proc_none; } /*! 
* \internal * \brief Get log-friendly string description of a Corosync return code * * \param[in] error Corosync return code * * \return Log-friendly string description corresponding to \p error */ static inline const char * pcmk__cs_err_str(int error) { # if SUPPORT_COROSYNC switch (error) { case CS_OK: return "OK"; case CS_ERR_LIBRARY: return "Library error"; case CS_ERR_VERSION: return "Version error"; case CS_ERR_INIT: return "Initialization error"; case CS_ERR_TIMEOUT: return "Timeout"; case CS_ERR_TRY_AGAIN: return "Try again"; case CS_ERR_INVALID_PARAM: return "Invalid parameter"; case CS_ERR_NO_MEMORY: return "No memory"; case CS_ERR_BAD_HANDLE: return "Bad handle"; case CS_ERR_BUSY: return "Busy"; case CS_ERR_ACCESS: return "Access error"; case CS_ERR_NOT_EXIST: return "Doesn't exist"; case CS_ERR_NAME_TOO_LONG: return "Name too long"; case CS_ERR_EXIST: return "Exists"; case CS_ERR_NO_SPACE: return "No space"; case CS_ERR_INTERRUPT: return "Interrupt"; case CS_ERR_NAME_NOT_FOUND: return "Name not found"; case CS_ERR_NO_RESOURCES: return "No resources"; case CS_ERR_NOT_SUPPORTED: return "Not supported"; case CS_ERR_BAD_OPERATION: return "Bad operation"; case CS_ERR_FAILED_OPERATION: return "Failed operation"; case CS_ERR_MESSAGE_ERROR: return "Message error"; case CS_ERR_QUEUE_FULL: return "Queue full"; case CS_ERR_QUEUE_NOT_AVAILABLE: return "Queue not available"; case CS_ERR_BAD_FLAGS: return "Bad flags"; case CS_ERR_TOO_BIG: return "Too big"; case CS_ERR_NO_SECTIONS: return "No sections"; } # endif return "Corosync error"; } # if SUPPORT_COROSYNC #if 0 /* This is the new way to do it, but we still support all Corosync 2 versions, * and this isn't always available. A better alternative here would be to check * for support in the configure script and enable this conditionally. 
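 *
 * As a sketch, assuming the configure script defined a hypothetical
 * HAVE_CMAP_INITIALIZE_MAP symbol when the newer API is available, the
 * compile-time selection could look like:
 *
 *   #ifdef HAVE_CMAP_INITIALIZE_MAP
 *   #define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP)
 *   #else
 *   #define pcmk__init_cmap(handle) cmap_initialize(handle)
 *   #endif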
*/ #define pcmk__init_cmap(handle) cmap_initialize_map((handle), CMAP_MAP_ICMAP) #else #define pcmk__init_cmap(handle) cmap_initialize(handle) #endif char *pcmk__corosync_cluster_name(void); bool pcmk__corosync_add_nodes(xmlNode *xml_parent); # endif crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); crm_node_t *pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership); void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected); void pcmk__reap_unseen_nodes(uint64_t ring_id); void pcmk__corosync_quorum_connect(gboolean (*dispatch)(unsigned long long, gboolean), void (*destroy) (gpointer)); bool pcmk__cluster_is_node_active(const crm_node_t *node); unsigned int pcmk__cluster_num_active_nodes(void); unsigned int pcmk__cluster_num_remote_nodes(void); + crm_node_t *pcmk__cluster_lookup_remote_node(const char *node_name); +void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name); void pcmk__cluster_forget_remote_node(const char *node_name); crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags); crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid); void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); void pcmk__refresh_node_caches_from_cib(xmlNode *cib); crm_node_t *pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags); #endif // PCMK__CRM_CLUSTER_INTERNAL__H diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 4efc58d56c..7cd8608f66 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1485 +1,1549 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif +#include <inttypes.h> // PRIu32 #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * pcmk__cluster_lookup_remote_node() and pcmk__cluster_forget_remote_node() * directly manage it, while refresh_remote_nodes() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; /* * The known node cache tracks cluster and remote nodes that have been seen in * the CIB.
It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ static GHashTable *known_node_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; // Flag setting and clearing for crm_node_t:flags #define set_peer_flags(peer, flags_to_set) do { \ (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define clear_peer_flags(peer, flags_to_clear) do { \ (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); static crm_node_t *find_known_node(const char *id, const char *uname); /*! * \internal * \brief Get the number of Pacemaker Remote nodes that have been seen * * \return Number of cached Pacemaker Remote nodes */ unsigned int pcmk__cluster_num_remote_nodes(void) { if (crm_remote_peer_cache == NULL) { return 0U; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \internal * \brief Get a remote node cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, or \c NULL (and set \c errno) * otherwise * * \note When creating a new entry, this will leave the node state undetermined. * The caller should also call \c pcmk__update_peer_state() if the state * is known. * \note Because this can add and remove cache entries, callers should not * assume any previously obtained cache entry pointers remain valid. */ crm_node_t * pcmk__cluster_lookup_remote_node(const char *node_name) { crm_node_t *node; char *node_name_copy = NULL; if (node_name == NULL) { errno = EINVAL; return NULL; } /* It's theoretically possible that the node was added to the cluster peer * cache before it was known to be a Pacemaker Remote node. Remove that * entry unless it has a node ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster); if ((node != NULL) && (node->uuid == NULL)) { /* node_name could be a pointer into the cache entry being removed, so * reassign it to a copy before the original gets freed */ node_name_copy = strdup(node_name); if (node_name_copy == NULL) { errno = ENOMEM; return NULL; } node_name = node_name_copy; - reap_crm_member(0, node_name); + pcmk__cluster_forget_cluster_node(0, node_name); } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { free(node_name_copy); return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { free(node_name_copy); return NULL; } /* Populate the essential information */ set_peer_flags(node, crm_remote_node); node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = ENOMEM; free(node_name_copy); return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); free(node_name_copy); return node; } /*! 
* \internal * \brief Remove a node from the Pacemaker Remote node cache * * \param[in] node_name Name of node to remove from cache * * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into the cache entry being removed. */ void pcmk__cluster_forget_remote_node(const char *node_name) { /* Do a lookup first, because node_name could be a pointer within the entry * being removed -- we can't log it *after* removing it. */ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { crm_trace("Removing %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return \c CRM_NODE_LOST if \c PCMK__XA_IN_CCM is false in * \c PCMK__XE_NODE_STATE, \c CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ static const char * remote_state_from_cib(const xmlNode *node_state) { bool status = false; if ((pcmk__xe_get_bool_attr(node_state, PCMK__XA_IN_CCM, &status) == pcmk_rc_ok) && !status) { return CRM_NODE_LOST; } else { return CRM_NODE_MEMBER; } } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { const struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = pcmk__cluster_lookup_remote_node(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } else if (pcmk_is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_peer_flags((crm_node_t *) value, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /*! * \internal * \brief Repopulate the remote node cache based on CIB XML * * \param[in] cib CIB XML to parse */ static void refresh_remote_nodes(xmlNode *cib) { struct refresh_data data; crm_peer_init(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. 
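 * The result is a mark-and-sweep pass: mark_dirty() flags every entry,
 * remote_cache_refresh_helper() clears the flag on each node found in the
 * CIB, and is_dirty() then selects the leftover entries for removal.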
*/ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = PCMK_XA_ID; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = PCMK_XA_VALUE; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = PCMK_XA_ID; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } /*! * \internal * \brief Check whether a node is an active cluster node * * Remote nodes are never considered active. This guarantees that they can never * become DC. * * \param[in] node Node to check * * \return \c true if the node is an active cluster node, or \c false otherwise */ bool pcmk__cluster_is_node_active(const crm_node_t *node) { const enum cluster_type_e type = get_cluster_type(); if ((node == NULL) || pcmk_is_set(node->flags, crm_remote_node)) { return false; } switch (type) { case pcmk_cluster_corosync: #if SUPPORT_COROSYNC return crm_is_corosync_peer_active(node); #else break; #endif // SUPPORT_COROSYNC default: break; } crm_err("Unhandled cluster type: %s", name_for_cluster_type(type)); return false; } /*! * \internal * \brief Check if a node's entry should be removed from the cluster node cache * * A node should be removed from the cache if it's inactive and matches another * \c crm_node_t (the search object). The node is considered a mismatch if any * of the following are true: * * The search object is \c NULL. * * The search object has an ID set and the cached node's ID does not match it. * * The search object does not have an ID set, and the cached node's name does * not match the search node's name. (If both names are \c NULL, it's a * match.) * * Otherwise, the node is considered a match. * * Note that if the search object has both an ID and a name set, the name is * ignored for matching purposes. * * \param[in] key Ignored * \param[in] value \c crm_node_t object from cluster node cache * \param[in] user_data \c crm_node_t object to match against (search object) * * \return \c TRUE if the node entry should be removed from \c crm_peer_cache, * or \c FALSE otherwise */ static gboolean should_forget_cluster_node(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } if ((search->id != 0) && (node->id != search->id)) { return FALSE; } if ((search->id == 0) && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { // @TODO Consider name even if ID is set? return FALSE; } if (pcmk__cluster_is_node_active(value)) { return FALSE; } crm_info("Removing node with name %s and " PCMK_XA_ID " %u from membership " "cache", pcmk__s(node->uname, "(unknown)"), node->id); return TRUE; } +/*! 
+ * \internal + * \brief Remove one or more inactive nodes from the cluster node cache + * + * All inactive nodes matching \p id and \p node_name as described in + * \c should_forget_cluster_node documentation are removed from the cache. + * + * If \p id is 0 and \p node_name is \c NULL, all inactive nodes are removed + * from the cache regardless of ID and name. This differs from clearing the + * cache, in that entries for active nodes are preserved. + * + * \param[in] id ID of node to remove from cache (0 to ignore) + * \param[in] node_name Name of node to remove from cache (ignored if \p id is + * nonzero) + * + * \note \p node_name is not modified directly, but it will be freed if it's a + * pointer into a cache entry that is removed. + */ +void +pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name) +{ + crm_node_t search = { 0, }; + char *criterion = NULL; // For logging + guint matches = 0; + + if (crm_peer_cache == NULL) { + crm_trace("Membership cache not initialized, ignoring removal request"); + return; + } + + search.id = id; + search.uname = pcmk__str_copy(node_name); // May log after original freed + + if (id > 0) { + criterion = crm_strdup_printf(PCMK_XA_ID "=%" PRIu32, id); + + } else if (node_name != NULL) { + criterion = crm_strdup_printf(PCMK_XA_UNAME "=%s", node_name); + } + + matches = g_hash_table_foreach_remove(crm_peer_cache, + should_forget_cluster_node, &search); + if (matches > 0) { + if (criterion != NULL) { + crm_notice("Removed %u inactive node%s with %s from the membership " + "cache", + matches, pcmk__plural_s(matches), criterion); + } else { + crm_notice("Removed all (%u) inactive cluster nodes from the " + "membership cache", + matches); + } + + } else { + crm_info("No inactive cluster nodes%s%s to remove from the membership " + "cache", + ((criterion != NULL)? " with" : ""), pcmk__s(criterion, "")); + } + + free(search.uname); + free(criterion); +} + /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed * * \note The caller must be careful not to use \p name after calling this * function if it might be a pointer into the cache entry being removed. */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search = { 0, }; if (crm_peer_cache == NULL) { crm_trace("Membership cache not initialized, ignoring purge request"); return 0; } search.id = id; search.uname = pcmk__str_copy(name); matches = g_hash_table_foreach_remove(crm_peer_cache, should_forget_cluster_node, &search); if(matches) { crm_notice("Purged %d peer%s with " PCMK_XA_ID "=%u%s%s from the membership cache", matches, pcmk__plural_s(matches), search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } else { crm_info("No peers with " PCMK_XA_ID "=%u%s%s to purge from the membership cache", search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } free(search.uname); return matches; } static void count_peer(gpointer key, gpointer value, gpointer user_data) { unsigned int *count = user_data; crm_node_t *node = value; if (pcmk__cluster_is_node_active(node)) { *count = *count + 1; } } /*! * \internal * \brief Get the number of active cluster nodes that have been seen * * Remote nodes are never considered active. This guarantees that they can never * become DC. 
* * \return Number of active nodes in the cluster node cache */ unsigned int pcmk__cluster_num_active_nodes(void) { unsigned int count = 0; if (crm_peer_cache != NULL) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node->conn_host); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } if (known_node_cache == NULL) { known_node_cache = pcmk__strikey_table(free, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", pcmk__cluster_num_remote_nodes()); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } if (known_node_cache != NULL) { crm_trace("Destroying known node cache with %d members", g_hash_table_size(known_node_cache)); g_hash_table_destroy(known_node_cache); known_node_cache = NULL; } } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { peer_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { return value == user_data; } /*! 
* \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return Node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { node = pcmk__search_cluster_node_cache(id, uname, NULL); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) { char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); node = find_known_node(id_str, uname); free(id_str); } return node; } /*! * \internal * \brief Purge a node from cache (both cluster and Pacemaker Remote) * * \param[in] node_name If not NULL, purge only nodes with this name * \param[in] node_id If not 0, purge cluster nodes only if they have this ID * * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote * nodes that match \p node_name will be purged, and cluster nodes that * match both \p node_name and \p node_id will be purged. * \note The caller must be careful not to use \p node_name after calling this * function if it might be a pointer into a cache entry being removed. */ void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) { char *node_name_copy = NULL; if ((node_name == NULL) && (node_id == 0U)) { return; } // Purge from Pacemaker Remote node cache if ((node_name != NULL) && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { /* node_name could be a pointer into the cache entry being purged, * so reassign it to a copy before the original gets freed */ node_name_copy = pcmk__str_copy(node_name); node_name = node_name_copy; crm_trace("Purging %s from Pacemaker Remote node cache", node_name); g_hash_table_remove(crm_remote_peer_cache, node_name); } - reap_crm_member(node_id, node_name); + pcmk__cluster_forget_cluster_node(node_id, node_name); free(node_name_copy); } /*! 
* \internal * \brief Search cluster node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * * \return Cluster node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname, const char *uuid) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } else if (uuid != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (pcmk__str_eq(node->uuid, uuid, pcmk__str_casei)) { crm_trace("UUID match: %s = %p", node->uuid, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); dump_peer_hash(LOG_INFO, __func__); crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync-based clusters use node IDs. The functions that call * pcmk__update_peer_state() and crm_update_peer_proc() only know * nodeid, so 'by_id' is authoritative when merging. 
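 * In this branch neither candidate has both an ID and a name, so they are
 * assumed to describe the same node: the name-only entry is dropped, the
 * ID-keyed entry survives, and callers such as pcmk__get_node() fill in
 * the missing uname on the survivor afterward.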
*/ dump_peer_hash(LOG_DEBUG, __func__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } if (!pcmk__corosync_has_nodelist()) { return 0; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (pcmk__cluster_is_node_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /*! * \brief Get a cluster node cache entry * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster * node ID to search for * \param[in] flags Group of enum pcmk__node_search_flags * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * pcmk__get_node(unsigned int id, const char *uname, const char *uuid, uint32_t flags) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); // Check the Pacemaker Remote node cache first if (pcmk_is_set(flags, pcmk__node_search_remote)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); if (node != NULL) { return node; } } if (!pcmk_is_set(flags, pcmk__node_search_cluster)) { return NULL; } node = pcmk__search_cluster_node_cache(id, uname, uuid); /* If uname wasn't provided, and the cache search did not turn up a name * based on the ID, look up the node name using the ID in the cluster * membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* Try to find the node one more time, now that we know its name. */ if (node == NULL) { node = pcmk__search_cluster_node_cache(id, uname, uuid); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = pcmk__assert_alloc(1, sizeof(crm_node_t)); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { update_peer_uname(node, uname); } if(node->uuid == NULL) { if (uuid == NULL) { uuid = crm_peer_uuid(node); } if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in,out] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator.
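 *
 * A minimal sketch of the pattern to avoid (hypothetical caller; 'new_name'
 * is illustrative only):
 * \code
 * GHashTableIter iter;
 * crm_node_t *node = NULL;
 *
 * g_hash_table_iter_init(&iter, crm_peer_cache);
 * while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 *     // BAD: may call remove_conflicting_peer(), invalidating 'iter'
 *     update_peer_uname(node, new_name);
 * }
 * \endcode
 */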
*/ static void update_peer_uname(crm_node_t *node, const char *uname) { CRM_CHECK(uname != NULL, crm_err("Bug: can't update node name without name"); return); CRM_CHECK(node != NULL, crm_err("Bug: can't update node name to %s without node", uname); return); if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { crm_debug("Node uname '%s' did not change", uname); return; } for (const char *c = uname; *c; ++c) { if ((*c >= 'A') && (*c <= 'Z')) { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } pcmk__str_update(&node->uname, uname); if (peer_status_callback != NULL) { peer_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Get log-friendly string equivalent of a process flag * * \param[in] proc Process flag * * \return Log-friendly string equivalent of \p proc */ static inline const char * proc2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in,out] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. 
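 *
 * Callers should therefore treat a NULL return as "node gone", for example
 * (a minimal sketch):
 * \code
 * node = crm_update_peer_proc(__func__, node, crm_proc_cpg,
 *                             PCMK_VALUE_ONLINE);
 * if (node == NULL) {
 *     return; // node was reaped from the cache; do not dereference it
 * }
 * \endcode
 */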
/*!
 * \internal
 * \brief Update a node's process information (and potentially state)
 *
 * \param[in]     source  Caller's function name (for log messages)
 * \param[in,out] node    Node object to update
 * \param[in]     flag    Bitmask of new process information
 * \param[in]     status  Node status (online, offline, etc.)
 *
 * \return NULL if any node was reaped from peer caches, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible, otherwise
 *       reaping could invalidate the iterator.
 */
crm_node_t *
crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag,
                     const char *status)
{
    uint32_t last = 0;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
                                    source, proc2text(flag), status);
                            return NULL);

    /* Pacemaker doesn't spawn processes on remote nodes */
    if (pcmk_is_set(node->flags, crm_remote_node)) {
        return node;
    }

    last = node->processes;
    if (status == NULL) {
        node->processes = flag;
        if (node->processes != last) {
            changed = TRUE;
        }

    } else if (pcmk__str_eq(status, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        if ((node->processes & flag) != flag) {
            node->processes = pcmk__set_flags_as(__func__, __LINE__,
                                                 LOG_TRACE, "Peer process",
                                                 node->uname, node->processes,
                                                 flag, "processes");
            changed = TRUE;
        }

    } else if (node->processes & flag) {
        node->processes = pcmk__clear_flags_as(__func__, __LINE__,
                                               LOG_TRACE, "Peer process",
                                               node->uname, node->processes,
                                               flag, "processes");
        changed = TRUE;
    }

    if (changed) {
        if (status == NULL && flag <= crm_proc_none) {
            crm_info("%s: Node %s[%u] - all processes are now offline",
                     source, node->uname, node->id);
        } else {
            crm_info("%s: Node %s[%u] - %s is now %s",
                     source, node->uname, node->id, proc2text(flag), status);
        }

        if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
            node->when_online = time(NULL);

        } else {
            node->when_online = 0;
        }

        /* Call the client callback first, then update the peer state,
         * in case the node will be reaped
         */
        if (peer_status_callback != NULL) {
            peer_status_callback(crm_status_processes, node, &last);
        }

        /* The client callback shouldn't touch the peer caches,
         * but as a safety net, bail if the peer cache was destroyed.
         */
        if (crm_peer_cache == NULL) {
            return NULL;
        }

        if (crm_autoreap) {
            const char *peer_state = NULL;

            if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                peer_state = CRM_NODE_MEMBER;
            } else {
                peer_state = CRM_NODE_LOST;
            }
            node = pcmk__update_peer_state(__func__, node, peer_state, 0);
        }

    } else {
        crm_trace("%s: Node %s[%u] - %s is unchanged (%s)",
                  source, node->uname, node->id, proc2text(flag), status);
    }
    return node;
}

/*!
 * \internal
 * \brief Update a cluster node cache entry's expected join state
 *
 * \param[in]     source    Caller's function name (for logging)
 * \param[in,out] node      Node to update
 * \param[in]     expected  Node's new join state
 */
void
pcmk__update_peer_expected(const char *source, crm_node_t *node,
                           const char *expected)
{
    char *last = NULL;
    gboolean changed = FALSE;

    CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s",
                                    source, expected);
                            return);

    /* Remote nodes don't participate in joins */
    if (pcmk_is_set(node->flags, crm_remote_node)) {
        return;
    }

    last = node->expected;
    if (expected != NULL
        && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
        node->expected = strdup(expected);
        changed = TRUE;
    }

    if (changed) {
        crm_info("%s: Node %s[%u] - expected state is now %s (was %s)",
                 source, node->uname, node->id, expected, last);
        free(last);

    } else {
        crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)",
                  source, node->uname, node->id, expected);
    }
}
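/* Illustrative usage (a sketch, not from the original source): a membership
 * callback might record that a peer's CPG process came online, honoring the
 * NULL-on-reap contract of crm_update_peer_proc() above:
 *
 *     node = crm_update_peer_proc(__func__, node, crm_proc_cpg,
 *                                 PCMK_VALUE_ONLINE);
 *     if (node == NULL) {
 *         return; // node was reaped from the cache and freed
 *     }
 */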
/*!
 * \internal
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 * \param[in,out] iter        If not NULL, pointer to node's peer cache iterator
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function may be called from
 *       within a peer cache iteration if the iterator is supplied.
 */
static crm_node_t *
update_peer_state_iter(const char *source, crm_node_t *node, const char *state,
                       uint64_t membership, GHashTableIter *iter)
{
    gboolean is_member;

    CRM_CHECK(node != NULL,
              crm_err("Could not set state for unknown host to %s"
                      CRM_XS " source=%s", state, source);
              return NULL);

    is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
    if (is_member) {
        node->when_lost = 0;
        if (membership) {
            node->last_seen = membership;
        }
    }

    if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
        char *last = node->state;

        if (is_member) {
            node->when_member = time(NULL);
        } else {
            node->when_member = 0;
        }
        node->state = strdup(state);
        crm_notice("Node %s state is now %s " CRM_XS
                   " nodeid=%u previous=%s source=%s",
                   node->uname, state, node->id,
                   (last? last : "unknown"), source);
        if (peer_status_callback != NULL) {
            peer_status_callback(crm_status_nstate, node, last);
        }
        free(last);

        if (crm_autoreap && !is_member
            && !pcmk_is_set(node->flags, crm_remote_node)) {
            /* We only autoreap from the peer cache, not the remote peer cache,
             * because the latter should be managed only by
             * refresh_remote_nodes().
             */
            if(iter) {
                crm_notice("Purged 1 peer with " PCMK_XA_ID
                           "=%u and/or uname=%s from the membership cache",
                           node->id, node->uname);
                g_hash_table_iter_remove(iter);

            } else {
-                reap_crm_member(node->id, node->uname);
+                pcmk__cluster_forget_cluster_node(node->id, node->uname);
            }
            node = NULL;
        }

    } else {
        crm_trace("Node %s state is unchanged (%s) " CRM_XS
                  " nodeid=%u source=%s",
                  node->uname, state, node->id, source);
    }
    return node;
}

/*!
 * \brief Update a node's state and membership information
 *
 * \param[in]     source      Caller's function name (for log messages)
 * \param[in,out] node        Node object to update
 * \param[in]     state       Node's new state
 * \param[in]     membership  Node's new membership ID
 *
 * \return NULL if any node was reaped, value of node otherwise
 *
 * \note If this function returns NULL, the supplied node object was likely
 *       freed and should not be used again. This function should not be
 *       called within a cache iteration if reaping is possible,
 *       otherwise reaping could invalidate the iterator.
 */
crm_node_t *
pcmk__update_peer_state(const char *source, crm_node_t *node,
                        const char *state, uint64_t membership)
{
    return update_peer_state_iter(source, node, state, membership, NULL);
}
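/* Illustrative usage (a sketch, not from the original source): marking a
 * peer lost outside of any cache iteration; with crm_autoreap set, a lost
 * cluster node may be purged, in which case NULL comes back:
 *
 *     node = pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
 *     if (node == NULL) {
 *         return; // entry was reaped and freed; do not dereference it
 *     }
 */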
/*!
 * \internal
 * \brief Reap all nodes from cache whose membership information does not match
 *
 * \param[in] membership  Membership ID of nodes to keep
 */
void
pcmk__reap_unseen_nodes(uint64_t membership)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;

    crm_trace("Reaping unseen nodes...");
    g_hash_table_iter_init(&iter, crm_peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
        if (node->last_seen != membership) {
            if (node->state) {
                /* Calling update_peer_state_iter() allows us to remove the
                 * node from crm_peer_cache without invalidating our iterator
                 */
                update_peer_state_iter(__func__, node, CRM_NODE_LOST,
                                       membership, &iter);

            } else {
                crm_info("State of node %s[%u] is still unknown",
                         node->uname, node->id);
            }
        }
    }
}
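/* Illustrative flow (an assumption, not from this file): after processing a
 * membership event, a caller would pass the new membership ID so that any
 * cached peer not seen in that membership is marked lost (and possibly
 * reaped); current_ring_id is a hypothetical variable:
 *
 *     pcmk__reap_unseen_nodes(current_ring_id);
 */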
static crm_node_t *
find_known_node(const char *id, const char *uname)
{
    GHashTableIter iter;
    crm_node_t *node = NULL;
    crm_node_t *by_id = NULL;
    crm_node_t *by_name = NULL;

    if (uname) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (node->uname && strcasecmp(node->uname, uname) == 0) {
                crm_trace("Name match: %s = %p", node->uname, node);
                by_name = node;
                break;
            }
        }
    }

    if (id) {
        g_hash_table_iter_init(&iter, known_node_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if(strcasecmp(node->uuid, id) == 0) {
                crm_trace("ID match: %s= %p", id, node);
                by_id = node;
                break;
            }
        }
    }

    node = by_id; /* Good default */
    if (by_id == by_name) {
        /* Nothing to do if they match (both NULL counts) */
        crm_trace("Consistent: %p for %s/%s", by_id, id, uname);

    } else if (by_id == NULL && by_name) {
        crm_trace("Only one: %p for %s/%s", by_name, id, uname);

        if (id) {
            node = NULL;
        } else {
            node = by_name;
        }

    } else if (by_name == NULL && by_id) {
        crm_trace("Only one: %p for %s/%s", by_id, id, uname);

        if (uname) {
            node = NULL;
        }

    } else if (uname && by_id->uname
               && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
        /* Multiple nodes have the same uname in the CIB. Return by_id. */

    } else if (id && by_name->uuid
               && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
        /* Multiple nodes have the same id in the CIB. Return by_name. */
        node = by_name;

    } else {
        node = NULL;
    }

    if (node == NULL) {
        crm_debug("Couldn't find node%s%s%s%s",
                  id? " " : "",
                  id? id : "",
                  uname? " with name " : "",
                  uname? uname : "");
    }

    return node;
}

static void
known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data)
{
    const char *id = crm_element_value(xml_node, PCMK_XA_ID);
    const char *uname = crm_element_value(xml_node, PCMK_XA_UNAME);
    crm_node_t *node = NULL;

    CRM_CHECK(id != NULL && uname != NULL, return);
    node = find_known_node(id, uname);

    if (node == NULL) {
        char *uniqueid = crm_generate_uuid();

        node = pcmk__assert_alloc(1, sizeof(crm_node_t));

        node->uname = pcmk__str_copy(uname);
        node->uuid = pcmk__str_copy(id);

        g_hash_table_replace(known_node_cache, uniqueid, node);

    } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
        pcmk__str_update(&node->uname, uname);

        /* Node is in cache and hasn't been updated already, so mark it clean */
        clear_peer_flags(node, crm_node_dirty);
    }
}

static void
refresh_known_node_cache(xmlNode *cib)
{
    crm_peer_init();

    g_hash_table_foreach(known_node_cache, mark_dirty, NULL);

    crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG,
                             known_node_cache_refresh_helper, NULL);

    /* Remove all old cache entries that weren't seen in the CIB */
    g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL);
}

void
pcmk__refresh_node_caches_from_cib(xmlNode *cib)
{
    refresh_remote_nodes(cib);
    refresh_known_node_cache(cib);
}

// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START

#include <crm/cluster/compat.h>

int
crm_terminate_member(int nodeid, const char *uname, void *unused)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}

int
crm_terminate_member_no_mainloop(int nodeid, const char *uname,
                                 int *connection)
{
    return stonith_api_kick(nodeid, uname, 120, TRUE);
}

crm_node_t *
crm_get_peer(unsigned int id, const char *uname)
{
    return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster);
}

crm_node_t *
crm_get_peer_full(unsigned int id, const char *uname, int flags)
{
    return pcmk__get_node(id, uname, NULL, flags);
}

int
crm_remote_peer_cache_size(void)
{
    unsigned int count = pcmk__cluster_num_remote_nodes();

    return QB_MIN(count, INT_MAX);
}

void
crm_remote_peer_cache_refresh(xmlNode *cib)
{
    refresh_remote_nodes(cib);
}

crm_node_t *
crm_remote_peer_get(const char *node_name)
{
    return pcmk__cluster_lookup_remote_node(node_name);
}

void
crm_remote_peer_cache_remove(const char *node_name)
{
    pcmk__cluster_forget_remote_node(node_name);
}

gboolean
crm_is_peer_active(const crm_node_t * node)
{
    return pcmk__cluster_is_node_active(node);
}

guint
crm_active_peers(void)
{
    return pcmk__cluster_num_active_nodes();
}

// LCOV_EXCL_STOP
// End deprecated API
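/* Migration sketch (illustrative, not part of the original source): each
 * deprecated wrapper above maps one-to-one onto its replacement, e.g.
 *
 *     crm_node_t *peer = crm_get_peer(0, "node1");        // deprecated
 *     crm_node_t *peer = pcmk__get_node(0, "node1", NULL, // replacement
 *                                       pcmk__node_search_cluster);
 */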