diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index d0c2e859d3..ada4007cb1 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -1,1633 +1,1629 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // uint32_t, uint64_t, UINT64_C() #include #include #include /* U64T ~ PRIu64 */ #include #include #include #include #include #include #include #define EXIT_ESCALATION_MS 10000 static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; int next_client_id = 0; gboolean legacy_mode = FALSE; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; void send_cib_replace(const xmlNode * sync_request, const char *host); static void cib_process_request(xmlNode *request, gboolean privileged, pcmk__client_t *cib_client); static int cib_process_command(xmlNode *request, xmlNode **reply, xmlNode **cib_diff, gboolean privileged); gboolean cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged); gboolean cib_legacy_mode(void) { return legacy_mode; } static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (cib_shutdown_flag) { crm_info("Ignoring new IPC client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { pcmk__client_t *client = pcmk__find_client(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = NULL, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, pcmk__client_t *cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, __func__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); pcmk__ipc_send_xml(cib_client, id, ack, flags); cib_client->request_id = 0; free_xml(ack); } return; } else if (pcmk__str_eq(op, T_CIB_NOTIFY, pcmk__str_none)) { /* Update the notify filters for this client */ int on_off = 0; crm_exit_t status = CRM_EX_OK; uint64_t bit = UINT64_C(0); const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks %s for client %s", type, (on_off? "on" : "off"), pcmk__client_name(cib_client)); if (pcmk__str_eq(type, T_CIB_POST_NOTIFY, pcmk__str_casei)) { bit = cib_notify_post; } else if (pcmk__str_eq(type, T_CIB_PRE_NOTIFY, pcmk__str_casei)) { bit = cib_notify_pre; } else if (pcmk__str_eq(type, T_CIB_UPDATE_CONFIRM, pcmk__str_casei)) { bit = cib_notify_confirm; } else if (pcmk__str_eq(type, T_CIB_DIFF_NOTIFY, pcmk__str_casei)) { bit = cib_notify_diff; } else if (pcmk__str_eq(type, T_CIB_REPLACE_NOTIFY, pcmk__str_casei)) { bit = cib_notify_replace; } else { status = CRM_EX_INVALID_PARAM; } if (bit != 0) { if (on_off) { pcmk__set_client_flags(cib_client, bit); } else { pcmk__clear_client_flags(cib_client, bit); } } pcmk__ipc_send_ack(cib_client, id, flags, "ack", status); return; } cib_process_request(op_request, privileged, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; pcmk__client_t *cib_client = pcmk__find_client(c); xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); if (op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } if (op_request == NULL) { crm_trace("Invalid message from %p", c); pcmk__ipc_send_ack(cib_client, id, flags, "nack", CRM_EX_PROTOCOL); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (pcmk_is_set(call_options, cib_sync_call)) { CRM_LOG_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = pcmk__itoa(cib_client->pid); } else { cib_client->name = strdup(value); if (crm_is_daemon_name(value)) { pcmk__set_client_flags(cib_client, cib_is_daemon); } } } /* Allow cluster daemons more leeway before being evicted */ if (pcmk_is_set(cib_client->flags, cib_is_daemon)) { const char *qmax = cib_config_lookup("cluster-ipc-limit"); if (pcmk__set_client_queue_max(cib_client, qmax)) { crm_trace("IPC threshold for client %s[%u] is now %u", pcmk__client_name(cib_client), cib_client->pid, cib_client->queue_max); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); CRM_LOG_ASSERT(cib_client->user != NULL); pcmk__update_acl_user(op_request, F_CIB_USER, cib_client->user); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; int sync_our_cib(xmlNode * request, gboolean all); static gboolean cib_digester_cb(gpointer data) { if (cib_is_master) { char buffer[32]; xmlNode *ping = create_xml_node(NULL, "ping"); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, "%" U64T, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, F_TYPE, "cib"); crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING); crm_xml_add(ping, F_CIB_PING_ID, buffer); crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); send_cluster_message(NULL, crm_msg_cib, ping, TRUE); free_xml(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = crm_element_value(reply, F_ORIG); xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA); const char *seq_s = crm_element_value(pong, F_CIB_PING_ID); const char *digest = crm_element_value(pong, XML_ATTR_DIGEST); if (seq_s == NULL) { crm_debug("Ignoring ping reply with no " F_CIB_PING_ID); return; } else { long long seq_ll; if (pcmk__scan_ll(seq_s, &seq_ll, 0LL) != pcmk_rc_ok) { return; } seq = (uint64_t) seq_ll; } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION); if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if (!pcmk__str_eq(ping_digest, digest, pcmk__str_casei)) { xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA); crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(the_cib, XML_ATTR_GENERATION), crm_element_value(the_cib, XML_ATTR_NUMUPDATES), ping_digest, host, remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_", digest, remote_cib); if(remote_cib && remote_cib->children) { /* Additional debug */ xml_calculate_changes(the_cib, remote_cib); xml_log_changes(LOG_INFO, __func__, remote_cib); crm_trace("End of differences"); } free_xml(remote_cib); sync_our_cib(reply, FALSE); } } } static void do_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { int rid = 0; int call_id = 0; pcmk__client_t *client_obj = NULL; CRM_ASSERT(notify_src && client_id); crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); client_obj = pcmk__find_client_by_id(client_id); if (client_obj == NULL) { crm_debug("Could not send response %d: client %s not found", call_id, client_id); return; } if (sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to client %s%s", rid, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } else { crm_trace("Sending response (call %d) to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } } else { crm_trace("Sending event %d to client %s%s", call_id, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } switch (PCMK__CLIENT_TYPE(client_obj)) { case pcmk__client_ipc: { int rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); if (rc != pcmk_rc_ok) { crm_warn("%s reply to client %s failed: %s " CRM_XS " rc=%d", (sync_reply? "Synchronous" : "Asynchronous"), pcmk__client_name(client_obj), pcmk_rc_str(rc), rc); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: #endif case pcmk__client_tcp: pcmk__remote_send_xml(client_obj->remote, notify_src); break; default: crm_err("Unknown transport for client %s " CRM_XS " flags=0x%016" PRIx64, pcmk__client_name(client_obj), client_obj->flags); } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = pcmk__intkey_table_lookup(local_notify_queue, bcast_id); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); pcmk__intkey_table_remove(local_notify_queue, bcast_id); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = strdup(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = pcmk__intkey_table(local_notify_destroy_callback); } pcmk__intkey_table_insert(local_notify_queue, cib_local_bcast_num, notify); // cppcheck doesn't know notify will get freed when hash table is destroyed // cppcheck-suppress memleak } static void parse_local_options_v1(pcmk__client_t *cib_client, int call_type, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if (cib_op_modifies(call_type) && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (host == NULL && cib_is_master) { crm_trace("Processing master %s op locally from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (pcmk__str_eq(host, cib_our_uname, pcmk__str_casei)) { crm_trace("Processing locally addressed %s op from client %s", op, pcmk__client_name(cib_client)); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to client %s", op, pcmk__client_name(cib_client), (host? host : "the master instance")); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options_v2(pcmk__client_t *cib_client, int call_type, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if (cib_op_modifies(call_type)) { if (pcmk__strcase_any_of(op, CIB_OP_MASTER, CIB_OP_SLAVE, NULL)) { /* Always handle these locally */ *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; return; } else { /* Redirect all other updates via CPG */ *needs_reply = TRUE; *needs_forward = TRUE; *process = FALSE; crm_trace("%s op from %s needs to be forwarded to client %s", op, pcmk__client_name(cib_client), (host? host : "the master instance")); return; } } *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (stand_alone) { crm_trace("Processing %s op from client %s (stand-alone)", op, pcmk__client_name(cib_client)); } else if (host == NULL) { crm_trace("Processing unaddressed %s op from client %s", op, pcmk__client_name(cib_client)); } else if (pcmk__str_eq(host, cib_our_uname, pcmk__str_casei)) { crm_trace("Processing locally addressed %s op from client %s", op, pcmk__client_name(cib_client)); } else { crm_trace("%s op from %s needs to be forwarded to client %s", op, pcmk__client_name(cib_client), host); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options(pcmk__client_t *cib_client, int call_type, int call_options, const char *host, const char *op, gboolean *local_notify, gboolean *needs_reply, gboolean *process, gboolean *needs_forward) { if(cib_legacy_mode()) { parse_local_options_v1(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } else { parse_local_options_v2(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } } static gboolean parse_peer_options_v1(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); - const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = pcmk__str_eq(reply_to, cib_our_uname, pcmk__str_casei); - if (crm_is_true(update)) { + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } op = crm_element_value(request, F_CIB_OPERATION); crm_trace("Processing %s request sent by %s", op, originator); if (pcmk__str_eq(op, "cib_shutdown_req", pcmk__str_casei)) { /* Always process these */ *local_notify = FALSE; if (reply_to == NULL || is_reply) { *process = TRUE; } if (is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && pcmk__str_eq(host, cib_our_uname, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); return TRUE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); *needs_reply = TRUE; return TRUE; } else if (host == NULL && cib_is_master == TRUE) { crm_trace("Processing %s request sent to master instance from %s", op, originator); return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring msg for master instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", crm_str(host)); } else if (reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_trace("Ignoring reply to %s", crm_str(reply_to)); } else if (pcmk__str_eq(op, "cib_shutdown_req", pcmk__str_casei)) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, originator); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, originator); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static gboolean parse_peer_options_v2(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *host = NULL; const char *delegated = crm_element_value(request, F_CIB_DELEGATED); const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); - const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = pcmk__str_eq(reply_to, cib_our_uname, pcmk__str_casei); if(pcmk__str_eq(op, CIB_OP_REPLACE, pcmk__str_casei)) { /* sync_our_cib() sets F_CIB_ISREPLY */ if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if(pcmk__str_eq(op, CIB_OP_SYNC, pcmk__str_casei)) { } else if (is_reply && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { process_ping_reply(request); return FALSE; } else if (pcmk__str_eq(op, CIB_OP_UPGRADE, pcmk__str_casei)) { /* Only the DC (node with the oldest software) should process * this operation if F_CIB_SCHEMA_MAX is unset * * If the DC is happy it will then send out another * CIB_OP_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time F_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", op, (is_reply? " reply" : ""), (cib_is_master? "master" : "slave"), (max? max : "none"), (upgrade_rc? upgrade_rc : "none")); if (upgrade_rc != NULL) { // Our upgrade request was rejected by DC, notify clients of result crm_xml_add(request, F_CIB_RC, upgrade_rc); } else if ((max == NULL) && cib_is_master) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { // Ignore broadcast client requests when we're not DC return FALSE; } - } else if (crm_is_true(update)) { + } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); legacy_mode = TRUE; return FALSE; } else if (is_reply && cib_op_modifies(call_type)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (pcmk__str_eq(op, "cib_shutdown_req", pcmk__str_casei)) { /* Legacy handling */ crm_debug("Legacy handling of %s message from %s", op, originator); *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; } return *process; } if(is_reply) { crm_trace("Handling %s reply sent from %s to local clients", op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; if(pcmk__str_eq(delegated, cib_our_uname, pcmk__str_casei)) { *local_notify = TRUE; } else { *local_notify = FALSE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && pcmk__str_eq(host, cib_our_uname, pcmk__str_casei)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring %s operation for instance on %s", op, crm_str(host)); return FALSE; } else if(is_reply == FALSE && pcmk__str_eq(op, CRM_OP_PING, pcmk__str_casei)) { *needs_reply = TRUE; } crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID), originator, (*local_notify)?"(notify)":""); return TRUE; } static gboolean parse_peer_options(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { return parse_peer_options_v1( call_type, request, local_notify, needs_reply, process, needs_forward); } else { return parse_peer_options_v2( call_type, request, local_notify, needs_reply, process, needs_forward); } } static void forward_request(xmlNode * request, pcmk__client_t *cib_client, int call_options) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *host = crm_element_value(request, F_CIB_HOST); crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname); if (host != NULL) { crm_trace("Forwarding %s op to %s", op, host); send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); } else { crm_trace("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, request, FALSE); } /* Return the request to its original state */ xml_remove_prop(request, F_CIB_DELEGATED); if (call_options & cib_discard_reply) { crm_trace("Client not interested in reply"); } } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; int format = 1; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); crm_element_value_int(result_diff, "format", &format); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, F_CIB_ISREPLY, originator); - crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(msg, F_CIB_GLOBAL_UPDATE, true); crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER); if (format == 1) { CRM_ASSERT(digest != NULL); } add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); } return FALSE; } /*! * \internal * \brief Handle an IPC or CPG message containing a request * * \param[in] request Request XML * \param[in] privileged Whether privileged commands may be run * (see cib_server_ops[] definition) * \param[in] cib_client IPC client that sent request (or NULL if CPG) */ static void cib_process_request(xmlNode *request, gboolean privileged, pcmk__client_t *cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; // Whether to process request locally now gboolean is_update = TRUE; // Whether request would modify CIB gboolean needs_reply = TRUE; // Whether to build a reply gboolean local_notify = FALSE; // Whether to notify (local) requester gboolean needs_forward = FALSE; // Whether to forward request somewhere else - gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *target = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if ((host != NULL) && (*host == '\0')) { host = NULL; } if (host) { target = host; } else if (call_options & cib_scope_local) { target = "local host"; } else { target = "master"; } if (cib_client == NULL) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, client_name, call_id, originator, target, reply_to); } else { crm_xml_add(request, F_ORIG, cib_our_uname); crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } rc = cib_get_operation_id(op, &call_type); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return; } if (cib_client != NULL) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } is_update = cib_op_modifies(call_type); if (call_options & cib_discard_reply) { /* If the request will modify the CIB, and we are in legacy mode, we * need to build a reply so we can broadcast a diff, even if the * requester doesn't want one. */ needs_reply = is_update && cib_legacy_mode(); local_notify = FALSE; } if (needs_forward) { const char *section = crm_element_value(request, F_CIB_SECTION); int log_level = LOG_INFO; if (pcmk__str_eq(op, CRM_OP_NOOP, pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", op, section ? section : "'all'", host ? host : cib_legacy_mode() ? "master" : "all", originator ? originator : "local", client_name, call_id); forward_request(request, cib_client, call_options); return; } if (cib_status != pcmk_ok) { const char *call = crm_element_value(request, F_CIB_CALLID); rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(op_reply, F_TYPE, T_CIB); crm_xml_add(op_reply, F_CIB_OPERATION, op); crm_xml_add(op_reply, F_CIB_CALLID, call); crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(op_reply, F_CIB_RC, rc); crm_trace("Attaching reply output"); add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); crm_log_xml_explicit(op_reply, "cib:reply"); } else if (process) { time_t finished = 0; time_t now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); rc = cib_process_command(request, &op_reply, &result_diff, privileged); if (!is_update) { level = LOG_TRACE; - } else if (global_update) { + } else if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok) { level = LOG_WARNING; } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", client_name, call_id, the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); finished = time(NULL); if ((finished - now) > 3) { crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } if (is_update && !cib_legacy_mode()) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", client_name, call_id, local_notify?" with local notification":""); } else if (!needs_reply || stand_alone) { // This was a non-originating slave update crm_trace("Completed slave update"); } else if (cib_legacy_mode() && rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; crm_trace("Queuing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); queue_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if (call_options & cib_discard_reply) { crm_trace("Caller isn't interested in reply"); } else if (cib_client == NULL) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (pcmk_is_set(call_options, cib_sync_call)? "" : "a"), client_id); if (process == FALSE) { do_local_notify(request, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } else { do_local_notify(op_reply, client_id, pcmk_is_set(call_options, cib_sync_call), (cib_client == NULL)); } } free_xml(op_reply); free_xml(result_diff); return; } static char * calculate_section_digest(const char *xpath, xmlNode * xml_obj) { xmlNode *xml_section = NULL; if (xml_obj == NULL) { return NULL; } xml_section = get_xpath_object(xpath, xml_obj, LOG_TRACE); if (xml_section == NULL) { return NULL; } return calculate_xml_versioned_digest(xml_section, FALSE, TRUE, CRM_FEATURE_SET); } static int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; xmlNode *current_cib = NULL; int call_type = 0; int call_options = 0; const char *op = NULL; const char *section = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); int rc = pcmk_ok; int rc2 = pcmk_ok; gboolean send_r_notify = FALSE; gboolean global_update = FALSE; gboolean config_changed = FALSE; gboolean manage_counters = TRUE; static mainloop_timer_t *digest_timer = NULL; char *current_nodes_digest = NULL; char *current_alerts_digest = NULL; char *current_status_digest = NULL; int change_section = cib_change_section_nodes | cib_change_section_alerts | cib_change_section_status; CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; current_cib = the_cib; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(op, &call_type); if (rc == pcmk_ok && privileged == FALSE) { rc = cib_op_can_run(call_type, call_options, privileged, global_update); } rc2 = cib_op_prepare(call_type, request, &input, §ion); if (rc == pcmk_ok) { rc = rc2; } if (rc != pcmk_ok) { crm_trace("Call setup failed: %s", pcmk_strerror(rc)); goto done; } else if (cib_op_modifies(call_type) == FALSE) { rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ - global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); - if (global_update) { + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { /* legacy code */ manage_counters = FALSE; cib__set_call_options(call_options, "call", cib_force_diff); crm_trace("Global update detected"); CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); crm_log_xml_err(request, "bad op")); } if (rc == pcmk_ok) { ping_modified_since = TRUE; if (call_options & cib_inhibit_bcast) { /* skip */ crm_trace("Skipping update: inhibit broadcast"); manage_counters = FALSE; } if (!pcmk_is_set(call_options, cib_dryrun) && pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) { /* Copying large CIBs accounts for a huge percentage of our CIB usage */ cib__set_call_options(call_options, "call", cib_zero_copy); } else { cib__clear_call_options(call_options, "call", cib_zero_copy); } /* Calculate the hash value of the section before the change. */ if (pcmk__str_eq(CIB_OP_REPLACE, op, pcmk__str_none)) { current_nodes_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES, current_cib); current_alerts_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS, current_cib); current_status_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_STATUS, current_cib); crm_trace("current-digest %s:%s:%s", current_nodes_digest, current_alerts_digest, current_status_digest); } /* result_cib must not be modified after cib_perform_op() returns */ rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, section, request, input, manage_counters, &config_changed, current_cib, &result_cib, cib_diff, &output); if (manage_counters == FALSE) { int format = 1; /* Legacy code * If the diff is NULL at this point, it's because nothing changed */ if (*cib_diff) { crm_element_value_int(*cib_diff, "format", &format); } if (format == 1) { config_changed = cib_config_changed(NULL, NULL, cib_diff); } } /* Always write to disk for replace ops, * this also negates the need to detect ordering changes */ if (pcmk__str_eq(CIB_OP_REPLACE, op, pcmk__str_none)) { config_changed = TRUE; } } if (rc == pcmk_ok && !pcmk_is_set(call_options, cib_dryrun)) { crm_trace("Activating %s->%s%s%s", crm_element_value(current_cib, XML_ATTR_NUMUPDATES), crm_element_value(result_cib, XML_ATTR_NUMUPDATES), (pcmk_is_set(call_options, cib_zero_copy)? " zero-copy" : ""), (config_changed? " changed" : "")); if (!pcmk_is_set(call_options, cib_zero_copy)) { rc = activateCibXml(result_cib, config_changed, op); crm_trace("Activated %s (%d)", crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc); } if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (pcmk__str_eq(CIB_OP_REPLACE, op, pcmk__str_none)) { char *result_nodes_digest = NULL; char *result_alerts_digest = NULL; char *result_status_digest = NULL; /* Calculate the hash value of the changed section. */ result_nodes_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES, result_cib); result_alerts_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS, result_cib); result_status_digest = calculate_section_digest("//" XML_TAG_CIB "/" XML_CIB_TAG_STATUS, result_cib); crm_trace("result-digest %s:%s:%s", result_nodes_digest, result_alerts_digest, result_status_digest); if (pcmk__str_eq(current_nodes_digest, result_nodes_digest, pcmk__str_none)) { change_section = change_section ^ cib_change_section_nodes; } if (pcmk__str_eq(current_alerts_digest, result_alerts_digest, pcmk__str_none)) { change_section = change_section ^ cib_change_section_alerts; } if (pcmk__str_eq(current_status_digest, result_status_digest, pcmk__str_none)) { change_section = change_section ^ cib_change_section_status; } if (change_section) { send_r_notify = TRUE; } free(result_nodes_digest); free(result_alerts_digest); free(result_status_digest); } else if (pcmk__str_eq(CIB_OP_ERASE, op, pcmk__str_none)) { send_r_notify = TRUE; } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { CRM_ASSERT(!pcmk_is_set(call_options, cib_zero_copy)); if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } output = result_cib; } else { crm_trace("Not activating %d %d %s", rc, pcmk_is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES)); if (!pcmk_is_set(call_options, cib_zero_copy)) { free_xml(result_cib); } } if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) { const char *client = crm_element_value(request, F_CIB_CLIENTNAME); crm_trace("Sending notifications %d", pcmk_is_set(call_options, cib_dryrun)); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if (send_r_notify) { const char *origin = crm_element_value(request, F_ORIG); cib_replace_notify(origin, the_cib, rc, *cib_diff, change_section); } xml_log_patchset(LOG_TRACE, "cib:diff", *cib_diff); done: if (!pcmk_is_set(call_options, cib_discard_reply) || cib_legacy_mode()) { const char *caller = crm_element_value(request, F_CIB_CLIENTID); *reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(*reply, F_TYPE, T_CIB); crm_xml_add(*reply, F_CIB_OPERATION, op); crm_xml_add(*reply, F_CIB_CALLID, call_id); crm_xml_add(*reply, F_CIB_CLIENTID, caller); crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(*reply, F_CIB_RC, rc); if (output != NULL) { crm_trace("Attaching reply output"); add_message_xml(*reply, F_CIB_CALLDATA, output); } crm_log_xml_explicit(*reply, "cib:reply"); } crm_trace("cleanup"); if (cib_op_modifies(call_type) == FALSE && output != current_cib) { free_xml(output); output = NULL; } if (call_type >= 0) { cib_op_cleanup(call_type, call_options, &input, &output); } free(current_nodes_digest); free(current_alerts_digest); free(current_status_digest); crm_trace("done"); return rc; } void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (cib_legacy_mode() && pcmk__str_eq(originator, cib_our_uname, pcmk__str_null_matches)) { /* message is from ourselves */ int bcast_id = 0; if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace("Peer[inbound]", msg); */ cib_process_request(msg, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__func__, CRM_EX_ERROR); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *a_client = value; crm_err("Can't disconnect client %s: Not implemented", pcmk__client_name(a_client)); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += pcmk__ipc_client_count(); crm_debug("Disconnecting %d remote clients", pcmk__ipc_client_count()); pcmk__foreach_ipc_client(disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (pcmk__ipc_client_count() == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", pcmk__ipc_client_count(), srv_stats.active_connections); } } void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); if (active < 2) { terminate_cib(__func__, 0); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(EXIT_ESCALATION_MS, cib_force_exit, NULL); } extern int remote_fd; extern int remote_tls_fd; /*! * \internal * \brief Close remote sockets, free the global CIB and quit * * \param[in] caller Name of calling function (for log message) * \param[in] fast If -1, skip disconnect; if positive, exit that */ void terminate_cib(const char *caller, int fast) { crm_info("%s: Exiting%s...", caller, (fast > 0)? " fast" : mainloop ? " from mainloop" : ""); if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } uninitializeCib(); if (fast > 0) { /* Quit fast on error */ pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(fast); } else if ((mainloop != NULL) && g_main_loop_is_running(mainloop)) { /* Quit via returning from the main loop. If fast == -1, we skip the * disconnect here, and it will be done when the main loop returns * (this allows the peer status callback to avoid messing with the * peer caches). */ if (fast == 0) { crm_cluster_disconnect(&crm_cluster); } g_main_loop_quit(mainloop); } else { /* Quit via clean exit. Even the peer status callback can disconnect * here, because we're not returning control to the caller. */ crm_cluster_disconnect(&crm_cluster); pcmk__stop_based_ipc(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(CRM_EX_OK); } } diff --git a/daemons/based/based_common.c b/daemons/based/based_common.c index c80f252adb..db451522de 100644 --- a/daemons/based/based_common.c +++ b/daemons/based/based_common.c @@ -1,304 +1,302 @@ /* * Copyright 2008-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include gboolean stand_alone = FALSE; extern int cib_perform_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged); static xmlNode * cib_prepare_common(xmlNode * root, const char *section) { xmlNode *data = NULL; /* extract the CIB from the fragment */ if (root == NULL) { return NULL; } else if (pcmk__strcase_any_of(crm_element_name(root), XML_TAG_FRAGMENT, F_CRM_DATA, F_CIB_CALLDATA, NULL)) { data = first_named_child(root, XML_TAG_CIB); } else { data = root; } /* grab the section specified for the command */ if (section != NULL && data != NULL && pcmk__str_eq(crm_element_name(data), XML_TAG_CIB, pcmk__str_none)) { data = get_object_root(section, data); } /* crm_log_xml_trace(root, "cib:input"); */ return data; } static int cib_prepare_none(xmlNode * request, xmlNode ** data, const char **section) { *data = NULL; *section = crm_element_value(request, F_CIB_SECTION); return pcmk_ok; } static int cib_prepare_data(xmlNode * request, xmlNode ** data, const char **section) { xmlNode *input_fragment = get_message_xml(request, F_CIB_CALLDATA); *section = crm_element_value(request, F_CIB_SECTION); *data = cib_prepare_common(input_fragment, *section); /* crm_log_xml_debug(*data, "data"); */ return pcmk_ok; } static int cib_prepare_sync(xmlNode * request, xmlNode ** data, const char **section) { *data = NULL; *section = crm_element_value(request, F_CIB_SECTION); return pcmk_ok; } static int cib_prepare_diff(xmlNode * request, xmlNode ** data, const char **section) { xmlNode *input_fragment = NULL; - const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); *data = NULL; *section = NULL; - if (crm_is_true(update)) { + if (pcmk__xe_attr_is_true(request, F_CIB_GLOBAL_UPDATE)) { input_fragment = get_message_xml(request, F_CIB_UPDATE_DIFF); - } else { input_fragment = get_message_xml(request, F_CIB_CALLDATA); } CRM_CHECK(input_fragment != NULL, crm_log_xml_warn(request, "no input")); *data = cib_prepare_common(input_fragment, NULL); return pcmk_ok; } static int cib_cleanup_query(int options, xmlNode ** data, xmlNode ** output) { CRM_LOG_ASSERT(*data == NULL); if ((options & cib_no_children) || pcmk__str_eq(crm_element_name(*output), "xpath-query", pcmk__str_casei)) { free_xml(*output); } return pcmk_ok; } static int cib_cleanup_data(int options, xmlNode ** data, xmlNode ** output) { free_xml(*output); *data = NULL; return pcmk_ok; } static int cib_cleanup_output(int options, xmlNode ** data, xmlNode ** output) { free_xml(*output); return pcmk_ok; } static int cib_cleanup_none(int options, xmlNode ** data, xmlNode ** output) { CRM_LOG_ASSERT(*data == NULL); CRM_LOG_ASSERT(*output == NULL); return pcmk_ok; } static cib_operation_t cib_server_ops[] = { // Booleans are modifies_cib, needs_privileges, needs_quorum {NULL, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {CIB_OP_QUERY, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_query, cib_process_query}, {CIB_OP_MODIFY, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_modify}, {CIB_OP_APPLY_DIFF,TRUE, TRUE, TRUE, cib_prepare_diff, cib_cleanup_data, cib_server_process_diff}, {CIB_OP_REPLACE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_replace_svr}, {CIB_OP_CREATE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_create}, {CIB_OP_DELETE, TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_delete}, {CIB_OP_SYNC, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_none, cib_process_sync}, {CIB_OP_BUMP, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_bump}, {CIB_OP_ERASE, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_erase}, {CRM_OP_NOOP, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_default}, {CIB_OP_DELETE_ALT,TRUE, TRUE, TRUE, cib_prepare_data, cib_cleanup_data, cib_process_delete_absolute}, {CIB_OP_UPGRADE, TRUE, TRUE, TRUE, cib_prepare_none, cib_cleanup_output, cib_process_upgrade_server}, {CIB_OP_SLAVE, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SLAVEALL, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {CIB_OP_SYNC_ONE, FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_none, cib_process_sync_one}, {CIB_OP_MASTER, TRUE, TRUE, FALSE, cib_prepare_data, cib_cleanup_data, cib_process_readwrite}, {CIB_OP_ISMASTER, FALSE, TRUE, FALSE, cib_prepare_none, cib_cleanup_none, cib_process_readwrite}, {"cib_shutdown_req",FALSE, TRUE, FALSE, cib_prepare_sync, cib_cleanup_none, cib_process_shutdown_req}, {CRM_OP_PING, FALSE, FALSE, FALSE, cib_prepare_none, cib_cleanup_output, cib_process_ping}, }; int cib_get_operation_id(const char *op, int *operation) { static GHashTable *operation_hash = NULL; if (operation_hash == NULL) { int lpc = 0; int max_msg_types = PCMK__NELEM(cib_server_ops); operation_hash = pcmk__strkey_table(NULL, free); for (lpc = 1; lpc < max_msg_types; lpc++) { int *value = malloc(sizeof(int)); if(value) { *value = lpc; g_hash_table_insert(operation_hash, (gpointer) cib_server_ops[lpc].operation, value); } } } if (op != NULL) { int *value = g_hash_table_lookup(operation_hash, op); if (value) { *operation = *value; return pcmk_ok; } } crm_err("Operation %s is not valid", op); *operation = -1; return -EINVAL; } xmlNode * cib_msg_copy(xmlNode * msg, gboolean with_data) { int lpc = 0; const char *field = NULL; const char *value = NULL; xmlNode *value_struct = NULL; static const char *field_list[] = { F_XML_TAGNAME, F_TYPE, F_CIB_CLIENTID, F_CIB_CALLOPTS, F_CIB_CALLID, F_CIB_OPERATION, F_CIB_ISREPLY, F_CIB_SECTION, F_CIB_HOST, F_CIB_RC, F_CIB_DELEGATED, F_CIB_OBJID, F_CIB_OBJTYPE, F_CIB_EXISTING, F_CIB_SEENCOUNT, F_CIB_TIMEOUT, F_CIB_CALLBACK_TOKEN, F_CIB_GLOBAL_UPDATE, F_CIB_CLIENTNAME, F_CIB_USER, F_CIB_NOTIFY_TYPE, F_CIB_NOTIFY_ACTIVATE }; static const char *data_list[] = { F_CIB_CALLDATA, F_CIB_UPDATE, F_CIB_UPDATE_RESULT }; xmlNode *copy = create_xml_node(NULL, "copy"); CRM_ASSERT(copy != NULL); for (lpc = 0; lpc < PCMK__NELEM(field_list); lpc++) { field = field_list[lpc]; value = crm_element_value(msg, field); if (value != NULL) { crm_xml_add(copy, field, value); } } for (lpc = 0; with_data && lpc < PCMK__NELEM(data_list); lpc++) { field = data_list[lpc]; value_struct = get_message_xml(msg, field); if (value_struct != NULL) { add_message_xml(copy, field, value_struct); } } return copy; } cib_op_t * cib_op_func(int call_type) { return &(cib_server_ops[call_type].fn); } gboolean cib_op_modifies(int call_type) { return cib_server_ops[call_type].modifies_cib; } int cib_op_can_run(int call_type, int call_options, gboolean privileged, gboolean global_update) { if (privileged == FALSE && cib_server_ops[call_type].needs_privileges) { /* abort */ return -EACCES; } #if 0 if (rc == pcmk_ok && stand_alone == FALSE && global_update == FALSE && (call_options & cib_quorum_override) == 0 && cib_server_ops[call_type].needs_quorum) { return -pcmk_err_no_quorum; } #endif return pcmk_ok; } int cib_op_prepare(int call_type, xmlNode * request, xmlNode ** input, const char **section) { crm_trace("Prepare %d", call_type); return cib_server_ops[call_type].prepare(request, input, section); } int cib_op_cleanup(int call_type, int options, xmlNode ** input, xmlNode ** output) { crm_trace("Cleanup %d", call_type); return cib_server_ops[call_type].cleanup(options, input, output); } diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c index c0bd578367..6aaf0c231a 100644 --- a/daemons/based/based_messages.c +++ b/daemons/based/based_messages.c @@ -1,540 +1,540 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Maximum number of diffs to ignore while waiting for a resync */ #define MAX_DIFF_RETRY 5 gboolean cib_is_master = FALSE; xmlNode *the_cib = NULL; int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags); int get_revision(xmlNode * xml_obj, int cur_revision); int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed, int operation, const char *section); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int sync_our_cib(xmlNode * request, gboolean all); int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = crm_element_value(req, F_ORIG); *answer = NULL; if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { crm_info("Peer %s is requesting to shut down", host); return pcmk_ok; } if (cib_shutdown_flag == FALSE) { crm_err("Peer %s mistakenly thinks we wanted to shut down", host); return -EINVAL; } crm_info("Peer %s has acknowledged our shutdown request", host); terminate_cib(__func__, 0); return pcmk_ok; } int cib_process_default(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = NULL; if (op == NULL) { result = -EINVAL; crm_err("No operation specified"); } else if (strcasecmp(CRM_OP_NOOP, op) == 0) { ; } else { result = -EPROTONOSUPPORT; crm_err("Action [%s] is not supported by the CIB manager", op); } return result; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); if (pcmk__str_eq(op, CIB_OP_ISMASTER, pcmk__str_casei)) { if (cib_is_master == TRUE) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (pcmk__str_eq(op, CIB_OP_MASTER, pcmk__str_casei)) { if (cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if (cib_is_master) { crm_info("We are now in R/O mode"); cib_is_master = FALSE; } return result; } /* Set to 1 when a sync is requested, incremented when a diff is ignored, * reset to 0 when a sync is received */ static int sync_in_progress = 0; void send_sync_request(const char *host) { xmlNode *sync_me = create_xml_node(NULL, "sync-me"); crm_info("Requesting re-sync from %s", (host? host : "all peers")); sync_in_progress = 1; crm_xml_add(sync_me, F_TYPE, "cib"); crm_xml_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); crm_xml_add(sync_me, F_CIB_DELEGATED, cib_our_uname); send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); free_xml(sync_me); } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = crm_element_value(req, F_ORIG); const char *seq = crm_element_value(req, F_CIB_PING_ID); char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); static struct qb_log_callsite *cs = NULL; crm_trace("Processing \"%s\" event %s from %s", op, seq, host); *answer = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(*answer, XML_ATTR_DIGEST, digest); crm_xml_add(*answer, F_CIB_PING_ID, seq); if (cs == NULL) { cs = qb_log_callsite_get(__func__, __FILE__, __func__, LOG_TRACE, __LINE__, crm_trace_nonlog); } if (cs && cs->targets) { /* Append additional detail so the reciever can log the differences */ add_message_xml(*answer, F_CIB_CALLDATA, the_cib); } else { /* Always include at least the version details */ const char *tag = TYPE(the_cib); xmlNode *shallow = create_xml_node(NULL, tag); copy_in_properties(shallow, the_cib); add_message_xml(*answer, F_CIB_CALLDATA, shallow); free_xml(shallow); } crm_info("Reporting our current digest to %s: %s for %s.%s.%s (%p %d)", host, digest, crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(existing_cib, XML_ATTR_GENERATION), crm_element_value(existing_cib, XML_ATTR_NUMUPDATES), existing_cib, cs && cs->targets); free(digest); return pcmk_ok; } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, TRUE); } int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; *answer = NULL; if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { /* The originator of an upgrade request sends it to the DC, without * F_CIB_SCHEMA_MAX. If an upgrade is needed, the DC re-broadcasts the * request with F_CIB_SCHEMA_MAX, and each node performs the upgrade * (and notifies its local clients) here. */ return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); } else { int new_version = 0; int current_version = 0; xmlNode *scratch = copy_xml(existing_cib); const char *host = crm_element_value(req, F_ORIG); const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); const char *client_id = crm_element_value(req, F_CIB_CLIENTID); const char *call_opts = crm_element_value(req, F_CIB_CALLOPTS); const char *call_id = crm_element_value(req, F_CIB_CALLID); crm_trace("Processing \"%s\" event", op); if (value != NULL) { current_version = get_schema_version(value); } rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE); if (new_version > current_version) { xmlNode *up = create_xml_node(NULL, __func__); rc = pcmk_ok; crm_notice("Upgrade request from %s verified", host); crm_xml_add(up, F_TYPE, "cib"); crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); crm_xml_add(up, F_CIB_DELEGATED, host); crm_xml_add(up, F_CIB_CLIENTID, client_id); crm_xml_add(up, F_CIB_CALLOPTS, call_opts); crm_xml_add(up, F_CIB_CALLID, call_id); if (cib_legacy_mode() && cib_is_master) { rc = cib_process_upgrade( op, options, section, up, input, existing_cib, result_cib, answer); } else { send_cluster_message(NULL, crm_msg_cib, up, FALSE); } free_xml(up); } else if(rc == pcmk_ok) { rc = -pcmk_err_schema_unchanged; } if (rc != pcmk_ok) { // Notify originating peer so it can notify its local clients crm_node_t *origin = pcmk__search_cluster_node_cache(0, host); crm_info("Rejecting upgrade request from %s: %s " CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, (origin? origin->uname : "lost")); if (origin) { xmlNode *up = create_xml_node(NULL, __func__); crm_xml_add(up, F_TYPE, "cib"); crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_DELEGATED, host); crm_xml_add(up, F_CIB_ISREPLY, host); crm_xml_add(up, F_CIB_CLIENTID, client_id); crm_xml_add(up, F_CIB_CALLOPTS, call_opts); crm_xml_add(up, F_CIB_CALLID, call_id); crm_xml_add_int(up, F_CIB_UPGRADE_RC, rc); if (send_cluster_message(origin, crm_msg_cib, up, TRUE) == FALSE) { crm_warn("Could not send CIB upgrade result to %s", host); } free_xml(up); } } free_xml(scratch); } return rc; } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, FALSE); } int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (sync_in_progress > MAX_DIFF_RETRY) { /* Don't ignore diffs forever; the last request may have been lost. * If the diff fails, we'll ask for another full resync. */ sync_in_progress = 0; } /* The master should never ignore a diff */ if (sync_in_progress && !cib_is_master) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc, cib_is_master?"master":"slave"); if (rc == -pcmk_err_diff_resync && cib_is_master == FALSE) { free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } else if ((rc != pcmk_ok) && !cib_is_master && cib_legacy_mode()) { crm_warn("Requesting full CIB refresh because update failed: %s" CRM_XS " rc=%d", pcmk_strerror(rc), rc); xml_log_patchset(LOG_INFO, __func__, input); free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == pcmk_ok && pcmk__str_eq(tag, XML_TAG_CIB, pcmk__str_casei)) { sync_in_progress = 0; } return rc; } static int delete_cib_object(xmlNode * parent, xmlNode * delete_spec) { const char *object_name = NULL; const char *object_id = NULL; xmlNode *equiv_node = NULL; int result = pcmk_ok; if (delete_spec != NULL) { object_name = crm_element_name(delete_spec); } object_id = crm_element_value(delete_spec, XML_ATTR_ID); crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id)); if (delete_spec == NULL) { result = -EINVAL; } else if (parent == NULL) { result = -EINVAL; } else if (object_id == NULL) { /* placeholder object */ equiv_node = find_xml_node(parent, object_name, FALSE); } else { equiv_node = pcmk__xe_match(parent, object_name, XML_ATTR_ID, object_id); } if (result != pcmk_ok) { ; /* nothing */ } else if (equiv_node == NULL) { result = pcmk_ok; } else if (xml_has_children(delete_spec) == FALSE) { /* only leaves are deleted */ crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id)); free_xml(equiv_node); equiv_node = NULL; } else { xmlNode *child = NULL; for (child = pcmk__xml_first_child(delete_spec); child != NULL; child = pcmk__xml_next(child)) { int tmp_result = delete_cib_object(equiv_node, child); /* only the first error is likely to be interesting */ if (tmp_result != pcmk_ok && result == pcmk_ok) { result = tmp_result; } } } return result; } int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { xmlNode *failed = NULL; int result = pcmk_ok; xmlNode *update_section = NULL; crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section)); if (pcmk__str_eq(XML_CIB_TAG_SECTION_ALL, section, pcmk__str_casei)) { section = NULL; } else if (pcmk__str_eq(XML_TAG_CIB, section, pcmk__str_casei)) { section = NULL; } else if (pcmk__str_eq(crm_element_name(input), XML_TAG_CIB, pcmk__str_casei)) { section = NULL; } CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL); if (input == NULL) { crm_err("Cannot perform modification with no data"); return -EINVAL; } failed = create_xml_node(NULL, XML_TAG_FAILED); update_section = get_object_root(section, *result_cib); result = delete_cib_object(update_section, input); update_results(failed, input, op, result); if ((result == pcmk_ok) && xml_has_children(failed)) { result = -EINVAL; } if (result != pcmk_ok) { crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } else { free_xml(failed); } return result; } int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; char *digest = NULL; const char *host = crm_element_value(request, F_ORIG); const char *op = crm_element_value(request, F_CIB_OPERATION); xmlNode *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL,;); CRM_CHECK(replace_request != NULL,;); crm_debug("Syncing CIB to %s", all ? "all peers" : host); if (all == FALSE && host == NULL) { crm_log_xml_err(request, "bad sync"); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasn't being notified * because it didn't know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } if (all) { xml_remove_prop(replace_request, F_CIB_HOST); } crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); - crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(replace_request, F_CIB_GLOBAL_UPDATE, true); crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); crm_xml_add(replace_request, XML_ATTR_DIGEST, digest); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if (send_cluster_message (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { result = -ENOTCONN; } free_xml(replace_request); free(digest); return result; } diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 3ddff6e13f..0e357e48f9 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2930 +1,2930 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // lrmd_event_data_t, lrmd_rsc_info_t, etc. #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *operation, xmlNode *msg); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time); static void lrm_connection_destroy(void) { if (pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); controld_clear_fsa_input_flags(R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none) && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); controld_delete_resource_history(op->rsc_id, lrm_state->node_name, NULL, crmd_cib_smart_opt()); return; } if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_EXEC_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, CRMD_ACTION_STATUS, NULL)) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = pcmk__strkey_table(free, free); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " PCMK__OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) { crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } static inline const char * op_node_name(lrmd_event_data_t *op) { return op->remote_nodename? op->remote_nodename : fsa_our_uname; } void lrm_op_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: if (op->remote_nodename == NULL) { /* If this is the local executor IPC connection, set the right * bits in the controller when the connection goes down. */ lrm_connection_destroy(); } break; case lrmd_event_exec_complete: { lrm_state_t *lrm_state = lrm_state_find(op_node_name(op)); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL, NULL); } break; default: break; } } static void try_local_executor_connect(long long action, fsa_data_t *msg_data, lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; crm_debug("Connecting to the local executor"); // If we can connect, great rc = controld_connect_local_executor(lrm_state); if (rc == pcmk_rc_ok) { controld_set_fsa_input_flags(R_LRM_CONNECTED); crm_info("Connection to the local executor established"); return; } // Otherwise, if we can try again, set a timer to do so if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the local executor %d time%s " "(%d max): %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS, pcmk_rc_str(rc)); controld_start_timer(wait_timer); crmd_fsa_stall(FALSE); return; } // Otherwise give up crm_err("Failed to connect to the executor the max allowed " "%d time%s: %s", lrm_state->num_lrm_register_fails, pcmk__plural_s(lrm_state->num_lrm_register_fails), pcmk_rc_str(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } controld_clear_fsa_input_flags(R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { try_local_executor_connect(action, msg_data, lrm_state); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; active_op_t *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, pcmk__plural_s(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, pcmk__plural_s(counter), when); if ((cur_state == S_TERMINATE) || !pcmk_is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } /*! * \internal * \brief Build XML and string of parameters meeting some criteria, for digest * * \param[in] op Executor event with parameter table to use * \param[in] metadata Parsed meta-data for executed resource agent * \param[in] param_type Flag used for selection criteria * \param[out] result Will be set to newly created XML with selected * parameters as attributes * * \return Newly allocated space-separated string of parameter names * \note Selection criteria varies by param_type: for the restart digest, we * want parameters that are *not* marked reloadable (OCF 1.1) or that * *are* marked unique (pre-1.1), for both string and XML results; for the * secure digest, we want parameters that *are* marked private for the * string, but parameters that are *not* marked private for the XML. * \note It is the caller's responsibility to free the string return value with * free() and the XML result with free_xml(). */ static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, enum ra_param_flags_e param_type, xmlNode **result) { char *list = NULL; size_t len = 0; *result = create_xml_node(NULL, XML_TAG_PARAMS); for (GList *iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept_for_list = false; bool accept_for_xml = false; switch (param_type) { case ra_param_reloadable: accept_for_list = !pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_unique: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = accept_for_list; break; case ra_param_private: accept_for_list = pcmk_is_set(param->rap_flags, param_type); accept_for_xml = !accept_for_list; break; } if (accept_for_list) { crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); if (list == NULL) { // We will later search for " WORD ", so start list with a space pcmk__add_word(&list, &len, " "); } pcmk__add_word(&list, &len, param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (accept_for_xml) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(*result, param->rap_name, v); } } } if (list != NULL) { // We will later search for " WORD ", so end list with a space pcmk__add_word(&list, &len, " "); } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (pcmk_is_set(metadata->ra_flags, ra_supports_reload_agent)) { // Add parameters not marked reloadable to the "op-force-restart" list list = build_parameter_list(op, metadata, ra_param_reloadable, &restart); } else if (pcmk_is_set(metadata->ra_flags, ra_supports_legacy_reload)) { /* @COMPAT pre-OCF-1.1 resource agents * * Before OCF 1.1, Pacemaker abused "unique=0" to indicate * reloadability. Add any parameters with unique="1" to the * "op-force-restart" list. */ list = build_parameter_list(op, metadata, ra_param_unique, &restart); } else { // Resource does not support agent reloads return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ list = build_parameter_list(op, metadata, ra_param_private, &secure); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; uint32_t metadata_source = controld_metadata_from_agent; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " PCMK__OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } /* Getting meta-data from cache is OK unless this is a successful start * action -- always refresh from the agent for those, in case the * resource agent was updated. * * @TODO Only refresh the meta-data after starts if the agent actually * changed (using something like inotify, or a hash or modification time of * the agent executable). */ if ((op->op_status != PCMK_EXEC_DONE) || (op->rc != target_rc) || !pcmk__str_eq(op->op_type, CRMD_ACTION_START, pcmk__str_none)) { metadata_source |= controld_metadata_from_cache; } metadata = controld_get_rsc_metadata(lrm_state, rsc, metadata_source); if (metadata == NULL) { return TRUE; } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s:%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) { // A stricter check is too complex ... leave that to the scheduler return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __func__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __func__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __func__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __func__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } xmlNode * controld_query_executor_state(const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not find executor state for node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } /*! * \internal * \brief Map standard Pacemaker return code to operation status and OCF code * * \param[out] event Executor event whose status and return code should be set * \param[in] rc Standard Pacemaker return code */ void controld_rc2event(lrmd_event_data_t *event, int rc) { /* This is called for cleanup requests from controller peers/clients, not * for resource actions, so no exit reason is needed. */ switch (rc) { case pcmk_rc_ok: lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); break; case EACCES: lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV, PCMK_EXEC_ERROR, NULL); break; default: lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, NULL); break; } } /*! * \internal * \brief Trigger a new transition after CIB status was deleted * * If a CIB status delete was not expected (as part of the transition graph), * trigger a new transition by updating the (arbitrary) "last-lrm-refresh" * cluster property. * * \param[in] from_sys IPC name that requested the delete * \param[in] rsc_id Resource whose status was deleted (for logging only) */ void controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { char *now_s = crm_strdup_printf("%lld", (long long) time(NULL)); crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); controld_rc2event(op, pcmk_legacy2rc(rc)); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; active_op_t *pending = value; if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) { g_hash_table_iter_remove(rsc_gIter); } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); } controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " PCMK__OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing resource operation history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = pcmk__op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = pcmk__op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; active_op_t *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && !pcmk_is_set(pending->flags, active_op_remove)) { controld_set_active_op_flags(pending, active_op_remove); crm_debug("Scheduling %s for removal", key); } if (pcmk_is_set(pending->flags, active_op_cancelled)) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } controld_set_active_op_flags(pending, active_op_cancelled); } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; active_op_t *op = value; if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s from executor for %s%s%s", id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Could not delete '%s' from executor for %s%s%s: %s " CRM_XS " rc=%d", id, sys, (user? " as " : ""), (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode, const char *exit_reason) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; lrmd__set_result(op, op_exitcode, op_status, exit_reason); } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ controld_delete_node_state(lrm_state->node_name, controld_section_lrm, cib_scope_local); /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE * would result in the scheduler sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } /*! * \internal * \brief Fail a requested action without actually executing it * * For an action that can't be executed, process it similarly to an actual * execution result, with specified error status (except for notify actions, * which will always be treated as successful). * * \param[in] lrm_state Executor connection that action is for * \param[in] action Action XML from request * \param[in] rc Desired return code to use * \param[in] op_status Desired operation status to use * \param[in] exit_reason Human-friendly detail, if error */ static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int op_status, enum ocf_exitcode rc, const char *exit_reason) { lrmd_event_data_t *op = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL); } else { fake_op_status(lrm_state, op, op_status, rc, exit_reason); } crm_info("Faking " PCMK__OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); // Process the result as if it came from the LRM process_lrm_event(lrm_state, op, NULL, action); lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; if (user_name && !pcmk__is_privileged(user_name)) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_INSUFFICIENT_PRIV, "Unprivileged user cannot fail resources"); controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR, "Simulated failure"); process_lrm_event(lrm_state, op, NULL, xml); op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR, "Cannot fail unknown resource"); } controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local resource history refresh: call=%d", rc); if (!pcmk__str_eq(CRM_SYSTEM_CRMD, from_sys, pcmk__str_casei)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; guint interval_ms = 0; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) { free(meta_key); return FALSE; } free(meta_key); op_key = pcmk__op_key(rsc->id, op_task, interval_ms); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); pcmk__scan_min_int(call_id, &call, 0); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name, user_name, cib_dryrun|cib_sync_call); if (cib_rc != pcmk_rc_ok) { lrmd_event_data_t *op = NULL; op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); /* These are resource clean-ups, not actions, so no exit reason is * needed. */ lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = !pcmk__str_eq(target_node, fsa_our_uname, pcmk__str_casei); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Local node has no connection to remote"); return; } CRM_ASSERT(lrm_state != NULL); user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL); crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("Executor %s command from %s as user %s", crm_op, from_sys, user_name); if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_casei)) { if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) { crm_rsc_delete = TRUE; // from crm_resource } operation = CRMD_ACTION_DELETE; } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_casei)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_casei)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_QUERY, pcmk__str_casei)) { handle_query_op(input->msg, lrm_state); } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_casei)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_casei) || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_casei)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_casei); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, "Not connected to remote executor"); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_NOT_CONFIGURED, // fatal error "Invalid resource definition"); return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR, PCMK_OCF_INVALID_PARAM, // hard error "Could not register resource with executor"); return; } if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_casei)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_casei)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD, CRMD_ACTION_RELOAD_AGENT, NULL)) { /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs * will schedule reload-agent actions only. In either case, we need * to map that to whatever the resource agent actually supports. * Default to the OCF 1.1 name. */ struct ra_metadata_s *md = NULL; const char *reload_name = CRMD_ACTION_RELOAD_AGENT; md = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); if ((md != NULL) && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) { reload_name = CRMD_ACTION_RELOAD; } do_lrm_rsc_op(lrm_state, rsc, reload_name, input->xml); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #if ENABLE_VERSIONED_ATTRS static void resolve_versioned_parameters(lrm_state_t *lrm_state, const char *rsc_id, const xmlNode *rsc_op, GHashTable *params) { /* Resource info *should* already be cached, so we don't get * executor call */ lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = controld_get_rsc_metadata(lrm_state, rsc, controld_metadata_from_cache); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (pcmk__str_eq(key, XML_ATTR_TIMEOUT, pcmk__str_casei)) { pcmk__scan_min_int(value, &op->timeout, 0); } else if (pcmk__str_eq(key, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) { pcmk__scan_min_int(value, &op->start_delay, 0); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif static lrmd_event_data_t * construct_op(lrm_state_t *lrm_state, xmlNode *rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; GHashTable *params = NULL; xmlNode *primitive = NULL; const char *class = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = lrmd_new_event(rsc_id, operation, 0); op->type = lrmd_event_exec_complete; op->timeout = 0; op->start_delay = 0; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); if (rsc_op == NULL) { CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); pcmk__scan_min_int(op_delay, &op->start_delay, 0); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); pcmk__scan_min_int(op_timeout, &op->timeout, 0); if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0, &(op->interval_ms)) != pcmk_rc_ok) { op->interval_ms = 0; } /* Use pcmk_monitor_timeout instead of meta timeout for stonith recurring monitor, if set */ primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE); class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS); if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params) && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei) && (op->interval_ms > 0)) { op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (op_timeout != NULL) { op->timeout = crm_get_msec(op_timeout); } } #if ENABLE_VERSIONED_ATTRS if (lrm_state && !is_remote_lrmd_ra(NULL, NULL, rsc_id) && !pcmk__strcase_any_of(op_type, CRMD_ACTION_METADATA, CRMD_ACTION_DELETE, NULL)) { resolve_versioned_parameters(lrm_state, rsc_id, rsc_op, params); } #endif if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = pcmk__strkey_table(free, free); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } /*! * \internal * \brief Send a (synthesized) event result * * Reply with a synthesized event result directly, as opposed to going through * the executor. * * \param[in] to_host Host to send result to * \param[in] to_sys IPC name to send result to (NULL for transition engine) * \param[in] rsc Type information about resource the result is for * \param[in] op Event with result to send * \param[in] rsc_id ID of resource the result is for */ void controld_ack_event_directly(const char *to_host, const char *to_sys, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __func__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __func__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } controld_set_fsa_input_flags(R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; active_op_t *op = value; if ((op->interval_ms != 0) && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; active_op_t *op = value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL); op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " PCMK__OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op, 0); } static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *operation, xmlNode *msg) { int rc; int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; const char *nack_reason = NULL; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " PCMK__OP_FMT, removed, pcmk__plural_s(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_notice("Requesting local execution of %s operation for %s on %s " CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT, crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name, transition, rsc->id, operation, op->interval_ms); if (pcmk_is_set(fsa_input_register, R_SHUTDOWN) && pcmk__str_eq(operation, RSC_START, pcmk__str_casei)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); nack_reason = "Not attempting start due to shutdown in progress"; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && !pcmk__str_eq(operation, CRMD_ACTION_STOP, pcmk__str_casei)) { nack_reason = "Controller cannot attempt actions at this time"; } if (nack_reason != NULL) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), pcmk__btoa(pcmk_is_set(fsa_input_register, R_SHUTDOWN))); lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID, nack_reason); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params, &call_id); if (rc == pcmk_rc_ok) { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); active_op_t *pending = NULL; pending = calloc(1, sizeof(active_op_t)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = op->user_data? strdup(op->user_data) : NULL; if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &(pending->lock_time)) != pcmk_ok) { pending->lock_time = 0; } g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { int target_rc = PCMK_OCF_OK; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc); lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL); controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } else if (lrm_state_is_local(lrm_state)) { crm_err("Could not initiate %s action for resource %s locally: %s " CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else { crm_err("Could not initiate %s action for resource %s remotely on %s: " "%s " CRM_XS " rc=%d", operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc); fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED, PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc)); process_lrm_event(lrm_state, op, NULL, NULL); } free(op_id); lrmd_free_event(op); } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(); } } /* Only successful stops, and probes that found the resource inactive, get locks * recorded in the history. This ensures the resource stays locked to the node * until it is active there again after the node comes back up. */ static bool should_preserve_lock(lrmd_event_data_t *op) { if (!controld_shutdown_lock_enabled) { return false; } if (!strcmp(op->op_type, RSC_STOP) && (op->rc == PCMK_OCF_OK)) { return true; } if (!strcmp(op->op_type, RSC_STATUS) && (op->rc == PCMK_OCF_NOT_RUNNING)) { return true; } return false; } static int do_update_resource(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op, time_t lock_time) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (pcmk__str_eq(node_name, fsa_our_uname, pcmk__str_casei)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; - crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); + pcmk__xe_set_bool_attr(iter, XML_NODE_IS_REMOTE, true); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __func__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __func__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (lock_time != 0) { /* Actions on a locked resource should either preserve the lock by * recording it with the action result, or clear it. */ if (!should_preserve_lock(op)) { lock_time = 0; } crm_xml_add_ll(iter, XML_CONFIG_ATTR_SHUTDOWN_LOCK, (long long) lock_time); } if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); controld_ack_event_directly(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __func__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" that really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* Replace newline escape pattern with actual newline (and a space so we * don't have to shuffle the rest of the buffer) */ pch[0] = '\n'; pch[1] = ' '; pch = strstr(pch, escaped_newline); } return ret; } static bool did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id, const char * op_type, guint interval_ms) { rsc_history_t *entry = NULL; CRM_CHECK(lrm_state != NULL, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); CRM_CHECK(op_type != NULL, return FALSE); entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->failed == NULL) { return FALSE; } if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none) && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei) && entry->failed->interval_ms == interval_ms) { return TRUE; } return FALSE; } /*! * \internal * \brief Log the result of an executor action (actual or synthesized) * * \param[in] op Executor action to log result for * \param[in] op_key Operation key for action * \param[in] node_name Name of node action was performed on, if known * \param[in] update_id Call ID for CIB update (or 0 if none) * \param[in] confirmed Whether to log that graph action was confirmed */ static void log_executor_event(lrmd_event_data_t *op, const char *op_key, const char *node_name, int update_id, gboolean confirmed) { int log_level = LOG_ERR; GString *str = g_string_sized_new(100); // reasonable starting size g_string_printf(str, "Result of %s operation for %s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id); if (node_name != NULL) { g_string_append_printf(str, " on %s", node_name); } switch (op->op_status) { case PCMK_EXEC_DONE: log_level = LOG_NOTICE; g_string_append_printf(str, ": %s", services_ocf_exitcode_str(op->rc)); break; case PCMK_EXEC_TIMEOUT: g_string_append_printf(str, ": %s after %s", pcmk_exec_status_str(op->op_status), pcmk__readable_interval(op->timeout)); break; case PCMK_EXEC_CANCELLED: log_level = LOG_INFO; // Fall through default: g_string_append_printf(str, ": %s", pcmk_exec_status_str(op->op_status)); } if ((op->exit_reason != NULL) && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) { g_string_append_printf(str, " (%s)", op->exit_reason); } g_string_append(str, " " CRM_XS); if (update_id != 0) { g_string_append_printf(str, " CIB update %d,", update_id); } g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s", (confirmed? "" : "un"), op->call_id, op_key); if (op->op_status == PCMK_EXEC_DONE) { g_string_append_printf(str, " rc=%d", op->rc); } do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); /* The services library has already logged the output at info or debug * level, so just raise to notice if it looks like a failure. */ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", op->rsc_id, op->op_type, op->interval_ms, node_name); crm_log_output(LOG_NOTICE, prefix, op->output); free(prefix); } } void process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op, active_op_t *pending, xmlNode *action_xml) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; bool need_direct_ack = FALSE; lrmd_rsc_info_t *rsc = NULL; const char *node_name = NULL; CRM_CHECK(op != NULL, return); CRM_CHECK(op->rsc_id != NULL, return); // Remap new status codes for older DCs if (compare_version(fsa_our_dc_version, "3.2.0") < 0) { switch (op->op_status) { case PCMK_EXEC_NOT_CONNECTED: lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED, PCMK_EXEC_ERROR, op->exit_reason); break; case PCMK_EXEC_INVALID: lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR, op->exit_reason); break; default: break; } } op_id = make_stop_id(op->rsc_id, op->call_id); op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms); // Get resource info if available (from executor state or action XML) if (lrm_state) { rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if ((rsc == NULL) && action_xml) { xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE); const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml, XML_ATTR_TYPE); if (standard && type) { crm_info("%s agent information not cached, using %s%s%s:%s from action XML", op->rsc_id, standard, (provider? ":" : ""), (provider? provider : ""), type); rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); } else { crm_err("Can't process %s result because %s agent information not cached or in XML", op_key, op->rsc_id); } } // Get node name if available (from executor state or action XML) if (lrm_state) { node_name = lrm_state->node_name; } else if (action_xml) { node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET); } if(pending == NULL) { remove = TRUE; if (lrm_state) { pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } } if (op->op_status == PCMK_EXEC_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_EXEC_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_EXEC_CANCELLED) { /* We might not record the result, so directly acknowledge it to the * originator instead, so it doesn't time out waiting for the result * (especially important if part of a transition). */ need_direct_ack = TRUE; if (controld_action_is_recordable(op->op_type)) { if (node_name && rsc) { // We should record the result, and happily, we can update_id = do_update_resource(node_name, rsc, op, pending? pending->lock_time : 0); need_direct_ack = FALSE; } else if (op->rsc_deleted) { /* We shouldn't record the result (likely the resource was * refreshed, cleaned, or removed while this operation was * in flight). */ crm_notice("Not recording %s result in CIB because " "resource information was removed since it was initiated", op_key); } else { /* This shouldn't be possible; the executor didn't consider the * resource deleted, but we couldn't find resource or node * information. */ crm_err("Unable to record %s result in CIB: %s", op_key, (node_name? "No resource information" : "No node name")); } } } else if (op->interval_ms == 0) { /* A non-recurring operation was cancelled. Most likely, the * never-initiated action was removed from the executor's pending * operations list upon resource removal. */ need_direct_ack = TRUE; } else if (pending == NULL) { /* This recurring operation was cancelled, but was not pending. No * transition actions are waiting on it, nothing needs to be done. */ } else if (op->user_data == NULL) { /* This recurring operation was cancelled and pending, but we don't * have a transition key. This should never happen. */ crm_err("Recurring operation %s was cancelled without transition information", op_key); } else if (pcmk_is_set(pending->flags, active_op_remove)) { /* This recurring operation was cancelled (by us) and pending, and we * have been waiting for it to finish. */ if (lrm_state) { erase_lrm_history_by_op(lrm_state, op); } /* If the recurring operation had failed, the lrm_rsc_op is recorded as * "last_failure" which won't get erased from the cib given the logic on * purpose in erase_lrm_history_by_op(). So that the cancel action won't * have a chance to get confirmed by DC with process_op_deletion(). * Cluster transition would get stuck waiting for the remaining action * timer to time out. * * Directly acknowledge the cancel operation in this case. */ if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms)) { need_direct_ack = TRUE; } } else if (op->rsc_deleted) { /* This recurring operation was cancelled (but not by us, and the * executor does not have resource information, likely due to resource * cleanup, refresh, or removal) and pending. */ crm_debug("Recurring op %s was cancelled due to resource deletion", op_key); need_direct_ack = TRUE; } else { /* This recurring operation was cancelled (but not by us, likely by the * executor before stopping the resource) and pending. We don't need to * do anything special. */ } if (need_direct_ack) { controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if (lrm_state && ((op->interval_ms == 0) || (op->op_status == PCMK_EXEC_CANCELLED))) { gboolean found = g_hash_table_remove(lrm_state->pending_ops, op_id); if (op->interval_ms != 0) { removed = TRUE; } else if (found) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } } log_executor_event(op, op_key, node_name, update_id, removed); if (lrm_state) { if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (rsc && (op->rc == PCMK_OCF_OK)) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); if (lrm_state) { delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); if (lrm_state && rsc) { update_history_cache(lrm_state, rsc, op); } lrmd_free_rsc_info(rsc); free(op_key); free(op_id); } diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c index 51947a4225..f84baff845 100644 --- a/daemons/controld/controld_join_client.c +++ b/daemons/controld/controld_join_client.c @@ -1,312 +1,311 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include int reannounce_count = 0; void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig); /*! * \internal * \brief Remember if DC is shutting down as we join * * If we're joining while the current DC is shutting down, update its expected * state, so we don't fence it if we become the new DC. (We weren't a peer * when it broadcast its shutdown request.) * * \param[in] msg A join message from the DC */ static void update_dc_expected(xmlNode *msg) { - if (fsa_our_dc && crm_is_true(crm_element_value(msg, F_CRM_DC_LEAVING))) { + if (fsa_our_dc && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { crm_node_t *dc_node = crm_get_peer(0, fsa_our_dc); pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); } } /* A_CL_JOIN_QUERY */ /* is there a DC out there? */ void do_cl_join_query(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); sleep(1); // Give the cluster layer time to propagate to the DC update_dc(NULL); /* Unset any existing value so that the result is not discarded */ crm_debug("Querying for a DC"); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } /* A_CL_JOIN_ANNOUNCE */ /* this is kind of a workaround for the fact that we may not be around or * are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL */ void do_cl_join_announce(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* don't announce if we're in one of these states */ if (cur_state != S_PENDING) { crm_warn("Not announcing cluster join because in state %s", fsa_state2string(cur_state)); return; } if (AM_I_OPERATIONAL) { /* send as a broadcast */ xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_debug("Announcing availability"); update_dc(NULL); send_cluster_message(NULL, crm_msg_crmd, req, FALSE); free_xml(req); } else { /* Delay announce until we have finished local startup */ crm_warn("Delaying announce of cluster join until local startup is complete"); return; } } static int query_call_id = 0; /* A_CL_JOIN_REQUEST */ /* aka. accept the welcome offer */ void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *welcome_from; const char *join_id; CRM_CHECK(input != NULL, return); #if 0 if (we are sick) { log error; /* save the request for later? */ return; } #endif welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); join_id = crm_element_value(input->msg, F_CRM_JOIN_ID); crm_trace("Accepting cluster join offer from node %s "CRM_XS" join-%s", welcome_from, crm_element_value(input->msg, F_CRM_JOIN_ID)); /* we only ever want the last one */ if (query_call_id > 0) { crm_trace("Cancelling previous join query: %d", query_call_id); remove_cib_op_callback(query_call_id, FALSE); query_call_id = 0; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding cluster join offer from node %s (expected %s)", welcome_from, fsa_our_dc); return; } update_dc_expected(input->msg); query_call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local | cib_no_children); fsa_register_cib_callback(query_call_id, FALSE, strdup(join_id), join_query_callback); crm_trace("Registered join query callback: %d", query_call_id); controld_set_fsa_action_flags(A_DC_TIMER_STOP); trigger_fsa(); } void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *join_id = user_data; xmlNode *generation = create_xml_node(NULL, XML_CIB_TAG_GENERATION_TUPPLE); CRM_LOG_ASSERT(join_id != NULL); if (query_call_id != call_id) { crm_trace("Query %d superseded", call_id); goto done; } query_call_id = 0; if(rc != pcmk_ok || output == NULL) { crm_err("Could not retrieve version details for join-%s: %s (%d)", join_id, pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else if (fsa_our_dc == NULL) { crm_debug("Membership is in flux, not continuing join-%s", join_id); } else { xmlNode *reply = NULL; crm_debug("Respond to join offer join-%s from %s", join_id, fsa_our_dc); copy_in_properties(generation, output); reply = create_request(CRM_OP_JOIN_REQUEST, generation, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add(reply, F_CRM_JOIN_ID, join_id); crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); } done: free_xml(generation); } static void set_join_state(const char * start_state) { if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured environment", fsa_our_uname, start_state); update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid, NULL, NULL, NULL, "standby", "on", TRUE, NULL, NULL); } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { crm_notice("Forcing node %s to join in %s state per configured environment", fsa_our_uname, start_state); update_attr_delegate(fsa_cib_conn, cib_sync_call, XML_CIB_TAG_NODES, fsa_our_uuid, NULL, NULL, NULL, "standby", "off", TRUE, NULL, NULL); } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { crm_debug("Not forcing a starting state on node %s", fsa_our_uname); } else { crm_warn("Unrecognized start state '%s', using 'default' (%s)", start_state, fsa_our_uname); } } /* A_CL_JOIN_RESULT */ /* aka. this is notification that we have (or have not) been accepted */ void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *tmp1 = NULL; gboolean was_nack = TRUE; static gboolean first_join = TRUE; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); int join_id = -1; const char *op = crm_element_value(input->msg, F_CRM_TASK); - const char *ack_nack = crm_element_value(input->msg, CRM_OP_JOIN_ACKNAK); const char *welcome_from = crm_element_value(input->msg, F_CRM_HOST_FROM); if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) { crm_trace("Ignoring op=%s message", op); return; } /* calculate if it was an ack or a nack */ - if (crm_is_true(ack_nack)) { + if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) { was_nack = FALSE; } crm_element_value_int(input->msg, F_CRM_JOIN_ID, &join_id); if (was_nack) { crm_err("Shutting down because cluster join with leader %s failed " CRM_XS" join-%d NACK'd", welcome_from, join_id); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (AM_I_DC == FALSE && pcmk__str_eq(welcome_from, fsa_our_uname, pcmk__str_casei)) { crm_warn("Discarding our own welcome - we're no longer the DC"); return; } if (update_dc(input->msg) == FALSE) { crm_warn("Discarding %s from node %s (expected from %s)", op, welcome_from, fsa_our_dc); return; } update_dc_expected(input->msg); /* send our status section to the DC */ tmp1 = controld_query_executor_state(fsa_our_uname); if (tmp1 != NULL) { xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_xml_add_int(reply, F_CRM_JOIN_ID, join_id); crm_debug("Confirming join-%d: sending local operation history to %s", join_id, fsa_our_dc); /* * If this is the node's first join since the controller started on it, * set its initial state (standby or member) according to the user's * preference. * * We do not clear the LRM history here. Even if the DC failed to do it * when we last left, removing them here creates a race condition if the * controller is being recovered. Instead of a list of active resources * from the executor, we may end up with a blank status section. If we * are _NOT_ lucky, we will probe for the "wrong" instance of anonymous * clones and end up with multiple active instances on the machine. */ if (first_join && !pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { first_join = FALSE; if (start_state) { set_join_state(start_state); } } send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, reply, TRUE); free_xml(reply); if (AM_I_DC == FALSE) { register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE, __func__); } free_xml(tmp1); } else { crm_err("Could not confirm join-%d with %s: Local operation history failed", join_id, fsa_our_dc); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index e3af310506..83e276097d 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,776 +1,776 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("Node %s join-%d phase is still %s " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(last), node->id, source); } else if ((phase <= crm_join_none) || (phase == (last + 1))) { node->join = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(phase), crm_join_phase_str(last), node->id, source); } else { crm_warn("Rejecting join-%d phase update for node %s because " "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", current_join_id, node->uname, crm_join_phase_str(last), crm_join_phase_str(phase), node->id, source); } } static void start_join_round(void) { GHashTableIter iter; crm_node_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__func__, peer, crm_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } controld_clear_fsa_input_flags(R_HAVE_CIB|R_CIB_ASKED); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); /* Identify which election this is a part of */ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ - crm_xml_add_boolean(msg, F_CRM_DC_LEAVING, - pcmk_is_set(fsa_input_register, R_SHUTDOWN)); + pcmk__xe_set_bool_attr(msg, F_CRM_DC_LEAVING, + pcmk_is_set(fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, (member->uname? member->uname : "with unknown name")); if(member->expected == NULL && pcmk__str_eq(member->state, CRM_NODE_LOST, pcmk__str_casei)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->uuid); return; } if (saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->uname, crm_join_phase_str(member->join)); return; } crm_update_peer_join(__func__, (crm_node_t*)member, crm_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__func__, member, crm_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); // do_update_cib_nodes(TRUE, __func__); update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __func__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = crm_get_peer(0, join_to); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__func__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (strcmp(join_to, fsa_our_uname) != 0) { member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(INFINITY, tg_restart, "Node join", NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); long long int_elem_l; long long int_elem_r; pcmk__scan_ll(elem_l, &int_elem_l, -1LL); pcmk__scan_ll(elem_r, &int_elem_r, -1LL); if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); const char *join_version = crm_element_value(join_ack->msg, XML_ATTR_CRM_VERSION); crm_node_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = crm_get_peer(0, join_from); crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __func__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (ref == NULL) { ref = "none"; // for logging only } if (crm_is_peer_active(join_node) == FALSE) { crm_err("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " CRM_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { crm_debug("Accepting join-%d request from %s " "(with first CIB generation) " CRM_XS " ref=%s", join_id, join_from, ref); max_generation_xml = copy_xml(generation); max_generation_from = strdup(join_from); } else if (cmp < 0 || (cmp == 0 && pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei))) { crm_debug("Accepting join-%d request from %s (with better " "CIB generation than current best from %s) " CRM_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); free(max_generation_from); free_xml(max_generation_xml); max_generation_from = strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } else { crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", join_id, join_from, ref); } if (ack_nack_bool == FALSE) { crm_update_peer_join(__func__, join_node, crm_join_nack); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__func__, join_node, crm_join_integrated); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(crm_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __func__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(crm_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(crm_join_welcomed); int count_integrated = crmd_join_phase_count(crm_join_integrated); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_integrated == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(fsa_state, __func__); return; } controld_clear_fsa_input_flags(R_HAVE_CIB); if (pcmk__str_eq(max_generation_from, fsa_our_uname, pcmk__str_null_matches | pcmk__str_casei)) { controld_set_fsa_input_flags(R_HAVE_CIB); } if (pcmk_is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if (max_generation_from && !pcmk_is_set(fsa_input_register, R_HAVE_CIB)) { /* ask for the agreed best CIB */ sync_from = strdup(max_generation_from); controld_set_fsa_input_flags(R_CIB_ASKED); crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", current_join_id, count_integrated, pcmk__plural_s(count_integrated), sync_from); crm_log_xml_notice(max_generation_xml, "Requested CIB version"); } else { /* Send _our_ CIB out to everyone */ sync_from = strdup(fsa_our_uname); crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", current_join_id, count_integrated, pcmk__plural_s(count_integrated)); crm_log_xml_debug(max_generation_xml, "Requested CIB version"); } crmd_join_phase_log(LOG_DEBUG); rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback); } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); controld_clear_fsa_input_flags(R_CIB_ASKED); if (rc != pcmk_ok) { do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", (char *) user_data, current_join_id, pcmk_strerror(rc)); /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __func__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)", current_join_id, fsa_state2string(fsa_state)); } else { controld_set_fsa_input_flags(R_HAVE_CIB); controld_clear_fsa_input_flags(R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (check_join_state(fsa_state, __func__) == FALSE) { int count_integrated = crmd_join_phase_count(crm_join_integrated); crm_debug("Notifying %d node%s of join-%d results", count_integrated, pcmk__plural_s(count_integrated), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("join-%d node history update (via CIB call %d) complete", current_join_id, call_id); check_join_state(fsa_state, __func__); } else { crm_err("join-%d node history update (via CIB call %d) failed: %s " "(next transition may determine resource status incorrectly)", current_join_id, call_id, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); enum controld_section_e section = controld_section_lrm; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = NULL; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); return; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); return; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); return; } if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); return; } peer = crm_get_peer(0, join_from); if (peer->join != crm_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, crm_join_phase_str(peer->join), crm_join_phase_str(crm_join_finalized)); return; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__func__, peer, crm_join_nack); return; } crm_update_peer_join(__func__, peer, crm_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB notifies us of the change. */ if (controld_shutdown_lock_enabled) { section = controld_section_lrm_unlocked; } controld_delete_node_state(join_from, section, cib_scope_local); if (pcmk__str_eq(join_from, fsa_our_uname, pcmk__str_casei)) { xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); if (now_dc_lrmd_state != NULL) { fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); free_xml(now_dc_lrmd_state); crm_debug("Updating local node history for join-%d " "from query result (via CIB call %d)", join_id, call_id); } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); crm_warn("Updating local node history from join-%d confirmation " "because query failed (via CIB call %d)", join_id, call_id); } } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); crm_debug("Updating node history for %s from join-%d confirmation " "(via CIB call %d)", join_from, join_id, call_id); } fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; if(join_node->join != crm_join_integrated) { crm_trace("Not updating non-integrated node %s (%s) for join-%d", join_to, crm_join_phase_str(join_node->join), current_join_id); return; } crm_trace("Updating node state for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); set_uuid(tmp1, XML_ATTR_UUID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); join_node = crm_get_peer(0, join_to); if (crm_is_peer_active(join_node) == FALSE) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge node's join request crm_debug("Acknowledging join-%d request from %s", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); - crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, true); crm_update_peer_join(__func__, join_node, crm_join_finalized); pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER); send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " CRM_XS " highest=%llu state=%s for=%s", current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq, fsa_state2string(cur_state), source); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) { crm_debug("join-%d: Delaying finalization until we have CIB " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { int count = crmd_join_phase_count(crm_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { int count = crmd_join_phase_count(crm_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, crm_join_phase_str(peer->join)); } } diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index 550c19c5cb..493675a50a 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -1,439 +1,439 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ /* put these first so that uuid_t is defined without conflicts */ #include #include #include #include #include #include #include #include gboolean membership_flux_hack = FALSE; void post_cache_update(int instance); int last_peer_update = 0; guint highest_born_on = -1; extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); static void reap_dead_nodes(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; if (crm_is_peer_active(node) == FALSE) { crm_update_peer_join(__func__, node, crm_join_none); if(node && node->uname) { if (pcmk__str_eq(fsa_our_uname, node->uname, pcmk__str_casei)) { crm_err("We're not part of the cluster anymore"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); } else if (AM_I_DC == FALSE && pcmk__str_eq(node->uname, fsa_our_dc, pcmk__str_casei)) { crm_warn("Our DC node (%s) left the cluster", node->uname); register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL); } } if (fsa_state == S_INTEGRATION || fsa_state == S_FINALIZE_JOIN) { check_join_state(fsa_state, __func__); } if(node && node->uuid) { fail_incompletable_actions(transition_graph, node->uuid); } } } gboolean ever_had_quorum = FALSE; void post_cache_update(int instance) { xmlNode *no_op = NULL; crm_peer_seq = instance; crm_debug("Updated cache after membership event %d.", instance); g_hash_table_foreach(crm_peer_cache, reap_dead_nodes, NULL); controld_set_fsa_input_flags(R_MEMBERSHIP); if (AM_I_DC) { populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer | node_update_expected, __func__); } /* * If we lost nodes, we should re-check the election status * Safe to call outside of an election */ controld_set_fsa_action_flags(A_ELECTION_CHECK); trigger_fsa(); /* Membership changed, remind everyone we're here. * This will aid detection of duplicate DCs */ no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD, AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL); send_cluster_message(NULL, crm_msg_crmd, no_op, FALSE); free_xml(no_op); } static void crmd_node_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; last_peer_update = 0; if (rc == pcmk_ok) { crm_trace("Node update %d complete", call_id); } else if(call_id < pcmk_ok) { crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /*! * \internal * \brief Create an XML node state tag with updates * * \param[in,out] node Node whose state will be used for update * \param[in] flags Bitmask of node_update_flags indicating what to update * \param[in,out] parent XML node to contain update (or NULL) * \param[in] source Who requested the update (only used for logging) * * \return Pointer to created node state tag */ xmlNode * create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, const char *source) { const char *value = NULL; xmlNode *node_state; if (!node->state) { crm_info("Node update for %s cancelled: no state, not seen yet", node->uname); return NULL; } node_state = create_xml_node(parent, XML_CIB_TAG_STATE); if (pcmk_is_set(node->flags, crm_remote_node)) { - crm_xml_add(node_state, XML_NODE_IS_REMOTE, XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(node_state, XML_NODE_IS_REMOTE, true); } set_uuid(node_state, XML_ATTR_UUID, node); if (crm_element_value(node_state, XML_ATTR_UUID) == NULL) { crm_info("Node update for %s cancelled: no id", node->uname); free_xml(node_state); return NULL; } crm_xml_add(node_state, XML_ATTR_UNAME, node->uname); if ((flags & node_update_cluster) && node->state) { - crm_xml_add_boolean(node_state, XML_NODE_IN_CLUSTER, - pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); + pcmk__xe_set_bool_attr(node_state, XML_NODE_IN_CLUSTER, + pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)); } if (!pcmk_is_set(node->flags, crm_remote_node)) { if (flags & node_update_peer) { value = OFFLINESTATUS; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { value = ONLINESTATUS; } crm_xml_add(node_state, XML_NODE_IS_PEER, value); } if (flags & node_update_join) { if (node->join <= crm_join_none) { value = CRMD_JOINSTATE_DOWN; } else { value = CRMD_JOINSTATE_MEMBER; } crm_xml_add(node_state, XML_NODE_JOIN_STATE, value); } if (flags & node_update_expected) { crm_xml_add(node_state, XML_NODE_EXPECTED, node->expected); } } crm_xml_add(node_state, XML_ATTR_ORIGIN, source); return node_state; } static void remove_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *node_uuid = user_data; do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)", node_uuid, pcmk_strerror(rc), rc); } static void search_conflicting_node_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { char *new_node_uuid = user_data; xmlNode *node_xml = NULL; if (rc != pcmk_ok) { if (rc != -ENXIO) { crm_notice("Searching conflicting nodes for %s failed: %s (%d)", new_node_uuid, pcmk_strerror(rc), rc); } return; } else if (output == NULL) { return; } if (pcmk__str_eq(crm_element_name(output), XML_CIB_TAG_NODE, pcmk__str_casei)) { node_xml = output; } else { node_xml = pcmk__xml_first_child(output); } for (; node_xml != NULL; node_xml = pcmk__xml_next(node_xml)) { const char *node_uuid = NULL; const char *node_uname = NULL; GHashTableIter iter; crm_node_t *node = NULL; gboolean known = FALSE; if (!pcmk__str_eq(crm_element_name(node_xml), XML_CIB_TAG_NODE, pcmk__str_casei)) { continue; } node_uuid = crm_element_value(node_xml, XML_ATTR_ID); node_uname = crm_element_value(node_xml, XML_ATTR_UNAME); if (node_uuid == NULL || node_uname == NULL) { continue; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uuid && pcmk__str_eq(node->uuid, node_uuid, pcmk__str_casei) && node->uname && pcmk__str_eq(node->uname, node_uname, pcmk__str_casei)) { known = TRUE; break; } } if (known == FALSE) { int delete_call_id = 0; xmlNode *node_state_xml = NULL; crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s", node_uuid, node_uname, new_node_uuid); delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_NODES, node_xml, cib_scope_local | cib_quorum_override); fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid), remove_conflicting_node_callback); node_state_xml = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(node_state_xml, XML_ATTR_ID, node_uuid); crm_xml_add(node_state_xml, XML_ATTR_UNAME, node_uname); delete_call_id = fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, node_state_xml, cib_scope_local | cib_quorum_override); fsa_register_cib_callback(delete_call_id, FALSE, strdup(node_uuid), remove_conflicting_node_callback); free_xml(node_state_xml); } } } static void node_list_update_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if(call_id < pcmk_ok) { crm_err("Node list update failed: %s (%d)", pcmk_strerror(call_id), call_id); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else if(rc < pcmk_ok) { crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "update:failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } #define NODE_PATH_MAX 512 void populate_cib_nodes(enum node_update_flags flags, const char *source) { int call_id = 0; gboolean from_hashtable = TRUE; int call_options = cib_scope_local | cib_quorum_override; xmlNode *node_list = create_xml_node(NULL, XML_CIB_TAG_NODES); #if SUPPORT_COROSYNC if (!pcmk_is_set(flags, node_update_quick) && is_corosync_cluster()) { from_hashtable = pcmk__corosync_add_nodes(node_list); } #endif if (from_hashtable) { GHashTableIter iter; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { xmlNode *new_node = NULL; crm_trace("Creating node entry for %s/%s", node->uname, node->uuid); if(node->uuid && node->uname) { char xpath[NODE_PATH_MAX]; /* We need both to be valid */ new_node = create_xml_node(node_list, XML_CIB_TAG_NODE); crm_xml_add(new_node, XML_ATTR_ID, node->uuid); crm_xml_add(new_node, XML_ATTR_UNAME, node->uname); /* Search and remove unknown nodes with the conflicting uname from CIB */ snprintf(xpath, NODE_PATH_MAX, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s'][@id!='%s']", node->uname, node->uuid); call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, xpath, NULL, cib_scope_local | cib_xpath); fsa_register_cib_callback(call_id, FALSE, strdup(node->uuid), search_conflicting_node_callback); } } } crm_trace("Populating section from %s", from_hashtable ? "hashtable" : "cluster"); fsa_cib_update(XML_CIB_TAG_NODES, node_list, call_options, call_id, NULL); fsa_register_cib_callback(call_id, FALSE, NULL, node_list_update_callback); free_xml(node_list); if (call_id >= pcmk_ok && crm_peer_cache != NULL && AM_I_DC) { /* * There is no need to update the local CIB with our values if * we've not seen valid membership data */ GHashTableIter iter; crm_node_t *node = NULL; node_list = create_xml_node(NULL, XML_CIB_TAG_STATUS); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } if (crm_remote_peer_cache) { g_hash_table_iter_init(&iter, crm_remote_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { create_node_state_update(node, flags, node_list, source); } } fsa_cib_update(XML_CIB_TAG_STATUS, node_list, call_options, call_id, NULL); fsa_register_cib_callback(call_id, FALSE, NULL, crmd_node_update_complete); last_peer_update = call_id; free_xml(node_list); } } static void cib_quorum_update_complete(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_trace("Quorum update %d complete", call_id); } else { crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } void crm_update_quorum(gboolean quorum, gboolean force_update) { ever_had_quorum |= quorum; if(ever_had_quorum && quorum == FALSE && no_quorum_suicide_escalation) { pcmk__panic(__func__); } if (AM_I_DC && (force_update || fsa_has_quorum != quorum)) { int call_id = 0; xmlNode *update = NULL; int call_options = cib_scope_local | cib_quorum_override; update = create_xml_node(NULL, XML_TAG_CIB); crm_xml_add_int(update, XML_ATTR_HAVE_QUORUM, quorum); crm_xml_add(update, XML_ATTR_DC_UUID, fsa_our_uuid); fsa_cib_update(XML_TAG_CIB, update, call_options, call_id, NULL); crm_debug("Updating quorum status to %s (call=%d)", pcmk__btoa(quorum), call_id); fsa_register_cib_callback(call_id, FALSE, NULL, cib_quorum_update_complete); free_xml(update); /* Quorum changes usually cause a new transition via other activity: * quorum gained via a node joining will abort via the node join, * and quorum lost via a node leaving will usually abort via resource * activity and/or fencing. * * However, it is possible that nothing else causes a transition (e.g. * someone forces quorum via corosync-cmaptcl, or quorum is lost due to * a node in standby shutting down cleanly), so here ensure a new * transition is triggered. */ if (quorum) { /* If quorum was gained, abort after a short delay, in case multiple * nodes are joining around the same time, so the one that brings us * to quorum doesn't cause all the remaining ones to be fenced. */ abort_after_delay(INFINITY, tg_restart, "Quorum gained", 5000); } else { abort_transition(INFINITY, tg_restart, "Quorum lost", NULL); } } fsa_has_quorum = quorum; } diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 7ffb0561c0..31d3524c33 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -1,1283 +1,1286 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include GList *fsa_message_queue = NULL; extern void crm_shutdown(int nsig); static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause); static void handle_response(xmlNode *stored_msg); static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause); static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); static void send_msg_via_ipc(xmlNode * msg, const char *sys); /* debug only, can wrap all it likes */ int last_data_id = 0; void register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, fsa_data_t * cur_data, void *new_data, const char *raised_from) { /* save the current actions if any */ if (fsa_actions != A_NOTHING) { register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL, I_NULL, cur_data ? cur_data->data : NULL, fsa_actions, TRUE, __func__); } /* reset the action list */ crm_info("Resetting the current action list"); fsa_dump_actions(fsa_actions, "Drop"); fsa_actions = A_NOTHING; /* register the error */ register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from); } int register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, void *data, uint64_t with_actions, gboolean prepend, const char *raised_from) { unsigned old_len = g_list_length(fsa_message_queue); fsa_data_t *fsa_data = NULL; if (raised_from == NULL) { raised_from = ""; } if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) { /* no point doing anything */ crm_err("Cannot add entry to queue: no input and no action"); return 0; } if (input == I_WAIT_FOR_EVENT) { do_fsa_stall = TRUE; crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", raised_from, fsa_cause2string(cause), data, old_len); if (old_len > 0) { fsa_dump_queue(LOG_TRACE); prepend = FALSE; } if (data == NULL) { controld_set_fsa_action_flags(with_actions); fsa_dump_actions(with_actions, "Restored"); return 0; } /* Store everything in the new event and reset fsa_actions */ with_actions |= fsa_actions; fsa_actions = A_NOTHING; } last_data_id++; crm_trace("%s %s FSA input %d (%s) due to %s, %s data", raised_from, (prepend? "prepended" : "appended"), last_data_id, fsa_input2string(input), fsa_cause2string(cause), (data? "with" : "without")); fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; fsa_data->fsa_input = input; fsa_data->fsa_cause = cause; fsa_data->origin = raised_from; fsa_data->data = NULL; fsa_data->data_type = fsa_dt_none; fsa_data->actions = with_actions; if (with_actions != A_NOTHING) { crm_trace("Adding actions %.16llx to input", (unsigned long long) with_actions); } if (data != NULL) { switch (cause) { case C_FSA_INTERNAL: case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); crm_trace("Copying %s data from %s as cluster message data", fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; case C_LRM_OP_CALLBACK: crm_trace("Copying %s data from %s as lrmd_event_data_t", fsa_cause2string(cause), raised_from); fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data); fsa_data->data_type = fsa_dt_lrm; break; case C_TIMER_POPPED: case C_SHUTDOWN: case C_UNKNOWN: case C_STARTUP: crm_crit("Copying %s data (from %s) is not yet implemented", fsa_cause2string(cause), raised_from); crmd_exit(CRM_EX_SOFTWARE); break; } } /* make sure to free it properly later */ if (prepend) { fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } crm_trace("FSA message queue length is %d", g_list_length(fsa_message_queue)); /* fsa_dump_queue(LOG_TRACE); */ if (old_len == g_list_length(fsa_message_queue)) { crm_err("Couldn't add message to the queue"); } if (fsa_source && input != I_WAIT_FOR_EVENT) { crm_trace("Triggering FSA"); mainloop_set_trigger(fsa_source); } return last_data_id; } void fsa_dump_queue(int log_level) { int offset = 0; GList *lpc = NULL; for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) { fsa_data_t *data = (fsa_data_t *) lpc->data; do_crm_log_unlikely(log_level, "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)", offset++, data->id, fsa_input2string(data->fsa_input), data->origin, data->data, data->data_type, fsa_cause2string(data->fsa_cause)); } } ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t)); CRM_ASSERT(copy != NULL); copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL; copy->xml = get_message_xml(copy->msg, F_CRM_DATA); return copy; } void delete_fsa_input(fsa_data_t * fsa_data) { lrmd_event_data_t *op = NULL; xmlNode *foo = NULL; if (fsa_data == NULL) { return; } crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause)); if (fsa_data->data != NULL) { switch (fsa_data->data_type) { case fsa_dt_ha_msg: delete_ha_msg_input(fsa_data->data); break; case fsa_dt_xml: foo = fsa_data->data; free_xml(foo); break; case fsa_dt_lrm: op = (lrmd_event_data_t *) fsa_data->data; lrmd_free_event(op); break; case fsa_dt_none: if (fsa_data->data != NULL) { crm_err("Don't know how to free %s data from %s", fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); crmd_exit(CRM_EX_SOFTWARE); } break; } crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause)); } free(fsa_data); } /* returns the next message */ fsa_data_t * get_message(void) { fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0); fsa_message_queue = g_list_remove(fsa_message_queue, message); crm_trace("Processing input %d", message->id); return message; } void * fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller) { void *ret_val = NULL; if (fsa_data == NULL) { crm_err("%s: No FSA data available", caller); } else if (fsa_data->data == NULL) { crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin); } else if (fsa_data->data_type != a_type) { crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s", caller, fsa_data->data_type, a_type, fsa_data->origin); CRM_ASSERT(fsa_data->data_type == a_type); } else { ret_val = fsa_data->data; } return ret_val; } /* A_MSG_ROUTE */ void do_msg_route(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); route_message(msg_data->fsa_cause, input->msg); } void route_message(enum crmd_fsa_cause cause, xmlNode * input) { ha_msg_input_t fsa_input; enum crmd_fsa_input result = I_NULL; fsa_input.msg = input; CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return); /* try passing the buck first */ if (relay_message(input, cause == C_IPC_MESSAGE)) { return; } /* handle locally */ result = handle_message(input, cause); /* done or process later? */ switch (result) { case I_NULL: case I_CIB_OP: case I_ROUTER: case I_NODE_JOIN: case I_JOIN_REQUEST: case I_JOIN_RESULT: break; default: /* Defering local processing of message */ register_fsa_input_later(cause, result, &fsa_input); return; } if (result != I_NULL) { /* add to the front of the queue */ register_fsa_input(cause, result, &fsa_input); } } gboolean relay_message(xmlNode * msg, gboolean originated_locally) { int dest = 1; bool is_for_dc = false; bool is_for_dcib = false; bool is_for_te = false; bool is_for_crm = false; bool is_for_cib = false; bool is_local = false; const char *host_to = crm_element_value(msg, F_CRM_HOST_TO); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *type = crm_element_value(msg, F_TYPE); const char *task = crm_element_value(msg, F_CRM_TASK); const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE); if (ref == NULL) { ref = "without reference ID"; } if (msg == NULL) { crm_warn("Cannot route empty message"); return TRUE; } else if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) { /* quietly ignore */ crm_trace("No routing needed for hello message %s", ref); return TRUE; } else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) { crm_warn("Cannot route message %s: Type is '%s' not '" T_CRM "'", ref, (type? type : "missing")); crm_log_xml_warn(msg, "[bad message type]"); return TRUE; } else if (sys_to == NULL) { crm_warn("Cannot route message %s: No subsystem specified", ref); crm_log_xml_warn(msg, "[no subsystem]"); return TRUE; } is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0); is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0); is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0); is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0); is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0); is_local = false; if (pcmk__str_empty(host_to)) { if (is_for_dc || is_for_te) { is_local = false; } else if (is_for_crm) { if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO, PCMK__CONTROLD_CMD_NODES, NULL)) { /* Node info requests do not specify a host, which is normally * treated as "all hosts", because the whole point is that the * client may not know the local node name. Always handle these * requests locally. */ is_local = true; } else { is_local = !originated_locally; } } else { is_local = true; } } else if (pcmk__str_eq(fsa_our_uname, host_to, pcmk__str_casei)) { is_local = true; } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA); const char *mode = crm_element_value(msg_data, PCMK__XA_MODE); if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) { // Local delete of an offline node's resource history is_local = true; } } if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { crm_trace("Route message %s locally as transition request", ref); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { crm_trace("Route message %s locally as DC request", ref); return FALSE; // More to be done by caller } else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) { #if SUPPORT_COROSYNC if (is_corosync_cluster()) { dest = text2msg_type(sys_to); } #endif crm_trace("Relay message %s to DC", ref); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } else { /* Neither the TE nor the scheduler should be sending messages * to DCs on other nodes. By definition, if we are no longer the DC, * then the scheduler's or TE's data should be discarded. */ crm_trace("Discard message %s because we are not DC", ref); } } else if (is_local && (is_for_crm || is_for_cib)) { crm_trace("Route message %s locally as controller request", ref); return FALSE; // More to be done by caller } else if (is_local) { crm_trace("Relay message %s locally to %s", ref, (sys_to? sys_to : "unknown client")); crm_log_xml_trace(msg, "[IPC relay]"); send_msg_via_ipc(msg, sys_to); } else { crm_node_t *node_to = NULL; #if SUPPORT_COROSYNC if (is_corosync_cluster()) { dest = text2msg_type(sys_to); if (dest == crm_msg_none || dest > crm_msg_stonith_ng) { dest = crm_msg_crmd; } } #endif if (host_to) { node_to = pcmk__search_cluster_node_cache(0, host_to); if (node_to == NULL) { crm_warn("Cannot route message %s: Unknown node %s", ref, host_to); return TRUE; } crm_trace("Relay message %s to %s", ref, (node_to->uname? node_to->uname : "peer")); } else { crm_trace("Broadcast message %s to all peers", ref); } send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE); } return TRUE; // No further processing of message is needed } // Return true if field contains a positive integer static bool authorize_version(xmlNode *message_data, const char *field, const char *client_name, const char *ref, const char *uuid) { const char *version = crm_element_value(message_data, field); long long version_num; if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok) || (version_num < 0LL)) { crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s " CRM_XS " ref=%s uuid=%s", client_name, ((version == NULL)? "" : version), field, (ref? ref : "none"), uuid); return false; } return true; } /*! * \internal * \brief Check whether a client IPC message is acceptable * * If a given client IPC message is a hello, "authorize" it by ensuring it has * valid information such as a protocol version, and return false indicating * that nothing further needs to be done with the message. If the message is not * a hello, just return true to indicate it needs further processing. * * \param[in] client_msg XML of IPC message * \param[in] curr_client If IPC is not proxied, client that sent message * \param[in] proxy_session If IPC is proxied, the session ID * * \return true if message needs further processing, false if it doesn't */ bool controld_authorize_ipc_message(xmlNode *client_msg, pcmk__client_t *curr_client, const char *proxy_session) { xmlNode *message_data = NULL; const char *client_name = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE); const char *uuid = (curr_client? curr_client->id : proxy_session); if (uuid == NULL) { crm_warn("IPC message from client rejected: No client identifier " CRM_XS " ref=%s", (ref? ref : "none")); goto rejected; } if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) { // Only hello messages need to be authorized return true; } message_data = get_message_xml(client_msg, F_CRM_DATA); client_name = crm_element_value(message_data, "client_name"); if (pcmk__str_empty(client_name)) { crm_warn("IPC hello from client rejected: No client name", CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); goto rejected; } if (!authorize_version(message_data, "major_version", client_name, ref, uuid)) { goto rejected; } if (!authorize_version(message_data, "minor_version", client_name, ref, uuid)) { goto rejected; } crm_trace("Validated IPC hello from client %s", client_name); if (curr_client) { curr_client->userdata = strdup(client_name); } mainloop_set_trigger(fsa_source); return false; rejected: if (curr_client) { qb_ipcs_disconnect(curr_client->ipcs); } return false; } static enum crmd_fsa_input handle_message(xmlNode *msg, enum crmd_fsa_cause cause) { const char *type = NULL; CRM_CHECK(msg != NULL, return I_NULL); type = crm_element_value(msg, F_CRM_MSG_TYPE); if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) { return handle_request(msg, cause); } else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) { handle_response(msg); return I_NULL; } crm_err("Unknown message type: %s", type); return I_NULL; } static enum crmd_fsa_input handle_failcount_op(xmlNode * stored_msg) { const char *rsc = NULL; const char *uname = NULL; const char *op = NULL; char *interval_spec = NULL; guint interval_ms = 0; gboolean is_remote_node = FALSE; xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA); if (xml_op) { xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE); xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS); if (xml_rsc) { rsc = ID(xml_rsc); } if (xml_attrs) { op = crm_element_value(xml_attrs, CRM_META "_" XML_RSC_ATTR_CLEAR_OP); crm_element_value_ms(xml_attrs, CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL, &interval_ms); } } uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET); if ((rsc == NULL) || (uname == NULL)) { crm_log_xml_warn(stored_msg, "invalid failcount op"); return I_NULL; } if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) { is_remote_node = TRUE; } if (interval_ms) { interval_spec = crm_strdup_printf("%ums", interval_ms); } update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node); free(interval_spec); lrm_clear_last_failure(rsc, uname, op, interval_ms); return I_NULL; } static enum crmd_fsa_input handle_lrm_delete(xmlNode *stored_msg) { const char *mode = NULL; xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA); CRM_CHECK(msg_data != NULL, return I_NULL); /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to * relay the operation to the affected node, which will unregister the * resource from the local executor, clear the resource's history from the * CIB, and do some bookkeeping in the controller. * * However, if the affected node is offline, the client will specify * mode="cib" which means the controller receiving the operation should * clear the resource's history from the CIB and nothing else. This is used * to clear shutdown locks. */ mode = crm_element_value(msg_data, PCMK__XA_MODE); if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) { // Relay to affected node crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else { // Delete CIB history locally (compare with do_lrm_delete()) const char *from_sys = NULL; const char *user_name = NULL; const char *rsc_id = NULL; const char *node = NULL; xmlNode *rsc_xml = NULL; int rc = pcmk_rc_ok; rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE); CRM_CHECK(rsc_xml != NULL, return I_NULL); rsc_id = ID(rsc_xml); from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM); node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET); user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL); crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s " "(clearing CIB resource history only)", rsc_id, node, (user_name? " for user " : ""), (user_name? user_name : "")); rc = controld_delete_resource_history(rsc_id, node, user_name, cib_dryrun|cib_sync_call); if (rc == pcmk_rc_ok) { rc = controld_delete_resource_history(rsc_id, node, user_name, crmd_cib_smart_opt()); } //Notify client and tengine.(Only notify tengine if mode = "cib" and CRM_OP_LRM_DELETE.) if (from_sys) { lrmd_event_data_t *op = NULL; const char *from_host = crm_element_value(stored_msg, F_CRM_HOST_FROM); const char *transition; if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) { transition = crm_element_value(msg_data, XML_ATTR_TRANSITION_KEY); } else { transition = crm_element_value(stored_msg, XML_ATTR_TRANSITION_KEY); } crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "local node"), rsc_id, ((rc == pcmk_rc_ok)? "" : " not")); op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0); op->type = lrmd_event_exec_complete; op->user_data = strdup(transition? transition : FAKE_TE_ID); op->params = pcmk__strkey_table(free, free); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); controld_rc2event(op, rc); controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); controld_trigger_delete_refresh(from_sys, rsc_id); } return I_NULL; } } /*! * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_remote_state(xmlNode *msg) { const char *remote_uname = ID(msg); - const char *remote_is_up = crm_element_value(msg, XML_NODE_IN_CLUSTER); crm_node_t *remote_peer; + bool remote_is_up = false; + int rc = pcmk_rc_ok; - CRM_CHECK(remote_uname && remote_is_up, return I_NULL); + rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up); + + CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL); remote_peer = crm_remote_peer_get(remote_uname); CRM_CHECK(remote_peer, return I_NULL); pcmk__update_peer_state(__func__, remote_peer, - crm_is_true(remote_is_up)? CRM_NODE_MEMBER : CRM_NODE_LOST, + remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST, 0); return I_NULL; } /*! * \brief Handle a CRM_OP_PING message * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_ping(xmlNode *msg) { const char *value = NULL; xmlNode *ping = NULL; // Build reply ping = create_xml_node(NULL, XML_CRM_TAG_PING); value = crm_element_value(msg, F_CRM_SYS_TO); crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); // Add controller state value = fsa_state2string(fsa_state); crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value); crm_notice("Current ping state: %s", value); // CTS needs this // Add controller health // @TODO maybe do some checks to determine meaningful status crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); // Send reply msg = create_reply(msg, ping); free_xml(ping); if (msg) { (void) relay_message(msg, TRUE); free_xml(msg); } // Nothing further to do return I_NULL; } /*! * \brief Handle a PCMK__CONTROLD_CMD_NODES message * * \return Next FSA input */ static enum crmd_fsa_input handle_node_list(xmlNode *request) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *reply = NULL; xmlNode *reply_data = NULL; // Create message data for reply reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE); crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t crm_xml_add(xml, XML_ATTR_UNAME, node->uname); crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state); } // Create and send reply reply = create_reply(request, reply_data); free_xml(reply_data); if (reply) { (void) relay_message(reply, TRUE); free_xml(reply); } // Nothing further to do return I_NULL; } /*! * \brief Handle a CRM_OP_NODE_INFO request * * \param[in] msg Message XML * * \return Next FSA input */ static enum crmd_fsa_input handle_node_info_request(xmlNode *msg) { const char *value = NULL; crm_node_t *node = NULL; int node_id = 0; xmlNode *reply = NULL; // Build reply reply = create_xml_node(NULL, XML_CIB_TAG_NODE); crm_xml_add(reply, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD); // Add whether current partition has quorum - crm_xml_add_boolean(reply, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); + pcmk__xe_set_bool_attr(reply, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); // Check whether client requested node info by ID and/or name crm_element_value_int(msg, XML_ATTR_ID, &node_id); if (node_id < 0) { node_id = 0; } value = crm_element_value(msg, XML_ATTR_UNAME); // Default to local node if none given if ((node_id == 0) && (value == NULL)) { value = fsa_our_uname; } node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY); if (node) { crm_xml_add_int(reply, XML_ATTR_ID, node->id); crm_xml_add(reply, XML_ATTR_UUID, node->uuid); crm_xml_add(reply, XML_ATTR_UNAME, node->uname); crm_xml_add(reply, XML_NODE_IS_PEER, node->state); - crm_xml_add_boolean(reply, XML_NODE_IS_REMOTE, - node->flags & crm_remote_node); + pcmk__xe_set_bool_attr(reply, XML_NODE_IS_REMOTE, + pcmk_is_set(node->flags, crm_remote_node)); } // Send reply msg = create_reply(msg, reply); free_xml(reply); if (msg) { (void) relay_message(msg, TRUE); free_xml(msg); } // Nothing further to do return I_NULL; } static void verify_feature_set(xmlNode *msg) { const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION); if (dc_version == NULL) { /* All we really know is that the DC feature set is older than 3.1.0, * but that's also all that really matters. */ dc_version = "3.0.14"; } if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) { crm_trace("Local feature set (%s) is compatible with DC's (%s)", CRM_FEATURE_SET, dc_version); } else { crm_err("Local feature set (%s) is incompatible with DC's (%s)", CRM_FEATURE_SET, dc_version); // Nothing is likely to improve without administrator involvement controld_set_fsa_input_flags(R_STAYDOWN); crmd_exit(CRM_EX_FATAL); } } // DC gets own shutdown all-clear static enum crmd_fsa_input handle_shutdown_self_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { // The expected case -- we initiated own shutdown sequence crm_info("Shutting down controller"); return I_STOP; } if (pcmk__str_eq(host_from, fsa_our_dc, pcmk__str_casei)) { // Must be logic error -- DC confirming its own unrequested shutdown crm_err("Shutting down controller immediately due to " "unexpected shutdown confirmation"); return I_TERMINATE; } if (fsa_state != S_STOPPING) { // Shouldn't happen -- non-DC confirming unrequested shutdown crm_err("Starting new DC election because %s is " "confirming shutdown we did not request", (host_from? host_from : "another node")); return I_ELECTION; } // Shouldn't happen, but we are already stopping anyway crm_debug("Ignoring unexpected shutdown confirmation from %s", (host_from? host_from : "another node")); return I_NULL; } // Non-DC gets shutdown all-clear from DC static enum crmd_fsa_input handle_shutdown_ack(xmlNode *stored_msg) { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { crm_warn("Ignoring shutdown request without origin specified"); return I_NULL; } if ((fsa_our_dc == NULL) || (strcmp(host_from, fsa_our_dc) == 0)) { if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { crm_info("Shutting down controller after confirmation from %s", host_from); } else { crm_err("Shutting down controller after unexpected " "shutdown request from %s", host_from); controld_set_fsa_input_flags(R_STAYDOWN); } return I_STOP; } crm_warn("Ignoring shutdown request from %s because DC is %s", host_from, fsa_our_dc); return I_NULL; } static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) { xmlNode *msg = NULL; const char *op = crm_element_value(stored_msg, F_CRM_TASK); /* Optimize this for the DC - it has the most to do */ if (op == NULL) { crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]"); return I_NULL; } if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_node_t *node = pcmk__search_cluster_node_cache(0, from); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { return I_NULL; /* Done */ } } /*========== DC-Only Actions ==========*/ if (AM_I_DC) { if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) { return I_NODE_JOIN; } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) { return I_JOIN_REQUEST; } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) { return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_self_ack(stored_msg); } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { // Another controller wants to shut down its node return handle_shutdown_request(stored_msg); } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) { /* a remote connection host is letting us know the node state */ return handle_remote_state(stored_msg); } } /*========== common actions ==========*/ if (strcmp(op, CRM_OP_NOVOTE) == 0) { ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); } else if (strcmp(op, CRM_OP_THROTTLE) == 0) { throttle_update(stored_msg); if (AM_I_DC && transition_graph != NULL) { if (transition_graph->complete == FALSE) { crm_debug("The throttle changed. Trigger a graph."); trigger_graph(); } } return I_NULL; } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) { return handle_failcount_op(stored_msg); } else if (strcmp(op, CRM_OP_VOTE) == 0) { /* count the vote and decide what to do after that */ ha_msg_input_t fsa_input; fsa_input.msg = stored_msg; register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input, A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE, __func__); /* Sometimes we _must_ go into S_ELECTION */ if (fsa_state == S_HALT) { crm_debug("Forcing an election from S_HALT"); return I_ELECTION; #if 0 } else if (AM_I_DC) { /* This is the old way of doing things but what is gained? */ return I_ELECTION; #endif } } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) { verify_feature_set(stored_msg); crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_OFFER; } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) { crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID)); return I_JOIN_RESULT; } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { return handle_lrm_delete(stored_msg); } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0) || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) || (strcmp(op, CRM_OP_REPROBE) == 0)) { crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD); return I_ROUTER; } else if (strcmp(op, CRM_OP_NOOP) == 0) { return I_NULL; } else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) { crm_shutdown(SIGTERM); /*return I_SHUTDOWN; */ return I_NULL; } else if (strcmp(op, CRM_OP_PING) == 0) { return handle_ping(stored_msg); } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) { return handle_node_info_request(stored_msg); } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) { int id = 0; const char *name = NULL; crm_element_value_int(stored_msg, XML_ATTR_ID, &id); name = crm_element_value(stored_msg, XML_ATTR_UNAME); if(cause == C_IPC_MESSAGE) { msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { crm_err("Could not instruct peers to remove references to node %s/%u", name, id); } else { crm_notice("Instructing peers to remove references to node %s/%u", name, id); } free_xml(msg); } else { reap_crm_member(id, name); /* If we're forgetting this node, also forget any failures to fence * it, so we don't carry that over to any node added later with the * same name. */ st_fail_count_reset(name); } } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) { xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA); remote_ra_process_maintenance_nodes(xml); } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) { return handle_node_list(stored_msg); /*========== (NOT_DC)-Only Actions ==========*/ } else if (!AM_I_DC) { if (strcmp(op, CRM_OP_SHUTDOWN) == 0) { return handle_shutdown_ack(stored_msg); } } else { crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node"); crm_log_xml_err(stored_msg, "Unexpected"); } return I_NULL; } static void handle_response(xmlNode *stored_msg) { const char *op = crm_element_value(stored_msg, F_CRM_TASK); if (op == NULL) { crm_log_xml_err(stored_msg, "Bad message"); } else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) { // Check whether scheduler answer been superseded by subsequent request const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE); if (msg_ref == NULL) { crm_err("%s - Ignoring calculation with no reference", op); } else if (pcmk__str_eq(msg_ref, fsa_pe_ref, pcmk__str_casei)) { ha_msg_input_t fsa_input; controld_stop_sched_timer(); fsa_input.msg = stored_msg; register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); } else { crm_info("%s calculation %s is obsolete", op, msg_ref); } } else if (strcmp(op, CRM_OP_VOTE) == 0 || strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) { } else { const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); crm_err("Unexpected response (op=%s, src=%s) sent to the %s", op, host_from, AM_I_DC ? "DC" : "controller"); } } static enum crmd_fsa_input handle_shutdown_request(xmlNode * stored_msg) { /* handle here to avoid potential version issues * where the shutdown message/procedure may have * been changed in later versions. * * This way the DC is always in control of the shutdown */ char *now_s = NULL; const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM); if (host_from == NULL) { /* we're shutting down and the DC */ host_from = fsa_our_uname; } crm_info("Creating shutdown request for %s (state=%s)", host_from, fsa_state2string(fsa_state)); crm_log_xml_trace(stored_msg, "message"); now_s = pcmk__ttoa(time(NULL)); update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE); free(now_s); /* will be picked up by the TE as long as its running */ return I_NULL; } /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); static void send_msg_via_ipc(xmlNode * msg, const char *sys) { pcmk__client_t *client_channel = pcmk__find_client_by_id(sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { crm_xml_add(msg, F_CRM_HOST_FROM, fsa_our_uname); } if (client_channel != NULL) { /* Transient clients such as crmadmin */ pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); process_te_message(msg, data); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_LRMD) == 0) { fsa_data_t fsa_data; ha_msg_input_t fsa_input; fsa_input.msg = msg; fsa_input.xml = get_message_xml(msg, F_CRM_DATA); fsa_data.id = 0; fsa_data.actions = 0; fsa_data.data = &fsa_input; fsa_data.fsa_input = I_MESSAGE; fsa_data.fsa_cause = C_IPC_MESSAGE; fsa_data.origin = __func__; fsa_data.data_type = fsa_dt_ha_msg; do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else if (sys != NULL && crmd_is_proxy_session(sys)) { crmd_proxy_send(sys, msg); } else { crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); } } void delete_ha_msg_input(ha_msg_input_t * orig) { if (orig == NULL) { return; } free_xml(orig->msg); free(orig); } /*! * \internal * \brief Notify the DC of a remote node state change * * \param[in] node_name Node's name * \param[in] node_up TRUE if node is up, FALSE if down */ void send_remote_state_message(const char *node_name, gboolean node_up) { /* If we don't have a DC, or the message fails, we have a failsafe: * the DC will eventually pick up the change via the CIB node state. * The message allows it to happen sooner if possible. */ if (fsa_our_dc) { xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, fsa_our_dc, CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL); crm_info("Notifying DC %s of Pacemaker Remote node %s %s", fsa_our_dc, node_name, (node_up? "coming up" : "going down")); crm_xml_add(msg, XML_ATTR_ID, node_name); - crm_xml_add_boolean(msg, XML_NODE_IN_CLUSTER, node_up); + pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up); send_cluster_message(crm_get_peer(0, fsa_our_dc), crm_msg_crmd, msg, TRUE); free_xml(msg); } else { crm_debug("No DC to notify of Pacemaker Remote node %s %s", node_name, (node_up? "coming up" : "going down")); } } diff --git a/daemons/controld/controld_metadata.c b/daemons/controld/controld_metadata.c index 9bf8355a10..fca169f2c0 100644 --- a/daemons/controld/controld_metadata.c +++ b/daemons/controld/controld_metadata.c @@ -1,390 +1,386 @@ /* * Copyright 2017-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #if ENABLE_VERSIONED_ATTRS static regex_t *version_format_regex = NULL; #endif static void ra_param_free(void *param) { if (param) { struct ra_param_s *p = (struct ra_param_s *) param; if (p->rap_name) { free(p->rap_name); } free(param); } } static void metadata_free(void *metadata) { if (metadata) { struct ra_metadata_s *md = (struct ra_metadata_s *) metadata; if (md->ra_version) { free(md->ra_version); } g_list_free_full(md->ra_params, ra_param_free); free(metadata); } } GHashTable * metadata_cache_new() { return pcmk__strkey_table(free, metadata_free); } void metadata_cache_free(GHashTable *mdc) { if (mdc) { crm_trace("Destroying metadata cache with %d members", g_hash_table_size(mdc)); g_hash_table_destroy(mdc); } } void metadata_cache_reset(GHashTable *mdc) { if (mdc) { crm_trace("Resetting metadata cache with %d members", g_hash_table_size(mdc)); g_hash_table_remove_all(mdc); } } #if ENABLE_VERSIONED_ATTRS static gboolean valid_version_format(const char *version) { if (version == NULL) { return FALSE; } if (version_format_regex == NULL) { /* The OCF standard allows free-form versioning, but for our purposes of * versioned resource and operation attributes, we constrain it to * dot-separated numbers. Agents are still free to use other schemes, * but we can't determine attributes based on them. */ const char *regex_string = "^[[:digit:]]+([.][[:digit:]]+)*$"; version_format_regex = calloc(1, sizeof(regex_t)); regcomp(version_format_regex, regex_string, REG_EXTENDED | REG_NOSUB); /* If our regex doesn't compile, it's a bug on our side, so CRM_CHECK() * will give us a core dump to catch it. Pretend the version is OK * because we don't want our mistake to break versioned attributes * (which should only ever happen in a development branch anyway). */ CRM_CHECK(version_format_regex != NULL, return TRUE); } return regexec(version_format_regex, version, 0, NULL, 0) == 0; } #endif void metadata_cache_fini() { #if ENABLE_VERSIONED_ATTRS if (version_format_regex) { regfree(version_format_regex); free(version_format_regex); version_format_regex = NULL; } #endif } #if ENABLE_VERSIONED_ATTRS static char * ra_version_from_xml(xmlNode *metadata_xml, const lrmd_rsc_info_t *rsc) { const char *version = crm_element_value(metadata_xml, XML_ATTR_VERSION); if (version == NULL) { crm_debug("Metadata for %s:%s:%s does not specify a version", rsc->standard, rsc->provider, rsc->type); version = PCMK_DEFAULT_AGENT_VERSION; } else if (!valid_version_format(version)) { crm_notice("%s:%s:%s metadata version has unrecognized format", rsc->standard, rsc->provider, rsc->type); version = PCMK_DEFAULT_AGENT_VERSION; } else { crm_debug("Metadata for %s:%s:%s has version %s", rsc->standard, rsc->provider, rsc->type, version); } return strdup(version); } #endif static struct ra_param_s * ra_param_from_xml(xmlNode *param_xml) { const char *param_name = crm_element_value(param_xml, "name"); - const char *value; struct ra_param_s *p; p = calloc(1, sizeof(struct ra_param_s)); if (p == NULL) { crm_crit("Could not allocate memory for resource metadata"); return NULL; } p->rap_name = strdup(param_name); if (p->rap_name == NULL) { crm_crit("Could not allocate memory for resource metadata"); free(p); return NULL; } - value = crm_element_value(param_xml, "reloadable"); - if (crm_is_true(value)) { + if (pcmk__xe_attr_is_true(param_xml, "reloadable")) { controld_set_ra_param_flags(p, ra_param_reloadable); } - value = crm_element_value(param_xml, "unique"); - if (crm_is_true(value)) { + if (pcmk__xe_attr_is_true(param_xml, "unique")) { controld_set_ra_param_flags(p, ra_param_unique); } - value = crm_element_value(param_xml, "private"); - if (crm_is_true(value)) { + if (pcmk__xe_attr_is_true(param_xml, "private")) { controld_set_ra_param_flags(p, ra_param_private); } return p; } static void log_ra_ocf_version(const char *ra_key, const char *ra_ocf_version) { if (pcmk__str_empty(ra_ocf_version)) { crm_warn("%s does not advertise OCF version supported", ra_key); } else if (compare_version(ra_ocf_version, "2") >= 0) { crm_warn("%s supports OCF version %s (this Pacemaker version supports " PCMK_OCF_VERSION " and might not work properly with agent)", ra_key, ra_ocf_version); } else if (compare_version(ra_ocf_version, PCMK_OCF_VERSION) > 0) { crm_info("%s supports OCF version %s (this Pacemaker version supports " PCMK_OCF_VERSION " and might not use all agent features)", ra_key, ra_ocf_version); } else { crm_debug("%s supports OCF version %s", ra_key, ra_ocf_version); } } struct ra_metadata_s * metadata_cache_update(GHashTable *mdc, lrmd_rsc_info_t *rsc, const char *metadata_str) { char *key = NULL; xmlNode *metadata = NULL; xmlNode *match = NULL; struct ra_metadata_s *md = NULL; bool any_private_params = false; bool ocf1_1 = false; CRM_CHECK(mdc && rsc && metadata_str, return NULL); key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); if (!key) { crm_crit("Could not allocate memory for resource metadata"); goto err; } metadata = string2xml(metadata_str); if (!metadata) { crm_err("Metadata for %s:%s:%s is not valid XML", rsc->standard, rsc->provider, rsc->type); goto err; } md = calloc(1, sizeof(struct ra_metadata_s)); if (md == NULL) { crm_crit("Could not allocate memory for resource metadata"); goto err; } #if ENABLE_VERSIONED_ATTRS md->ra_version = ra_version_from_xml(metadata, rsc); #endif if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_OCF) == 0) { xmlChar *content = NULL; xmlNode *version_element = first_named_child(metadata, "version"); if (version_element != NULL) { content = xmlNodeGetContent(version_element); } log_ra_ocf_version(key, (const char *) content); if (content != NULL) { ocf1_1 = (compare_version((const char *) content, "1.1") >= 0); xmlFree(content); } } // Check supported actions match = first_named_child(metadata, "actions"); for (match = first_named_child(match, "action"); match != NULL; match = crm_next_same_xml(match)) { const char *action_name = crm_element_value(match, "name"); if (pcmk__str_eq(action_name, CRMD_ACTION_RELOAD_AGENT, pcmk__str_none)) { if (ocf1_1) { controld_set_ra_flags(md, key, ra_supports_reload_agent); } else { crm_notice("reload-agent action will not be used with %s " "because it does not support OCF 1.1 or later", key); } } else if (!ocf1_1 && pcmk__str_eq(action_name, CRMD_ACTION_RELOAD, pcmk__str_casei)) { controld_set_ra_flags(md, key, ra_supports_legacy_reload); } } // Build a parameter list match = first_named_child(metadata, "parameters"); for (match = first_named_child(match, "parameter"); match != NULL; match = crm_next_same_xml(match)) { const char *param_name = crm_element_value(match, "name"); if (param_name == NULL) { crm_warn("Metadata for %s:%s:%s has parameter without a name", rsc->standard, rsc->provider, rsc->type); } else { struct ra_param_s *p = ra_param_from_xml(match); if (p == NULL) { goto err; } if (pcmk_is_set(p->rap_flags, ra_param_private)) { any_private_params = true; } md->ra_params = g_list_prepend(md->ra_params, p); } } /* Newer resource agents support the "private" parameter attribute to * indicate sensitive parameters. For backward compatibility with older * agents, implicitly treat a few common names as private when the agent * doesn't specify any explicitly. */ if (!any_private_params) { for (GList *iter = md->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *p = iter->data; if (pcmk__str_any_of(p->rap_name, "password", "passwd", "user", NULL)) { controld_set_ra_param_flags(p, ra_param_private); } } } g_hash_table_replace(mdc, key, md); free_xml(metadata); return md; err: free(key); free_xml(metadata); metadata_free(md); return NULL; } /*! * \internal * \brief Get meta-data for a resource * * \param[in] lrm_state Use meta-data cache from this executor connection * \param[in] rsc Resource to get meta-data for * \param[in] source Allowed meta-data sources (bitmask of enum * controld_metadata_source_e values) * * \return Meta-data cache entry for given resource, or NULL if not available */ struct ra_metadata_s * controld_get_rsc_metadata(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, uint32_t source) { struct ra_metadata_s *metadata = NULL; char *metadata_str = NULL; char *key = NULL; int rc = pcmk_ok; CRM_CHECK((lrm_state != NULL) && (rsc != NULL), return NULL); if (pcmk_is_set(source, controld_metadata_from_cache)) { key = crm_generate_ra_key(rsc->standard, rsc->provider, rsc->type); if (key != NULL) { metadata = g_hash_table_lookup(lrm_state->metadata_cache, key); free(key); } if (metadata != NULL) { return metadata; } } if (!pcmk_is_set(source, controld_metadata_from_agent)) { return NULL; } /* For now, we always collect resource agent meta-data via a local, * synchronous, direct execution of the agent. This has multiple issues: * the executor should execute agents, not the controller; meta-data for * Pacemaker Remote nodes should be collected on those nodes, not * locally; and the meta-data call shouldn't eat into the timeout of the * real action being performed. * * These issues are planned to be addressed by having the scheduler * schedule a meta-data cache check at the beginning of each transition. * Once that is working, this block will only be a fallback in case the * initial collection fails. */ rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Failed to get metadata for %s (%s:%s:%s): %s", rsc->id, rsc->standard, rsc->provider, rsc->type, pcmk_strerror(rc)); return NULL; } metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, metadata_str); free(metadata_str); if (metadata == NULL) { crm_warn("Failed to update metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); } return metadata; } diff --git a/daemons/controld/controld_te_callbacks.c b/daemons/controld/controld_te_callbacks.c index 228572df30..5ceb324653 100644 --- a/daemons/controld/controld_te_callbacks.c +++ b/daemons/controld/controld_te_callbacks.c @@ -1,702 +1,698 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include /* For ONLINESTATUS etc */ #include void te_update_confirm(const char *event, xmlNode * msg); extern char *te_uuid; gboolean shuttingdown = FALSE; crm_graph_t *transition_graph; crm_trigger_t *transition_trigger = NULL; /* #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */ #define RSC_OP_TEMPLATE "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']" // An explicit shutdown-lock of 0 means the lock has been cleared static bool shutdown_lock_cleared(xmlNode *lrm_resource) { time_t shutdown_lock = 0; return (crm_element_value_epoch(lrm_resource, XML_CONFIG_ATTR_SHUTDOWN_LOCK, &shutdown_lock) == pcmk_ok) && (shutdown_lock == 0); } static void te_update_diff_v1(const char *event, xmlNode *diff) { int lpc, max; xmlXPathObject *xpathObj = NULL; CRM_CHECK(diff != NULL, return); xml_log_patchset(LOG_TRACE, __func__, diff); if (cib_config_changed(NULL, NULL, &diff)) { abort_transition(INFINITY, tg_restart, "Non-status change", diff); goto bail; /* configuration changed */ } /* Tickets Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted); goto bail; } freeXpathObject(xpathObj); /* Tickets Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); /* Transient Attributes - Added/Updated */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *attr = getXpathResult(xpathObj, lpc); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); - const char *value = NULL; - if (pcmk__str_eq(CRM_OP_PROBED, name, pcmk__str_casei)) { - value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); - } - - if (crm_is_true(value) == FALSE) { + if (pcmk__str_eq(CRM_OP_PROBED, name, pcmk__str_casei) && + !pcmk__xe_attr_is_true(attr, XML_NVPAIR_ATTR_VALUE)) { abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr); crm_log_xml_trace(attr, "Abort"); goto bail; } } freeXpathObject(xpathObj); /* Transient Attributes - Removed */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_TRANSIENT_NODEATTRS); if (numXpathResults(xpathObj) > 0) { xmlNode *aborted = getXpathResult(xpathObj, 0); abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted); goto bail; } freeXpathObject(xpathObj); // Check for lrm_resource entries xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RESOURCE); max = numXpathResults(xpathObj); /* * Updates by, or in response to, graph actions will never affect more than * one resource at a time, so such updates indicate an LRM refresh. In that * case, start a new transition rather than check each result individually, * which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && (max > 1)) { crm_debug("Ignoring resource operation updates due to history refresh of %d resources", max); crm_log_xml_trace(diff, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); goto bail; } if (max == 1) { xmlNode *lrm_resource = getXpathResult(xpathObj, 0); if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); // Still process results, so we stop timers and update failcounts } } freeXpathObject(xpathObj); /* Process operation updates */ xpathObj = xpath_search(diff, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); if (max > 0) { int lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } } freeXpathObject(xpathObj); /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */ xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP); max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { int path_max = 0; const char *op_id = NULL; char *rsc_op_xpath = NULL; xmlXPathObject *op_match = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; op_id = ID(match); path_max = strlen(RSC_OP_TEMPLATE) + strlen(op_id) + 1; rsc_op_xpath = calloc(1, path_max); snprintf(rsc_op_xpath, path_max, RSC_OP_TEMPLATE, op_id); op_match = xpath_search(diff, rsc_op_xpath); if (numXpathResults(op_match) == 0) { /* Prevent false positives by matching cancelations too */ const char *node = get_node_id(match); crm_action_t *cancelled = get_cancel_action(op_id, node); if (cancelled == NULL) { crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id, node); abort_transition(INFINITY, tg_restart, "Resource op removal", match); freeXpathObject(op_match); free(rsc_op_xpath); goto bail; } else { crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d", op_id, node, cancelled->id); } } freeXpathObject(op_match); free(rsc_op_xpath); } bail: freeXpathObject(xpathObj); } static void process_lrm_resource_diff(xmlNode *lrm_resource, const char *node) { for (xmlNode *rsc_op = pcmk__xml_first_child(lrm_resource); rsc_op != NULL; rsc_op = pcmk__xml_next(rsc_op)) { process_graph_event(rsc_op, node); } if (shutdown_lock_cleared(lrm_resource)) { // @TODO would be more efficient to abort once after transition done abort_transition(INFINITY, tg_restart, "Shutdown lock cleared", lrm_resource); } } static void process_resource_updates(const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) { xmlNode *rsc = NULL; if (xml == NULL) { return; } if (strcmp(TYPE(xml), XML_CIB_TAG_LRM) == 0) { xml = first_named_child(xml, XML_LRM_TAG_RESOURCES); CRM_CHECK(xml != NULL, return); } CRM_CHECK(strcmp(TYPE(xml), XML_LRM_TAG_RESOURCES) == 0, return); /* * Updates by, or in response to, TE actions will never contain updates * for more than one resource at a time, so such updates indicate an * LRM refresh. * * In that case, start a new transition rather than check each result * individually, which can result in _huge_ speedups in large clusters. * * Unfortunately, we can only do so when there are no pending actions. * Otherwise, we could mistakenly throw away those results here, and * the cluster will stall waiting for them and time out the operation. */ if ((transition_graph->pending == 0) && xml->children && xml->children->next) { crm_log_xml_trace(change, "lrm-refresh"); abort_transition(INFINITY, tg_restart, "History refresh", NULL); return; } for (rsc = pcmk__xml_first_child(xml); rsc != NULL; rsc = pcmk__xml_next(rsc)) { crm_trace("Processing %s", ID(rsc)); process_lrm_resource_diff(rsc, node); } } static char *extract_node_uuid(const char *xpath) { char *mutable_path = strdup(xpath); char *node_uuid = NULL; char *search = NULL; char *match = NULL; match = strstr(mutable_path, "node_state[@id=\'"); if (match == NULL) { free(mutable_path); return NULL; } match += strlen("node_state[@id=\'"); search = strchr(match, '\''); if (search == NULL) { free(mutable_path); return NULL; } search[0] = 0; node_uuid = strdup(match); free(mutable_path); return node_uuid; } static void abort_unless_down(const char *xpath, const char *op, xmlNode *change, const char *reason) { char *node_uuid = NULL; crm_action_t *down = NULL; if(!pcmk__str_eq(op, "delete", pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, reason, change); return; } node_uuid = extract_node_uuid(xpath); if(node_uuid == NULL) { crm_err("Could not extract node ID from %s", xpath); abort_transition(INFINITY, tg_restart, reason, change); return; } down = match_down_event(node_uuid); if (down == NULL) { crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath); abort_transition(INFINITY, tg_restart, reason, change); } else { crm_trace("Expecting changes to %s (%s)", node_uuid, xpath); } free(node_uuid); } static void process_op_deletion(const char *xpath, xmlNode *change) { char *mutable_key = strdup(xpath); char *key; char *node_uuid; // Extract the part of xpath between last pair of single quotes key = strrchr(mutable_key, '\''); if (key != NULL) { *key = '\0'; key = strrchr(mutable_key, '\''); } if (key == NULL) { crm_warn("Ignoring malformed CIB update (resource deletion of %s)", xpath); free(mutable_key); return; } ++key; node_uuid = extract_node_uuid(xpath); if (confirm_cancel_action(key, node_uuid) == FALSE) { abort_transition(INFINITY, tg_restart, "Resource operation removal", change); } free(mutable_key); free(node_uuid); } static void process_delete_diff(const char *xpath, const char *op, xmlNode *change) { if (strstr(xpath, "/" XML_LRM_TAG_RSC_OP "[")) { process_op_deletion(xpath, change); } else if (strstr(xpath, "/" XML_CIB_TAG_LRM "[")) { abort_unless_down(xpath, op, change, "Resource state removal"); } else if (strstr(xpath, "/" XML_CIB_TAG_STATE "[")) { abort_unless_down(xpath, op, change, "Node state removal"); } else { crm_trace("Ignoring delete of %s", xpath); } } static void process_node_state_diff(xmlNode *state, xmlNode *change, const char *op, const char *xpath) { xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM); process_resource_updates(ID(state), lrm, change, op, xpath); } static void process_status_diff(xmlNode *status, xmlNode *change, const char *op, const char *xpath) { for (xmlNode *state = pcmk__xml_first_child(status); state != NULL; state = pcmk__xml_next(state)) { process_node_state_diff(state, change, op, xpath); } } static void process_cib_diff(xmlNode *cib, xmlNode *change, const char *op, const char *xpath) { xmlNode *status = first_named_child(cib, XML_CIB_TAG_STATUS); xmlNode *config = first_named_child(cib, XML_CIB_TAG_CONFIGURATION); if (status) { process_status_diff(status, change, op, xpath); } if (config) { abort_transition(INFINITY, tg_restart, "Non-status-only change", change); } } static void te_update_diff_v2(xmlNode *diff) { crm_log_xml_trace(diff, "Patch:Raw"); for (xmlNode *change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { xmlNode *match = NULL; const char *name = NULL; const char *xpath = crm_element_value(change, XML_DIFF_PATH); // Possible ops: create, modify, delete, move const char *op = crm_element_value(change, XML_DIFF_OP); // Ignore uninteresting updates if (op == NULL) { continue; } else if (xpath == NULL) { crm_trace("Ignoring %s change for version field", op); continue; } else if (strcmp(op, "move") == 0) { crm_trace("Ignoring move change at %s", xpath); continue; } // Find the result of create/modify ops if (strcmp(op, "create") == 0) { match = change->children; } else if (strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } else if (strcmp(op, "delete") != 0) { crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)", op, xpath); continue; } if (match) { if (match->type == XML_COMMENT_NODE) { crm_trace("Ignoring %s operation for comment at %s", op, xpath); continue; } name = (const char *)match->name; } crm_trace("Handling %s operation for %s%s%s", op, (xpath? xpath : "CIB"), (name? " matched by " : ""), (name? name : "")); if (strstr(xpath, "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION)) { abort_transition(INFINITY, tg_restart, "Configuration change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_CIB_TAG_TICKETS) || pcmk__str_eq(name, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { abort_transition(INFINITY, tg_restart, "Ticket attribute change", change); break; // Won't be packaged with operation results we may be waiting for } else if (strstr(xpath, "/" XML_TAG_TRANSIENT_NODEATTRS "[") || pcmk__str_eq(name, XML_TAG_TRANSIENT_NODEATTRS, pcmk__str_casei)) { abort_unless_down(xpath, op, change, "Transient attribute change"); break; // Won't be packaged with operation results we may be waiting for } else if (strcmp(op, "delete") == 0) { process_delete_diff(xpath, op, change); } else if (name == NULL) { crm_warn("Ignoring malformed CIB update (%s at %s has no result)", op, xpath); } else if (strcmp(name, XML_TAG_CIB) == 0) { process_cib_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATUS) == 0) { process_status_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_STATE) == 0) { process_node_state_diff(match, change, op, xpath); } else if (strcmp(name, XML_CIB_TAG_LRM) == 0) { process_resource_updates(ID(match), match, change, op, xpath); } else if (strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_resource_updates(local_node, match, change, op, xpath); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_lrm_resource_diff(match, local_node); free(local_node); } else if (strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); process_graph_event(match, local_node); free(local_node); } else { crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)", op, xpath, name); } } } void te_update_diff(const char *event, xmlNode * msg) { xmlNode *diff = NULL; const char *op = NULL; int rc = -EINVAL; int format = 1; int p_add[] = { 0, 0, 0 }; int p_del[] = { 0, 0, 0 }; CRM_CHECK(msg != NULL, return); crm_element_value_int(msg, F_CIB_RC, &rc); if (transition_graph == NULL) { crm_trace("No graph"); return; } else if (rc < pcmk_ok) { crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc)); return; } else if (transition_graph->complete && fsa_state != S_IDLE && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) { crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state), transition_graph->complete); return; } op = crm_element_value(msg, F_CIB_OPERATION); diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_patch_versions(diff, p_add, p_del); crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op, p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2], fsa_state2string(fsa_state)); crm_element_value_int(diff, "format", &format); switch (format) { case 1: te_update_diff_v1(event, diff); break; case 2: te_update_diff_v2(diff); break; default: crm_warn("Ignoring malformed CIB update (unknown patch format %d)", format); } } gboolean process_te_message(xmlNode * msg, xmlNode * xml_data) { const char *from = crm_element_value(msg, F_ORIG); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); const char *op = crm_element_value(msg, F_CRM_TASK); const char *type = crm_element_value(msg, F_CRM_MSG_TYPE); crm_trace("Processing %s (%s) message", op, ref); crm_log_xml_trace(msg, "ipc"); if (op == NULL) { /* error */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if (pcmk__str_eq(op, CRM_OP_INVOKE_LRM, pcmk__str_casei) && pcmk__str_eq(sys_from, CRM_SYSTEM_LRMD, pcmk__str_casei) /* && pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_casei) */ ) { xmlXPathObject *xpathObj = NULL; crm_log_xml_trace(msg, "Processing (N)ACK"); crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from); xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP); if (numXpathResults(xpathObj)) { int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); const char *node = get_node_id(rsc_op); process_graph_event(rsc_op, node); } freeXpathObject(xpathObj); } else { crm_log_xml_err(msg, "Invalid (N)ACK"); freeXpathObject(xpathObj); return FALSE; } } else { crm_err("Unknown command: %s::%s from %s", type, op, sys_from); } crm_trace("finished processing message"); return TRUE; } void cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc < pcmk_ok) { crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc)); } } /*! * \brief Handle a timeout in node-to-node communication * * \param[in] data Pointer to action timer * * \return FALSE (indicating that source should be not be re-added) */ gboolean action_timer_callback(gpointer data) { crm_action_timer_t *timer = NULL; const char *task = NULL; const char *on_node = NULL; const char *via_node = NULL; CRM_CHECK(data != NULL, return FALSE); timer = (crm_action_timer_t *) data; stop_te_timer(timer); CRM_CHECK(timer->action != NULL, return FALSE); task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK); on_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_TARGET); via_node = crm_element_value(timer->action->xml, XML_LRM_ATTR_ROUTER_NODE); if (transition_graph->complete) { crm_notice("Node %s did not send %s result (via %s) within %dms " "(ignoring because transition not in progress)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout); } else { /* fail the action */ crm_err("Node %s did not send %s result (via %s) within %dms " "(action timeout plus cluster-delay)", (on_node? on_node : ""), (task? task : "unknown action"), (via_node? via_node : "controller"), timer->timeout + transition_graph->network_delay); pcmk__log_graph_action(LOG_ERR, timer->action); crm__set_graph_action_flags(timer->action, pcmk__graph_action_failed); te_action_confirmed(timer->action, transition_graph); abort_transition(INFINITY, tg_restart, "Action lost", NULL); // Record timeout in the CIB if appropriate if ((timer->action->type == action_type_rsc) && controld_action_is_recordable(task)) { controld_record_action_timeout(timer->action); } } return FALSE; } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 999b1be2de..667525039d 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,1923 +1,1922 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include // Check whether we have a high-resolution monotonic clock #undef PCMK__TIME_USE_CGT #if HAVE_DECL_CLOCK_MONOTONIC && defined(CLOCK_MONOTONIC) # define PCMK__TIME_USE_CGT # include /* clock_gettime */ #endif #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *userdata_str; pcmk__action_result_t result; /* We can track operation queue time and run time, to be saved with the CIB * resource history (and displayed in cluster status). We need * high-resolution monotonic time for this purpose, so we use * clock_gettime(CLOCK_MONOTONIC, ...) (if available, otherwise this feature * is disabled). * * However, we also need epoch timestamps for recording the time the command * last ran and the time its return value last changed, for use in time * displays (as opposed to interval calculations). We keep time_t values for * this purpose. * * The last run time is used for both purposes, so we keep redundant * monotonic and epoch values for this. Technically the two could represent * different times, but since time_t has only second resolution and the * values are used for distinct purposes, that is not significant. */ #ifdef PCMK__TIME_USE_CGT /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timespec t_first_run; // When op first ran struct timespec t_run; // When op most recently ran struct timespec t_first_queue; // When op was first queued struct timespec t_queue; // When op was most recently queued #endif time_t epoch_last_run; // Epoch timestamp of when op last ran time_t epoch_rcchange; // Epoch timestamp of when rc last changed bool first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); #ifdef PCMK__TIME_USE_CGT /*! * \internal * \brief Check whether a struct timespec has been set * * \param[in] timespec Time to check * * \return true if timespec has been set (i.e. is nonzero), false otherwise */ static inline bool time_is_set(struct timespec *timespec) { return (timespec != NULL) && ((timespec->tv_sec != 0) || (timespec->tv_nsec != 0)); } /* * \internal * \brief Set a timespec (and its original if unset) to the current time * * \param[out] t_current Where to store current time * \param[out] t_orig Where to copy t_current if unset */ static void get_current_time(struct timespec *t_current, struct timespec *t_orig) { clock_gettime(CLOCK_MONOTONIC, t_current); if ((t_orig != NULL) && !time_is_set(t_orig)) { *t_orig = *t_current; } } /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or unset) * * \note Can overflow on 32bit machines when the differences is around * 24 days or more. */ static int time_diff_ms(struct timespec *now, struct timespec *old) { int diff_ms = 0; if (time_is_set(old)) { struct timespec local_now = { 0, }; if (now == NULL) { clock_gettime(CLOCK_MONOTONIC, &local_now); now = &local_now; } diff_ms = (now->tv_sec - old->tv_sec) * 1000 + (now->tv_nsec - old->tv_nsec) / 1000000; } return diff_ms; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * \param[in] cmd Executor command object to reset * * \note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static inline bool action_matches(lrmd_cmd_t *cmd, const char *action, guint interval_ms) { return (cmd->interval_ms == interval_ms) && pcmk__str_eq(cmd->action, action, pcmk__str_casei); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command to log result for * \param[in] exec_time_ms Execution time in milliseconds, if known * \param[in] queue_time_ms Queue time in milliseconds, if known */ static void log_finished(lrmd_cmd_t *cmd, int exec_time_ms, int queue_time_ms) { int log_level = LOG_INFO; GString *str = g_string_sized_new(100); // reasonable starting size if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } g_string_printf(str, "%s %s (call %d", cmd->rsc_id, cmd->action, cmd->call_id); if (cmd->last_pid != 0) { g_string_append_printf(str, ", PID %d", cmd->last_pid); } if (cmd->result.execution_status == PCMK_EXEC_DONE) { g_string_append_printf(str, ") exited with status %d", cmd->result.exit_status); } else { g_string_append_printf(str, ") could not be executed: %s", pcmk_exec_status_str(cmd->result.execution_status)); } if (cmd->result.exit_reason != NULL) { g_string_append_printf(str, " (%s)", cmd->result.exit_reason); } #ifdef PCMK__TIME_USE_CGT g_string_append_printf(str, " (execution time %s", pcmk__readable_interval(exec_time_ms)); if (queue_time_ms > 0) { g_string_append_printf(str, " after being queued %s", pcmk__readable_interval(queue_time_ms)); } g_string_append(str, ")"); #endif do_crm_log(log_level, "%s", str->str); g_string_free(str, TRUE); } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (pcmk__str_eq(action, "monitor", pcmk__str_casei) && pcmk_is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); rsc->st_probe_rc = -ENODEV; // if stonith, initialize to "not running" return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode *msg, pcmk__client_t *client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (pcmk__str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block", pcmk__str_casei)) { crm_debug("Setting flag to leave pid group on timeout and " "only kill action pid for " PCMK__OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Action", cmd->action, 0, SVC_ACTION_LEAVE_GROUP, "SVC_ACTION_LEAVE_GROUP"); } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } pcmk__reset_result(&(cmd->result)); free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (cmd && (cmd->interval_ms > 0)) { cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, stonith_recurring_op_helper, cmd); } } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } /*! * \internal * \brief Check whether a list already contains the equivalent of a given action */ static lrmd_cmd_t * find_duplicate_action(GList *action_list, lrmd_cmd_t *cmd) { for (GList *item = action_list; item != NULL; item = item->next) { lrmd_cmd_t *dup = item->data; if (action_matches(cmd, dup->action, dup->interval_ms)) { return dup; } } return NULL; } static bool merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { lrmd_cmd_t * dup = NULL; bool dup_pending = true; if (cmd->interval_ms == 0) { return false; } // Search for a duplicate of this action (in-flight or not) dup = find_duplicate_action(rsc->pending_ops, cmd); if (dup == NULL) { dup_pending = false; dup = find_duplicate_action(rsc->recurring_ops, cmd); if (dup == NULL) { return false; } } /* Do not merge fencing monitors marked for cancellation, so we can reply to * the cancellation separately. */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei) && (dup->result.execution_status == PCMK_EXEC_CANCELLED)) { return false; } /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" PCMK__OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); // Merge new action's call ID and user data into existing action dup->first_notify_sent = false; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; free_lrmd_cmd(cmd); cmd = NULL; /* If dup is not pending, that means it has already executed at least once * and is waiting in the interval. In that case, stop waiting and initiate * a new instance now. */ if (!dup_pending) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stop_recurring_timer(dup); stonith_recurring_op_helper(dup); } else { services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } } return true; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); if (merge_recurring_duplicate(rsc, cmd)) { // Equivalent of cmd has already been scheduled return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_queue), &(cmd->t_first_queue)); #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, origin); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; int rc; int log_level = LOG_WARNING; const char *msg = NULL; CRM_CHECK(client != NULL, return); if (client->name == NULL) { crm_trace("Skipping notification to client without name"); return; } if (pcmk_is_set(client->flags, pcmk__client_to_proxy)) { /* We only want to notify clients of the executor IPC API. If we are * running as Pacemaker Remote, we may have clients proxied to other * IPC services in the cluster, so skip those. */ crm_trace("Skipping executor API notification to client %s", pcmk__client_name(client)); return; } rc = lrmd_server_send_notify(client, update_msg); if (rc == pcmk_rc_ok) { return; } switch (rc) { case ENOTCONN: case EPIPE: // Client exited without waiting for notification log_level = LOG_INFO; msg = "Disconnected"; break; default: msg = pcmk_rc_str(rc); break; } do_crm_log(log_level, "Could not notify client %s: %s " CRM_XS " rc=%d", pcmk__client_name(client), msg, rc); } static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { xmlNode *notify = NULL; int exec_time = 0; int queue_time = 0; #ifdef PCMK__TIME_USE_CGT exec_time = time_diff_ms(NULL, &(cmd->t_run)); queue_time = time_diff_ms(&cmd->t_run, &(cmd->t_queue)); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if ((cmd->last_notify_rc == cmd->result.exit_status) && (cmd->last_notify_op_status == cmd->result.execution_status)) { /* only send changes */ return; } } cmd->first_notify_sent = true; cmd->last_notify_rc = cmd->result.exit_status; cmd->last_notify_op_status = cmd->result.execution_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->result.exit_status); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->result.execution_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); crm_xml_add_ll(notify, F_LRMD_RSC_RUN_TIME, (long long) cmd->epoch_last_run); crm_xml_add_ll(notify, F_LRMD_RSC_RCCHANGE_TIME, (long long) cmd->epoch_rcchange); #ifdef PCMK__TIME_USE_CGT crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->result.exit_reason); if (cmd->result.action_stderr != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stderr); } else if (cmd->result.action_stdout != NULL) { crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->result.action_stdout); } if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { pcmk__client_t *client = pcmk__find_client_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else { pcmk__foreach_ipc_client(send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (pcmk__ipc_client_count() != 0) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __func__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); pcmk__foreach_ipc_client(send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->last_pid = 0; #ifdef PCMK__TIME_USE_CGT memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif cmd->epoch_last_run = 0; pcmk__reset_result(&(cmd->result)); cmd->result.execution_status = PCMK_EXEC_DONE; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if ((cmd->interval_ms != 0) && (cmd->result.execution_status == PCMK_EXEC_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } static int stonith2uniform_rc(const char *action, int rc) { switch (rc) { case pcmk_ok: rc = PCMK_OCF_OK; break; case -ENODEV: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { rc = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(action, "stop", pcmk__str_casei)) { rc = PCMK_OCF_OK; } else { rc = PCMK_OCF_NOT_INSTALLED; } break; case -EOPNOTSUPP: rc = PCMK_OCF_UNIMPLEMENT_FEATURE; break; default: rc = PCMK_OCF_UNKNOWN_ERROR; break; } return rc; } static int action_get_uniform_rc(svc_action_t *action) { lrmd_cmd_t *cmd = action->cb_data; if (pcmk__str_eq(action->standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return stonith2uniform_rc(cmd->action, action->rc); } else { enum ocf_exitcode code = services_result2ocf(action->standard, cmd->action, action->rc); // Cast variable instead of function return to keep compilers happy return (int) code; } } struct notify_new_client_data { xmlNode *notify; pcmk__client_t *new_client; }; static void notify_one_client(gpointer key, gpointer value, gpointer user_data) { pcmk__client_t *client = value; struct notify_new_client_data *data = user_data; if (!pcmk__str_eq(client->id, data->new_client->id, pcmk__str_casei)) { send_client_notify(key, (gpointer) client, (gpointer) data->notify); } } void notify_of_new_client(pcmk__client_t *new_client) { struct notify_new_client_data data; data.new_client = new_client; data.notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(data.notify, F_LRMD_ORIGIN, __func__); crm_xml_add(data.notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); pcmk__foreach_ipc_client(notify_one_client, &data); free_xml(data.notify); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; #ifdef PCMK__TIME_USE_CGT const char *rclass = NULL; bool goagain = false; #endif if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef PCMK__TIME_USE_CGT if (cmd->result.exit_status != action->rc) { cmd->epoch_rcchange = time(NULL); } #endif cmd->last_pid = action->pid; pcmk__set_result(&(cmd->result), action_get_uniform_rc(action), action->status, services__exit_reason(action)); rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; #ifdef PCMK__TIME_USE_CGT if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { if ((cmd->result.exit_status == PCMK_OCF_OK) && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { /* systemd returns from start and stop actions after the action * begins, not after it completes. We have to jump through a few * hoops so that we don't report 'complete' to the rest of pacemaker * until it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if (cmd->real_action != NULL) { // This is follow-up monitor to check whether start/stop completed if (cmd->result.execution_status == PCMK_EXEC_PENDING) { goagain = true; } else if ((cmd->result.exit_status == PCMK_OCF_OK) && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { goagain = true; } else { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s systemd %s is now complete (elapsed=%dms, " "remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status); cmd_original_times(cmd); // Monitors may return "not running", but start/stop shouldn't if ((cmd->result.execution_status == PCMK_EXEC_DONE) && (cmd->result.exit_status == PCMK_OCF_NOT_RUNNING)) { if (pcmk__str_eq(cmd->real_action, "start", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_UNKNOWN_ERROR; } else if (pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; } } } } } #endif #if SUPPORT_NAGIOS if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { if (action_matches(cmd, "monitor", 0) && (cmd->result.exit_status == PCMK_OCF_OK)) { /* Successfully executed --version for the nagios plugin */ cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) && (cmd->result.exit_status != PCMK_OCF_OK)) { #ifdef PCMK__TIME_USE_CGT goagain = true; #endif } } #endif #ifdef PCMK__TIME_USE_CGT if (goagain) { int time_sum = time_diff_ms(NULL, &(cmd->t_first_run)); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if (cmd->result.exit_status == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if (cmd->result.execution_status == PCMK_EXEC_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->result.exit_status), cmd->result.exit_status, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, (cmd->real_action? cmd->real_action : cmd->action), cmd->result.exit_status, time_sum, timeout_left); pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, "Investigate reason for timeout, and adjust " "configured operation timeout if necessary"); cmd_original_times(cmd); } } #endif pcmk__set_result_output(&(cmd->result), services__grab_stdout(action), services__grab_stderr(action)); cmd_finalize(cmd, rsc); } /*! * \internal * \brief Determine operation status of a stonith operation * * Non-stonith resource operations get their operation status directly from the * service library, but the fencer does not have an equivalent, so we must infer * an operation status from the fencer API's return code. * * \param[in] action Name of action performed on stonith resource * \param[in] interval_ms Action interval * \param[in] rc Action result from fencer * * \return Operation status corresponding to fencer API return code */ static int stonith_rc2status(const char *action, guint interval_ms, int rc) { int status = PCMK_EXEC_DONE; switch (rc) { case pcmk_ok: break; case -EOPNOTSUPP: case -EPROTONOSUPPORT: status = PCMK_EXEC_NOT_SUPPORTED; break; case -ETIME: case -ETIMEDOUT: status = PCMK_EXEC_TIMEOUT; break; case -ENOTCONN: case -ECOMM: // Couldn't talk to fencer status = PCMK_EXEC_ERROR; break; case -ENODEV: // The device is not registered with the fencer status = PCMK_EXEC_ERROR; break; default: break; } return status; } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); cmd->result.exit_status = stonith2uniform_rc(cmd->action, rc); /* This function may be called with status already set to cancelled, if a * pending action was aborted. Otherwise, we need to determine status from * the fencer return code. */ if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) { cmd->result.execution_status = stonith_rc2status(cmd->action, cmd->interval_ms, rc); // Certain successful actions change the known state of the resource if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) { if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rsc->st_probe_rc = -ENODEV; // maps to PCMK_OCF_NOT_RUNNING } } } // Give the user more detail than an OCF code if (rc != -pcmk_err_generic) { cmd->result.exit_reason = strdup(pcmk_strerror(rc)); } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_EXEC_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->result.execution_status != PCMK_EXEC_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* If we registered this fence device, we don't know whether the * fencer still has the registration or not. Cause future probes to * return PCMK_OCF_UNKNOWN_ERROR until the resource is stopped or * started successfully. This is especially important if the * controller also went away (possibly due to a cluster layer * restart) and won't receive our client notification of any * monitors finalized below. */ if (rsc->st_probe_rc == pcmk_ok) { rsc->st_probe_rc = pcmk_err_generic; } if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("Connection to fencer failed, finalizing %d pending operations", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Initiate a stonith resource agent recurring "monitor" action * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { rc = -ENOTCONN; } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == 0) { do_monitor = TRUE; } } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { rc = execd_stonith_stop(stonith_api, rsc); } else if (pcmk__str_eq(cmd->action, "monitor", pcmk__str_casei)) { if (cmd->interval_ms > 0) { do_monitor = TRUE; } else { rc = rsc->st_probe_rc; } } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, rc); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei) && pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { cmd->result.exit_status = PCMK_OCF_OK; goto exec_done; } #endif params_copy = pcmk__str_table_dup(cmd->params); action = services__create_resource_action(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (action == NULL) { pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); goto exec_done; } if (action->rc != PCMK_OCF_UNKNOWN) { pcmk__set_result(&(cmd->result), action->rc, action->status, services__exit_reason(action)); services_action_free(action); goto exec_done; } action->cb_data = cmd; if (services_action_async(action, action_complete)) { /* When services_action_async() returns TRUE, the callback might have * been called -- in this case action_complete(), which might free cmd, * so cmd cannot be used here. */ return TRUE; } pcmk__set_result(&(cmd->result), action->rc, action->status, services__exit_reason(action)); services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef PCMK__TIME_USE_CGT get_current_time(&(cmd->t_run), &(cmd->t_first_run)); #endif cmd->epoch_last_run = time(NULL); } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GList *gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei); gIter = rsc->pending_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GList *next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static int process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, xmlNode **reply) { int rc = pcmk_ok; - const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_MIN_PROTOCOL_VERSION, protocol_version); rc = -EPROTO; } - if (crm_is_true(is_ipc_provider)) { + if (pcmk__xe_attr_is_true(request, F_LRMD_IS_IPC_PROVIDER)) { #ifdef PCMK__COMPILE_REMOTE if ((client->remote != NULL) && client->remote->tls_handshake_complete) { // This is a remote connection from a cluster node's controller ipc_proxy_add_provider(client); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif } *reply = create_lrmd_reply(__func__, rc, call_id); crm_xml_add(*reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(*reply, F_LRMD_CLIENTID, client->id); crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); return rc; } static int process_lrmd_rsc_register(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && pcmk__str_eq(rsc->class, dup->class, pcmk__str_casei) && pcmk__str_eq(rsc->provider, dup->provider, pcmk__str_casei) && pcmk__str_eq(rsc->type, dup->type, pcmk__str_casei)) { crm_notice("Ignoring duplicate registration of '%s'", rsc->rsc_id); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Cached agent information for '%s'", rsc->rsc_id); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Agent information for '%s' not in cache", rsc_id); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Ignoring unregistration of resource '%s', which is not registered", rsc_id); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation (0x%p) still in progress for unregistered resource %s", rsc->active, rsc_id); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GList *gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (action_matches(cmd, action, interval_ms)) { cmd->result.execution_status = PCMK_EXEC_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && !pcmk__str_eq(cmd->client_id, client_id, pcmk__str_casei)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(pcmk__client_t *client, uint32_t id, xmlNode *request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); crm_xml_add(op_xml, F_LRMD_RSC_ACTION, (cmd->real_action? cmd->real_action : cmd->action)); crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = first_named_child(request, F_LRMD_CALLDATA); if (rsc_xml) { rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__func__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; /* Certain IPC commands may be done only by privileged users (i.e. root or * hacluster), because they would otherwise provide a means of bypassing * ACLs. */ bool allowed = pcmk_is_set(client->flags, pcmk__client_privileged); crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (pcmk__str_eq(op, CRM_OP_IPC_FWD, pcmk__str_none)) { #ifdef PCMK__COMPILE_REMOTE if (allowed) { ipc_proxy_forward_client(client, request); } else { rc = -EACCES; } #else rc = -EPROTONOSUPPORT; #endif do_reply = 1; } else if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { rc = process_lrmd_signon(client, request, call_id, &reply); do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_REG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_INFO, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_rsc_info(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_UNREG, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_RSC_CANCEL, pcmk__str_none)) { if (allowed) { rc = process_lrmd_rsc_cancel(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_POKE, pcmk__str_none)) { do_notify = 1; do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_CHECK, pcmk__str_none)) { if (allowed) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); CRM_LOG_ASSERT(data != NULL); pcmk__valid_sbd_timeout(crm_element_value(data, F_LRMD_WATCHDOG)); } else { rc = -EACCES; } } else if (pcmk__str_eq(op, LRMD_OP_ALERT_EXEC, pcmk__str_none)) { if (allowed) { rc = process_lrmd_alert_exec(client, id, request); } else { rc = -EACCES; } do_reply = 1; } else if (pcmk__str_eq(op, LRMD_OP_GET_RECURRING, pcmk__str_none)) { if (allowed) { reply = process_lrmd_get_recurring(request, call_id); } else { rc = -EACCES; } do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown IPC request '%s' from client %s", op, pcmk__client_name(client)); } if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", op, pcmk__client_name(client)); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_rc_ok; if (reply == NULL) { reply = create_lrmd_reply(__func__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc != pcmk_rc_ok) { crm_warn("Reply to client %s failed: %s " CRM_XS " rc=%d", pcmk__client_name(client), pcmk_rc_str(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 9d06c68dc2..87600573ee 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3232 +1,3226 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* seconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; GList *device_list; GList *device_next; void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && pcmk__str_eq(action, "on", pcmk__str_casei); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { delay_max = crm_parse_interval_spec(value) / 1000; } return delay_max; } static int get_action_delay_base(stonith_device_t *device, const char *action, const char *victim) { char *hash_value = NULL; int delay_base = 0; if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = strdup(hash_value); char *valptr = value; CRM_ASSERT(value != NULL); if (victim) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(victim, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, victim); break; } } } if (strchr(value, ':') == 0) { delay_base = crm_parse_interval_spec(value) / 1000; } free(valptr); } return delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, "reboot", pcmk__str_casei) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; // Value -1 means disable any static/random fencing delays crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->victim == NULL)? "" : " targeting "), ((cmd->victim == NULL)? "" : cmd->victim), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = { // Ensure we don't pass garbage to free() .exit_reason = NULL, .action_stdout = NULL, .action_stderr = NULL }; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->victim == NULL)? "" : " targeting "), ((pending_op->victim == NULL)? "" : pending_op->victim), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->victim == NULL)? "" : " targeting "), ((cmd->victim == NULL)? "" : cmd->victim), device->id, device->agent); action_str = "off"; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->victim); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dont_call] We're not using rand() for security cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } *metadata = string2xml(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { - const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; - on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(pcmk__str_eq(action, "list", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if(pcmk__str_eq(action, "status", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if(pcmk__str_eq(action, "reboot", pcmk__str_casei)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, "on", pcmk__str_casei)) { /* "automatic" means the cluster will unfence node when it joins */ - const char *automatic = crm_element_value(match, "automatic"); - /* "required" is a deprecated synonym for "automatic" */ - const char *required = crm_element_value(match, "required"); - - if (crm_is_true(automatic) || crm_is_true(required)) { + if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { device->automatic_unfencing = TRUE; } } - if (action && crm_is_true(on_target)) { + if (action && pcmk__xe_attr_is_true(match, "on_target")) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = "static-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = "dynamic-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = "status"; } else { check_type = "none"; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { /* Other than "static-list", dev-> targets is unnecessary. */ g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (pcmk__str_eq(value, "unfencing", pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if ((result->execution_status == PCMK_EXEC_DONE) && (result->exit_status == CRM_EX_OK)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check="status" if the user didn't explicitly * specify "dynamic-list". */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " "stonith-watchdog-timeout set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); g_hash_table_replace(device->params, strdup(PCMK_STONITH_HOST_LIST), strdup(stonith_our_uname)); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (pcmk__str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL, pcmk__str_casei)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id > 0 && tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); return pcmk_ok; } /*! * \internal * \brief Schedule an (asynchronous) action directly on a stonith device * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg API message XML with desired action * \param[out] output Unused * * \return -EINPROGRESS on success, -errno otherwise * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = calloc(alloc_size, sizeof(char)); if (rv) { char *pos = rv; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } } return rv; } static int stonith_device_action(xmlNode * msg, char **output) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); return -EPROTO; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { if (stonith_watchdog_timeout_ms <= 0) { return -ENODEV; } else { if (pcmk__str_eq(action, "list", pcmk__str_casei)) { *output = list_to_string(stonith_watchdog_targets, "\n", TRUE); return pcmk_ok; } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { return pcmk_ok; } } } device = g_hash_table_lookup(device_list, id); if ((device == NULL) || (!device->api_registered && !strcmp(action, "monitor"))) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not found", action, id); return -ENODEV; } cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); return -EINPROGRESS; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for " "local host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (pcmk__str_eq(search->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (pcmk__str_eq(check_type, "none", pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (pcmk__str_in_list(host, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, "list", search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur", device_timeout, dev->id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__func__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout(%ds) parameter of %s is larger than stonith-timeout(%ds), timeout may occur", device_timeout, dev->id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__func__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } if (pcmk__str_eq(host, alias, pcmk__str_casei)) { crm_notice("%s is%s eligible to fence (%s) %s: %s", dev->id, (can? "" : " not"), search->action, host, check_type); } else { crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s", dev->id, (can? "" : " not"), search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { crm_crit("Cannot search for capable fence devices: %s", strerror(ENOMEM)); callback(NULL, user_data); return; } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %dms using %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %dms using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %dms using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); - crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __func__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, "reboot", pcmk__str_casei)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, pcmk_is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->device_next); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if ((result->execution_status == PCMK_EXEC_DONE) && (result->exit_status == CRM_EX_OK)) { // Success log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { output_log_level = LOG_DEBUG; } next = NULL; } else { // Failure log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece g_string_printf(msg, "Operation '%s' ", cmd->action); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->victim != NULL) { g_string_append_printf(msg, "targeting %s ", cmd->victim); } g_string_append_printf(msg, "using %s ", cmd->device); // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { g_string_append_printf(msg, "could not be executed: %s", pcmk_exec_status_str(result->execution_status)); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { g_string_append_printf(msg, " (%s)", result->exit_reason); } if (next != NULL) { g_string_append_printf(msg, ", retrying with %s", next); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; gboolean bcast = FALSE; CRM_CHECK((cmd != NULL) && (result != NULL), return); reply = construct_async_reply(cmd, result); // Only replies for certain actions are broadcast if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status", NULL)) { crm_trace("Never broadcast '%s' replies", cmd->action); } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) { crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_async_result(cmd, result, pid, NULL, merged); crm_log_xml_trace(reply, "Reply"); if (merged) { - crm_xml_add(reply, F_STONITH_MERGED, "true"); + pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true); } if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); int rc = pcmk_rc2legacy(stonith__result2rc(result)); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } free_xml(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (!device->verified && (result->exit_status == CRM_EX_OK) && (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (result->exit_status == CRM_EX_OK) { GList *iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if ((cmd->device_next != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } send_async_reply(cmd, result, pid, false); if (result->exit_status != CRM_EX_OK) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->victim, cmd_other->victim, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_casei) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating fencer remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical fencing request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->victim); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = { // Ensure we don't pass garbage to free() .exit_reason = NULL, .action_stdout = NULL, .action_stderr = NULL }; pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No fence device configured for target"); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->device_next = devices->next; schedule_stonith_command(cmd, device); } } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid; crm_node_t *node; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result %d (initiated before we came up?)", rc); } else { const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a result reply with%s reply output (rc=%d)", (data? "" : "out"), rc); for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { add_message_xml(reply, F_STONITH_CALLDATA, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, pcmk_rc2legacy(stonith__result2rc(result))); crm_xml_add(reply, "st_output", result->action_stdout); return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t * op) { #ifdef CLOCK_MONOTONIC struct timespec tv; clock_gettime(CLOCK_MONOTONIC, &tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; #else op->completed = time(NULL); op->completed_nsec = 0L; #endif } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle request " "for self-fencing with topology"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, pcmk_is_set(call_options, st_opt_sync_call), (remote_peer != NULL)); } } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); } relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); client_name = crm_element_value(request, F_STONITH_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s' targeting %s for %s), " "replaced by op %s ('%s' targeting %s for %s)", relay_op->id, relay_op->action, relay_op->target, relay_op->client_name, op_id, relay_op->action, target, client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } static int handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); /* IPC commands related to fencing configuration may be done only by * privileged users (i.e. root or hacluster), because all other users should * go through the CIB to have ACLs applied. * * If no client was given, this is a peer request, which is always allowed. */ bool allowed = (client == NULL) || pcmk_is_set(client->flags, pcmk__client_privileged); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); pcmk__ipc_send_xml(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) { rc = stonith_device_action(request, &output); } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } /* Delete the DC node RELAY operation. */ remove_relay_op(request); stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__client_name(client)); pcmk__set_client_flags(client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__client_name(client)); pcmk__clear_client_flags(client, get_stonith_flag(flag_name)); } pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK); return 0; } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__client_name(client), action, target, (device? device : "any device")); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; crm_notice("Forwarding self-fencing request to peer %s" "due to topology", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create an operation for RELAY and send the ID in the RELAY message. */ /* When a QUERY response is received, delete the RELAY operation to avoid the existence of duplicate operations. */ op = create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); crm_xml_add(request, F_STONITH_REMOTE_OP_ID, op->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { rc = stonith_fence_history(request, &data, remote_peer, call_options); if (call_options & st_opt_discard_reply) { /* we don't expect answers to the broadcast * we might have sent out */ free_xml(data); return pcmk_ok; } } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { const char *device_id = NULL; if (allowed) { rc = stonith_device_register(request, &device_id, FALSE); } else { rc = -EACCES; } do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); if (allowed) { rc = stonith_device_remove(device_id, FALSE); } else { rc = -EACCES; } do_stonith_notify_device(call_options, op, rc, device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { char *device_id = NULL; if (allowed) { rc = stonith_level_register(request, &device_id); } else { rc = -EACCES; } do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { char *device_id = NULL; if (allowed) { rc = stonith_level_remove(request, &device_id); } else { rc = -EACCES; } do_stonith_notify_level(call_options, op, rc, device_id); } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown IPC request %s from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); } done: if (rc == -EACCES) { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", crm_str(op), pcmk__client_name(client)); } /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, pcmk_is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { process_remote_stonith_exec(request); } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s %s with call options 0x%08x", op, (is_reply? " reply" : ""), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), call_options); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s %s: %s (rc=%d)", op, (is_reply? " reply" : ""), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client)), ((rc > 0)? "" : pcmk_strerror(rc)), rc); free(op); } diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c index fa8e2d87e6..1ba034ba90 100644 --- a/daemons/fenced/fenced_history.c +++ b/daemons/fenced/fenced_history.c @@ -1,532 +1,529 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_STONITH_HISTORY 500 /*! * \internal * \brief Send a broadcast to all nodes to trigger cleanup or * history synchronisation * * \param[in] history Optional history to be attached * \param[in] callopts We control cleanup via a flag in the callopts * \param[in] target Cleanup can be limited to certain fence-targets */ static void stonith_send_broadcast_history(xmlNode *history, int callopts, const char *target) { xmlNode *bcast = create_xml_node(NULL, "stonith_command"); xmlNode *data = create_xml_node(NULL, __func__); if (target) { crm_xml_add(data, F_STONITH_TARGET, target); } crm_xml_add(bcast, F_TYPE, T_STONITH_NG); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, STONITH_OP_FENCE_HISTORY); crm_xml_add_int(bcast, F_STONITH_CALLOPTS, callopts); if (history) { add_node_copy(data, history); } add_message_xml(bcast, F_STONITH_CALLDATA, data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(data); free_xml(bcast); } static gboolean stonith_remove_history_entry (gpointer key, gpointer value, gpointer user_data) { remote_fencing_op_t *op = value; const char *target = (const char *) user_data; if ((op->state == st_failed) || (op->state == st_done)) { if ((target) && (strcmp(op->target, target) != 0)) { return FALSE; } return TRUE; } return FALSE; /* don't clean pending operations */ } /*! * \internal * \brief Send out a cleanup broadcast or do a local history-cleanup * * \param[in] target Cleanup can be limited to certain fence-targets * \param[in] broadcast Send out a cleanup broadcast */ static void stonith_fence_history_cleanup(const char *target, gboolean broadcast) { if (broadcast) { stonith_send_broadcast_history(NULL, st_opt_cleanup | st_opt_discard_reply, target); /* we'll do the local clean when we receive back our own broadcast */ } else if (stonith_remote_op_list) { g_hash_table_foreach_remove(stonith_remote_op_list, stonith_remove_history_entry, (gpointer) target); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } } /* keeping the length of fence-history within bounds * ================================================= * * If things are really running wild a lot of fencing-attempts * might fill up the hash-map, eventually using up a lot * of memory and creating huge history-sync messages. * Before the history being synced across nodes at least * the reboot of a cluster-node helped keeping the * history within bounds even though not in a reliable * manner. * * stonith_remote_op_list isn't sorted for time-stamps * thus it would be kind of expensive to delete e.g. * the oldest entry if it would grow past MAX_STONITH_HISTORY * entries. * It is more efficient to purge MAX_STONITH_HISTORY/2 * entries whenever the list grows beyond MAX_STONITH_HISTORY. * (sort for age + purge the MAX_STONITH_HISTORY/2 oldest) * That done on a per-node-base might raise the * probability of large syncs to occur. * Things like introducing a broadcast to purge * MAX_STONITH_HISTORY/2 entries or not sync above a certain * threshold coming to mind ... * Simplest thing though is to purge the full history * throughout the cluster once MAX_STONITH_HISTORY is reached. * On the other hand this leads to purging the history in * situations where it would be handy to have it probably. */ static int op_time_sort(const void *a_voidp, const void *b_voidp) { const remote_fencing_op_t **a = (const remote_fencing_op_t **) a_voidp; const remote_fencing_op_t **b = (const remote_fencing_op_t **) b_voidp; gboolean a_pending = ((*a)->state != st_failed) && ((*a)->state != st_done); gboolean b_pending = ((*b)->state != st_failed) && ((*b)->state != st_done); if (a_pending && b_pending) { return 0; } else if (a_pending) { return -1; } else if (b_pending) { return 1; } else if ((*b)->completed == (*a)->completed) { if ((*b)->completed_nsec > (*a)->completed_nsec) { return 1; } else if ((*b)->completed_nsec == (*a)->completed_nsec) { return 0; } } else if ((*b)->completed > (*a)->completed) { return 1; } return -1; } /*! * \internal * \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries * once over MAX_STONITH_HISTORY */ void stonith_fence_history_trim(void) { guint num_ops; if (!stonith_remote_op_list) { return; } num_ops = g_hash_table_size(stonith_remote_op_list); if (num_ops > MAX_STONITH_HISTORY) { remote_fencing_op_t *ops[num_ops]; remote_fencing_op_t *op = NULL; GHashTableIter iter; int i; crm_trace("Fencing History growing beyond limit of %d so purge " "half of failed/successful attempts", MAX_STONITH_HISTORY); /* write all ops into an array */ i = 0; g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { ops[i++] = op; } /* run quicksort over the array so that we get pending ops * first and then sorted most recent to oldest */ qsort(ops, num_ops, sizeof(remote_fencing_op_t *), op_time_sort); /* purgest oldest half of the history entries */ for (i = MAX_STONITH_HISTORY / 2; i < num_ops; i++) { /* keep pending ops even if they shouldn't fill more than * half of our buffer */ if ((ops[i]->state == st_failed) || (ops[i]->state == st_done)) { g_hash_table_remove(stonith_remote_op_list, ops[i]->id); } } /* we've just purged valid data from the list so there is no need * to create a notification - if displayed it can stay */ } } /*! * \internal * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list * * \param[in] history Fence-history in xml * * \return Fence-history as hash-table */ static GHashTable * stonith_xml_history_to_list(xmlNode *history) { xmlNode *xml_op = NULL; GHashTable *rv = NULL; init_stonith_remote_op_hash_table(&rv); CRM_LOG_ASSERT(rv != NULL); for (xml_op = pcmk__xml_first_child(history); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { remote_fencing_op_t *op = NULL; char *id = crm_element_value_copy(xml_op, F_STONITH_REMOTE_OP_ID); int state; long long completed; long long completed_nsec = 0L; if (!id) { crm_warn("Malformed fencing history received from peer"); continue; } crm_trace("Attaching op %s to hashtable", id); op = calloc(1, sizeof(remote_fencing_op_t)); op->id = id; op->target = crm_element_value_copy(xml_op, F_STONITH_TARGET); op->action = crm_element_value_copy(xml_op, F_STONITH_ACTION); op->originator = crm_element_value_copy(xml_op, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(xml_op, F_STONITH_DELEGATE); op->client_name = crm_element_value_copy(xml_op, F_STONITH_CLIENTNAME); crm_element_value_ll(xml_op, F_STONITH_DATE, &completed); op->completed = (time_t) completed; crm_element_value_ll(xml_op, F_STONITH_DATE_NSEC, &completed_nsec); op->completed_nsec = completed_nsec; crm_element_value_int(xml_op, F_STONITH_STATE, &state); op->state = (enum op_state) state; g_hash_table_replace(rv, id, op); CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL); } return rv; } /*! * \internal * \brief Craft xml difference between local fence-history and a history * coming from remote, and merge the remote history into the local * * \param[in] remote_history Fence-history as hash-table (may be NULL) * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history_diff_and_merge(GHashTable *remote_history, gboolean add_id, const char *target) { xmlNode *history = NULL; GHashTableIter iter; remote_fencing_op_t *op = NULL; gboolean updated = FALSE; int cnt = 0; if (stonith_remote_op_list) { char *id = NULL; history = create_xml_node(NULL, F_STONITH_HISTORY_LIST); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) { xmlNode *entry = NULL; if (remote_history) { remote_fencing_op_t *remote_op = g_hash_table_lookup(remote_history, op->id); if (remote_op) { if (stonith__op_state_pending(op->state) && !stonith__op_state_pending(remote_op->state)) { crm_debug("Updating outdated pending operation %.8s " "(state=%s) according to the one (state=%s) from " "remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_steal(remote_history, op->id); op->id = remote_op->id; remote_op->id = id; g_hash_table_iter_replace(&iter, remote_op); updated = TRUE; continue; /* skip outdated entries */ } else if (!stonith__op_state_pending(op->state) && stonith__op_state_pending(remote_op->state)) { crm_debug("Broadcasting operation %.8s (state=%s) to " "update the outdated pending one " "(state=%s) in remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_remove(remote_history, op->id); } else { g_hash_table_remove(remote_history, op->id); continue; /* skip entries broadcasted already */ } } } if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) { continue; } cnt++; crm_trace("Attaching op %s", op->id); entry = create_xml_node(history, STONITH_OP_EXEC); if (add_id) { crm_xml_add(entry, F_STONITH_REMOTE_OP_ID, op->id); } crm_xml_add(entry, F_STONITH_TARGET, op->target); crm_xml_add(entry, F_STONITH_ACTION, op->action); crm_xml_add(entry, F_STONITH_ORIGIN, op->originator); crm_xml_add(entry, F_STONITH_DELEGATE, op->delegate); crm_xml_add(entry, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_ll(entry, F_STONITH_DATE, op->completed); crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec); crm_xml_add_int(entry, F_STONITH_STATE, op->state); } } if (remote_history) { init_stonith_remote_op_hash_table(&stonith_remote_op_list); updated |= g_hash_table_size(remote_history); g_hash_table_iter_init(&iter, remote_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { if (stonith__op_state_pending(op->state) && pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { crm_warn("Failing pending operation %.8s originated by us but " "known only from peer history", op->id); op->state = st_failed; set_fencing_completed(op); /* use -EHOSTUNREACH to not introduce a new return-code that might trigger unexpected results at other places and to prevent remote_op_done from setting the delegate if not present */ stonith_bcast_result_to_peers(op, -EHOSTUNREACH, FALSE); } g_hash_table_iter_steal(&iter); g_hash_table_replace(stonith_remote_op_list, op->id, op); /* we could trim the history here but if we bail * out after trim we might miss more recent entries * of those that might still be in the list * if we don't bail out trimming once is more * efficient and memory overhead is minimal as * we are just moving pointers from one hash to * another */ } g_hash_table_destroy(remote_history); /* remove what is left */ } if (updated) { stonith_fence_history_trim(); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } if (cnt == 0) { free_xml(history); return NULL; } else { return history; } } /*! * \internal * \brief Craft xml from the local fence-history * * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history(gboolean add_id, const char *target) { return stonith_local_history_diff_and_merge(NULL, add_id, target); } /*! * \internal * \brief Handle fence-history messages (either from API or coming in as * broadcasts * * \param[in] msg Request message * \param[in] output In case of a request from the API used to craft * a reply from * \param[in] remote_peer * \param[in] options call-options from the request * * \return always success as there is actully nothing that can go really wrong */ int stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options) { int rc = 0; const char *target = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER); xmlNode *out_history = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); if (target && (options & st_opt_cs_nodeid)) { int nodeid; crm_node_t *node; pcmk__scan_min_int(target, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { target = node->uname; } } } if (options & st_opt_cleanup) { crm_trace("Cleaning up operations on %s in %p", target, stonith_remote_op_list); stonith_fence_history_cleanup(target, crm_element_value(msg, F_STONITH_CALLID) != NULL); } else if (options & st_opt_broadcast) { /* there is no clear sign atm for when a history sync is done so send a notification for anything that smells like history-sync */ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL); if (crm_element_value(msg, F_STONITH_CALLID)) { /* this is coming from the stonith-API * * craft a broadcast with node's history * so that every node can merge and broadcast * what it has on top */ out_history = stonith_local_history(TRUE, NULL); crm_trace("Broadcasting history to peers"); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else if (remote_peer && !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) { xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST, msg, LOG_NEVER); GHashTable *received_history = history?stonith_xml_history_to_list(history):NULL; /* either a broadcast created directly upon stonith-API request * or a diff as response to such a thing * * in both cases it may have a history or not * if we have differential data * merge in what we've received and stop * otherwise broadcast what we have on top * marking as differential and merge in afterwards */ - if (!history || - !crm_is_true(crm_element_value(history, - F_STONITH_DIFFERENTIAL))) { + if (!history || !pcmk__xe_attr_is_true(history, F_STONITH_DIFFERENTIAL)) { out_history = stonith_local_history_diff_and_merge(received_history, TRUE, NULL); if (out_history) { crm_trace("Broadcasting history-diff to peers"); - crm_xml_add(out_history, F_STONITH_DIFFERENTIAL, - XML_BOOLEAN_TRUE); + pcmk__xe_set_bool_attr(out_history, F_STONITH_DIFFERENTIAL, true); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else { crm_trace("History-diff is empty - skip broadcast"); } } } else { crm_trace("Skipping history-query-broadcast (%s%s)" " we sent ourselves", remote_peer?"remote-peer=":"local-ipc", remote_peer?remote_peer:""); } } else { /* plain history request */ crm_trace("Looking for operations on %s in %p", target, stonith_remote_op_list); *output = stonith_local_history(FALSE, target); } free_xml(out_history); return rc; } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index b09d2865e3..37443d743e 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2211 +1,2211 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a st_query_result_t, with each device's * information kept in a device_properties_t. */ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; /* Action-specific base delay for each phase */ int delay_base[st_phase_max]; } device_properties_t; typedef struct st_query_result_s { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } st_query_result_t; GHashTable *stonith_remote_op_list = NULL; void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc); static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const st_query_result_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data) { st_query_result_t *query = data; crm_trace("Free'ing query result from %s", query->host); g_hash_table_destroy(query->devices); free(query->host); free(query); } } void free_stonith_remote_op_list() { if (stonith_remote_op_list != NULL) { g_hash_table_destroy(stonith_remote_op_list); stonith_remote_op_list = NULL; } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified)) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, gboolean verified_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, const char *device) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! * \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); g_list_free(op->duplicates); free(op); } void init_stonith_remote_op_hash_table(GHashTable **table) { if (*table == NULL) { *table = pcmk__strkey_table(NULL, free_remote_op); } } /*! * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? "reboot" : op->action); } /*! * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot targeting %s to 'off' " CRM_XS " id=%.8s", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, "off"); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GList *iter = NULL; crm_info("Remapped 'off' targeting %s complete, " "remapping to 'on' for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, "on"); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GList *match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot targeting %s for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, "reboot"); } } static xmlNode * create_op_done_notify(remote_fencing_op_t * op, int rc) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } void stonith_bcast_result_to_peers(remote_fencing_op_t * op, int rc, gboolean op_merged) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = create_op_done_notify(op, rc); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); if (op_merged) { - crm_xml_add(bcast, F_STONITH_MERGED, "true"); + pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true); } add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } static void handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ notify_data = create_op_done_notify(op, rc); crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = stonith_construct_reply(op->request, NULL, data, rc); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); /* bcast to all local clients that the fencing operation happend */ do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); free_xml(notify_data); } static void handle_duplicates(remote_fencing_op_t * op, xmlNode * data, int rc) { GList *iter = NULL; for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " CRM_XS " id=%.8s", other->client_name, other->originator, pcmk_strerror(rc), other->id); remote_op_done(other, data, rc, TRUE); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " CRM_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER); if (match == NULL) { return crm_element_value_copy(xml, F_ORIG); } else { return crm_element_value_copy(match, F_STONITH_DELEGATE); } } /*! * \internal * \brief Finalize a remote operation. * * \description This function has two code paths. * * Path 1. This node is the owner of the operation and needs * to notify the cpg group via a broadcast as to the operation's * results. * * Path 2. The cpg broadcast is received. All nodes notify their local * stonith clients the operation results. * * So, The owner of the operation first notifies the cluster of the result, * and once that cpg notify is received back it notifies all the local clients. * * Nodes that are passive watchers of the operation will receive the * broadcast and only need to notify their local clients the operation finished. * * \param op, The fencing operation to finalize * \param data, The xml msg reply (if present) of the last delegated fencing * operation. * \param dup, Is this operation a duplicate, if so treat it a little differently * making sure the broadcast is not sent out. */ static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; gboolean op_merged = FALSE; set_fencing_completed(op); clear_remote_op_timers(op); undo_op_remap(op); if (op->notify_sent == TRUE) { crm_err("Already sent notifications for '%s' targeting %s by %s for " "client %s@%s: %s " CRM_XS " rc=%d state=%s id=%.8s", op->action, op->target, (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, pcmk_strerror(rc), rc, stonith_op_state_str(op->state), op->id); goto remote_op_done_cleanup; } if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } else if (op->delegate == NULL) { switch (rc) { case -ENODEV: case -EHOSTUNREACH: break; default: op->delegate = delegate_from_xml(data); break; } } if(dup) { op_merged = TRUE; } else if (crm_element_value(data, F_STONITH_MERGED)) { op_merged = TRUE; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (dup == FALSE && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) { /* Defer notification until the bcast message arrives */ stonith_bcast_result_to_peers(op, rc, (op_merged? TRUE: FALSE)); goto remote_op_done_cleanup; } if (rc == pcmk_ok || dup) { level = LOG_NOTICE; } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { level = LOG_NOTICE; } do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s " CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), pcmk_strerror(rc), op->id); handle_local_reply_and_notify(op, data, rc); if (dup == FALSE) { handle_duplicates(op, data, rc); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } remote_op_done_cleanup: free_xml(local_data); } static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Self-fencing (%s) by %s for %s assumed complete " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); op->state = st_done; remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); call_remote_stonith(op, NULL, pcmk_ok); return FALSE; } static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action '%s' targeting %s for client %s already completed " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); return FALSE; } crm_debug("Action '%s' targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. */ remote_op_done(op, NULL, pcmk_ok, FALSE); return FALSE; } op->state = st_failed; remote_op_done(op, NULL, -ETIME, FALSE); return FALSE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %.8s targeting %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %.8s targeting %s already in progress", op->id, op->target); } else if (op->query_results) { crm_debug("Query %.8s targeting %s complete (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); call_remote_stonith(op, NULL, pcmk_ok); } else { crm_debug("Query %.8s targeting %s timed out (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } remote_op_timeout(op); } return FALSE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /*! * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); } } /*! * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GList *devices) { GList *lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } op->devices = op->devices_list; } /*! * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch(tp->kind) { case 2: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case 1: /* This level targets by name, so tp->target is a regular expression * matching names of nodes to be targeted. */ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case 0: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? pcmk_rc_ok : ENODEV; } CRM_ASSERT(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level if (op->level > 1 && op->delay > 0) { op->delay = 0; } if (g_list_next(op->devices_list) && pcmk__str_eq(op->action, "reboot", pcmk__str_casei)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_notice("All fencing options targeting %s for client %s@%s failed " CRM_XS " id=%.8s", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! * \brief Check to see if this operation is a duplicate of another in flight * operation. If so merge this operation into the inflight operation, and mark * it as a duplicate. */ static void merge_duplicates(remote_fencing_op_t * op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: suicide for %s", op->id, other->id, other->target); continue; } if (!fencing_peer_active(crm_get_peer(0, other->originator))) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " CRM_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)", op->id, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %d", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); op->state = st_done; set_fencing_completed(op); op->delegate = strdup("a human"); crm_notice("Injecting manual confirmation that %s is safely off/down", crm_element_value(dev, F_STONITH_TARGET)); remote_op_done(op, msg, pcmk_ok, FALSE); // Replies are sent via done_cb -> send_async_reply() -> do_local_reply() return -EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER); int call_options = 0; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); CRM_ASSERT(op != NULL); crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays crm_element_value_int(request, F_STONITH_DELAY, &(op->delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } /* For a RELAY operation, set fenced on the client. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); } op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %d, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; crm_node_t *node; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if (node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } remote_fencing_op_t * initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { crm_notice("Processing manual confirmation of fencing targeting %s " CRM_XS " id=%.8s", op->target, op->id); return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: crm_warn("Could not request peer fencing (%s) targeting %s " CRM_XS " id=%.8s", op->action, op->target, op->id); remote_op_done(op, NULL, -EINVAL, FALSE); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " CRM_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " CRM_XS " id=%.8s state=%s base_timeout=%d", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); if (relay_op_id) { crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id); } } send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static st_query_result_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { st_query_result_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if ((peer->tried == FALSE) && count_peer_devices(op, peer, verified_devices_only)) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static st_query_result_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; st_query_result_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); return NULL; } static int get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, const char *device) { device_properties_t *props; if (!peer || !device) { return op->base_timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return op->base_timeout; } return (props->custom_action_timeout[op->phase]? props->custom_action_timeout[op->phase] : op->base_timeout) + props->delay_max[op->phase]; } struct timeout_data { const remote_fencing_op_t *op; const st_query_result_t *peer; int total_timeout; }; /*! * \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id); } } static int get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const st_query_result_t *chosen_peer) { int total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { for (iter = op->query_results; iter != NULL; iter = iter->next) { const st_query_result_t *peer = iter->data; if (find_peer_device(op, peer, device_list->data)) { total_timeout += get_device_timeout(op, peer, device_list->data); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = op->base_timeout; } return total_timeout ? total_timeout : op->base_timeout; } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { /* The client is connected to this node, send the update direclty to them */ do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Operation to advance * \param[in] device ID of device just completed * \param[in] msg XML reply that contained device result (if available) * \param[in] rc Return code of device's execution */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg, int rc) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, "on", pcmk__str_casei)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. */ op_phase_on(op); } if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)", op->target, op->client_name, op->originator, rc); // The requested delay has been applied for the first device if (op->delay > 0) { op->delay = 0; } call_remote_stonith(op, NULL, pcmk_ok); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { crm_notice("Waiting %lds for %s to self-fence (%s) for " "client %s " CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), op->target, op->action, op->client_name, op->id); op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) { const char *device = NULL; int timeout = op->base_timeout; crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { int total_timeout = get_op_total_timeout(op, peer); op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * total_timeout; op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %d for peer's fencing targeting %s for %s" CRM_XS "id=%.8s", total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore any peer preference, they might not have the device we need */ /* When using topology, stonith_choose_peer() removes the device from * further consideration, so be sure to calculate timeout beforehand */ peer = stonith_choose_peer(op); device = op->devices->data; timeout = get_device_timeout(op, peer, device); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay); if (device) { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " CRM_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); } else { timeout_one = TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " CRM_XS " for client %s (%ds, %lds)", peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); } if (!(stonith_watchdog_timeout_ms > 0 && ( (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)) || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) && check_watchdog_fencing_and_wait(op))) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for suicide. Local pe-execution in sbd may detect the node as unclean and lead to timely suicide. Otherwise the selection of stonith-watchdog-timeout at least is questionable. */ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL, pcmk_ok); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " CRM_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_LOG_ASSERT(op->state < st_done); remote_op_timeout(op); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { // int rc = -EHOSTUNREACH; /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { if (check_watchdog_fencing_and_wait(op)) { return; } } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); rc = -ENODEV; } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { rc = -EHOSTUNREACH; } crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; remote_op_done(op, NULL, rc, FALSE); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " CRM_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const st_query_result_t *peer_a = a; const st_query_result_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not */ static gboolean all_topology_devices_found(remote_fencing_op_t * op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__strcase_any_of(op->action, "off", "reboot", NULL)) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { st_query_result_t *peer = iter->data; if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } match = find_peer_device(op, peer, device->data); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] msg XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %d", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", peer, device, props->delay_max[phase], action); } props->delay_base[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]); if (props->delay_base[phase]) { crm_trace("Peer %s with device %s returned base delay %d for %s", peer, device, props->delay_base[phase], action); } /* Handle devices with automatic unfencing */ if (pcmk__str_eq(action, "on", pcmk__str_casei)) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ - if (crm_is_true(crm_element_value(xml, F_STONITH_ACTION_DISALLOWED))) { + if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] result Peer's results * \param[in] device ID of device being parsed */ static void add_device_properties(xmlNode *xml, remote_fencing_op_t *op, st_query_result_t *result, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = calloc(1, sizeof(device_properties_t)); /* Add a new entry to this result's devices list */ CRM_ASSERT(props != NULL); g_hash_table_insert(result->devices, strdup(device), props); /* Peers with verified (monitored) access will be preferred */ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", result->host, device); props->verified = TRUE; } /* Parse action-specific device properties */ parse_action_specific(xml, result->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. */ if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) { parse_action_specific(child, result->host, device, "off", op, st_phase_off, props); } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) { parse_action_specific(child, result->host, device, "on", op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static st_query_result_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml) { st_query_result_t *result = calloc(1, sizeof(st_query_result_t)); xmlNode *child; // cppcheck seems not to understand the abort logic in CRM_CHECK // cppcheck-suppress memleak CRM_CHECK(result != NULL, return NULL); result->host = strdup(host); result->devices = pcmk__strkey_table(free, free); /* Each child element describes one capable device available to the peer */ for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { const char *device = ID(child); if (device) { add_device_properties(child, op, result, device); } } result->ndevices = g_hash_table_size(result->devices); CRM_CHECK(ndevices == result->ndevices, crm_err("Query claimed to have %d device%s but %d found", ndevices, pcmk__plural_s(ndevices), result->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); return result; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode * msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; st_query_result_t *result = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(stonith_remote_op_list, id); if (op == NULL) { crm_debug("Received query reply for unknown or expired operation %s", id); return -EOPNOTSUPP; } replies_expected = fencing_active_peers(); if (op->replies_expected < replies_expected) { replies_expected = op->replies_expected; } if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, F_ORIG); host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); if (ndevices > 0) { result = add_result(op, host, ndevices, dev); } if (pcmk_is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); call_remote_stonith(op, result, pcmk_ok); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); call_remote_stonith(op, NULL, pcmk_ok); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, result, TRUE); /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if (result && (host_is_target == FALSE) && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified device%s", nverified, pcmk__plural_s(nverified)); call_remote_stonith(op, result, pcmk_ok); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); call_remote_stonith(op, NULL, pcmk_ok); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if (result && (op->state == st_done)) { crm_info("Discarding query result from %s (%d device%s): " "Operation is %s", result->host, result->ndevices, pcmk__plural_s(result->ndevices), stonith_op_state_str(op->state)); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error */ int process_remote_stonith_exec(xmlNode * msg) { int rc = 0; const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_RC, &rc); device = crm_element_value(dev, F_STONITH_DEVICE); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if (op == NULL && rc == pcmk_ok) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); return -EOPNOTSUPP; } if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return rc; } if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) { crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s " CRM_XS " rc=%d id=%.8s", op->action, op->target, op->client_name, op->originator, pcmk_strerror(rc), rc, op->id); if (rc == pcmk_ok) { op->state = st_done; } else { op->state = st_failed; } remote_op_done(op, msg, rc, FALSE); return pcmk_ok; } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return rc; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = crm_element_value(msg, F_STONITH_DEVICE); crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s " CRM_XS " rc=%d", op->action, op->target, device, op->client_name, op->originator, pcmk_strerror(rc), rc); /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { remote_op_done(op, msg, rc, FALSE); return rc; } if ((op->phase == 2) && (rc != pcmk_ok)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (exit code %d) targeting %s " "after successful 'off'", device, rc, op->target); rc = pcmk_ok; } if (rc == pcmk_ok) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg, rc); return rc; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } } } else if (rc == pcmk_ok && op->devices == NULL) { crm_trace("All done for %s", op->target); op->state = st_done; remote_op_done(op, msg, rc, FALSE); return rc; } else if (rc == -ETIME && op->devices == NULL) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; remote_op_done(op, msg, rc, FALSE); return rc; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, op->client_name, rc); call_remote_stonith(op, NULL, rc); return rc; } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 3e7a4fdaea..a64004ce17 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1673 +1,1668 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; static gboolean no_cib_connect = FALSE; static gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static xmlNode *local_cib = NULL; static pe_working_set_t *fenced_data_set = NULL; static cib_t *cib_api = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_LOG, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, { NULL, NULL, NULL } }; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack", CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, F_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags 0x%08" PRIx32 "/0x%08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (pcmk__str_eq(op, "poke", pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ pcmk__client_t *client_obj = NULL; int local_rc = pcmk_rc_ok; crm_trace("Sending response"); client_obj = pcmk__find_client_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = EPROTO; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to client %s%s", rid, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } else { crm_trace("Sending an event to client %s%s", pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } local_rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); } if ((local_rc != pcmk_rc_ok) && (client_obj != NULL)) { crm_warn("%s reply to client %s failed: %s", (sync_reply? "Synchronous" : "Asynchronous"), pcmk__client_name(client_obj), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event|crm_ipc_server_error); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init(void) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static pe_node_t * our_node_allowed_for(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, NULL, TRUE); free_xml(xml); if (rc != pcmk_ok) { crm_crit("Cannot register watchdog pseudo fence agent"); crm_exit(CRM_EX_FATAL); } } } else { /* be silent if no device - todo parameter to stonith_device_remove */ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); } } } static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { xmlNode *stonith_enabled_xml = NULL; - const char *stonith_enabled_s = NULL; + bool stonith_enabled = false; + int rc = pcmk_rc_ok; long timeout_ms = 0; stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", cib, LOG_NEVER); - if (stonith_enabled_xml) { - stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); - } + rc = pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &stonith_enabled); - if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { + if (rc != pcmk_rc_ok || stonith_enabled) { xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", cib, LOG_NEVER); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if (value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_watchdog_timeout(); } } stonith_watchdog_timeout_ms = timeout_ms; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; pe_node_t *parent = NULL; /* If this is a complex resource, check children rather than this resource itself. */ if(rsc->children) { GList *gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(pe_rsc_is_clone(rsc)) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return; } /* If this STONITH resource is disabled, remove it. */ if (pe__resource_is_disabled(rsc)) { crm_info("Device %s has been disabled", rsc->id); return; } /* if watchdog-fencing is disabled handle any watchdog-fence resource as if it was disabled */ if ((stonith_watchdog_timeout_ms <= 0) && pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_info("Watchdog-fencing disabled thus handling " "device %s as disabled", rsc->id); return; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } return; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); return; } else { /* Our node is allowed, so update the device information */ int rc; xmlNode *data; GHashTable *rsc_params = NULL; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); rsc_params = pe_rsc_params(rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); g_hash_table_iter_init(&gIter, rsc_params); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, agent, params, rsc_provides); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(data, NULL, TRUE); CRM_ASSERT(rc == pcmk_ok); free_xml(data); } } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); CRM_ASSERT(fenced_data_set != NULL); fenced_data_set->input = local_cib; fenced_data_set->now = crm_time_new(NULL); fenced_data_set->localhost = stonith_our_uname; pe__set_working_set_flags(fenced_data_set, pe_flag_quick_location); cluster_status(fenced_data_set); pcmk__schedule_actions(fenced_data_set, NULL, NULL); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(fenced_data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; reason = strdup("(meta) attribute deleted from resource"); break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in CIB"); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_NEVER); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; - const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; long timeout_ms_saved = stonith_watchdog_timeout_ms; gboolean need_full_refresh = FALSE; + bool value = false; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_NEVER); - if (stonith_enabled_xml) { - stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); - } - - if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { + if (pcmk__xe_get_bool_attr(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE, &value) == pcmk_rc_ok && !value) { crm_trace("Ignoring CIB updates while fencing is disabled"); stonith_enabled_saved = FALSE; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating fencing device and topology lists " "now that fencing is enabled"); stonith_enabled_saved = TRUE; need_full_refresh = TRUE; } else { if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = TRUE; } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); watchdog_device_update(); } } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = copy_xml(output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { stonith_cleanup(); crm_exit(CRM_EX_OK); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); free(stonith_our_uname); stonith_our_uname = NULL; free_xml(local_cib); local_cib = NULL; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "stand-alone", no_argument, 0, 's', NULL, pcmk__option_default }, { "stand-alone-w-cpg", no_argument, 0, 'c', NULL, pcmk__option_default }, { "logfile", required_argument, 0, 'l', NULL, pcmk__option_default }, { "verbose", no_argument, 0, 'V', NULL, pcmk__option_default }, { "version", no_argument, 0, '$', NULL, pcmk__option_default }, { "help", no_argument, 0, '?', NULL, pcmk__option_default }, { 0, 0, 0, 0 } }; static void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t *cluster = NULL; const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; crm_ipc_t *old_instance = NULL; int rc = pcmk_rc_ok; crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for executing fencing devices in a " "Pacemaker cluster"); while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': { int rc = pcmk__add_logfile(optarg); if (rc != pcmk_rc_ok) { /* Logging has not yet been initialized, so stderr is * the only way to get information out */ fprintf(stderr, "Logging to %s is disabled: %s\n", optarg, pcmk_rc_str(rc)); } } break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" Instance attributes available for all \"stonith\"-class resources" " and used by Pacemaker's fence daemon, formerly known as stonithd\n"); printf(" Instance attributes available for all \"stonith\"-class resources\n"); printf(" \n"); #if 0 // priority is not implemented yet printf(" \n"); printf(" Devices that are not in a topology " "are tried in order of highest to lowest integer priority\n"); printf(" \n"); printf(" \n"); #endif printf(" \n", PCMK_STONITH_HOST_ARGUMENT); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_MAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_LIST); printf(" A list of machines controlled by " "this device (Optional unless %s=static-list).\n", PCMK_STONITH_HOST_CHECK); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_CHECK); printf (" How to determine which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list " "(query the device via the 'list' command), static-list " "(check the " PCMK_STONITH_HOST_LIST " attribute), status " "(query the device via the 'status' command), none (assume " "every device can fence every machine)\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_DELAY_MAX); printf(" Enable a delay of no more than the " "time specified before executing fencing actions. Pacemaker " "derives the overall delay by taking the value of " PCMK_STONITH_DELAY_BASE " and adding a random delay value such " "that the sum is kept below this maximum.\n"); printf(" This prevents double fencing when " "using slow devices such as sbd.\nUse this to enable a random " "delay for fencing actions.\nThe overall delay is derived from " "this random delay value adding a static delay so that the sum " "is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_DELAY_BASE); printf(" Enable a base delay for " "fencing actions and specify base delay value.\n"); printf(" This prevents double fencing when " "different delays are configured on the nodes.\nUse this to " "enable a static delay for fencing actions.\nThe overall delay " "is derived from a random delay value adding this static delay " "so that the sum is kept below the maximum delay.\nSet to eg. " "node1:1s;node2:5 to set different value per node.\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Cluster property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < PCMK__NELEM(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return CRM_EX_OK; } if (optind != argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); pe__set_working_set_flags(fenced_data_set, pe_flag_no_counts|pe_flag_no_compat); pe__set_working_set_flags(fenced_data_set, pe_flag_show_utilization); cluster = calloc(1, sizeof(crm_cluster_t)); CRM_ASSERT(cluster != NULL); if (stand_alone == FALSE) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } stonith_our_uname = strdup(cluster->uname); if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); pcmk__register_formats(NULL, formats); rc = pcmk__output_new(&out, "log", NULL, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { crm_err("Can't log resource details due to internal error: %s\n", pcmk_rc_str(rc)); crm_exit(CRM_EX_FATAL); } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_TRACE); fenced_data_set->priv = out; /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); stonith_cleanup(); free(cluster->uuid); free(cluster->uname); free(cluster); pe_free_working_set(fenced_data_set); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); pcmk__unregister_formats(); crm_exit(CRM_EX_OK); } diff --git a/lib/cib/cib_attrs.c b/lib/cib/cib_attrs.c index 0ec90926ed..4687b523f8 100644 --- a/lib/cib/cib_attrs.c +++ b/lib/cib/cib_attrs.c @@ -1,592 +1,592 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define attr_msg(level, fmt, args...) do { \ if(to_console) { \ printf(fmt"\n", ##args); \ } else { \ do_crm_log(level, fmt , ##args); \ } \ } while(0) /* could also check for possible truncation */ #define attr_snprintf(_str, _offset, _limit, ...) do { \ _offset += snprintf(_str + _offset, \ (_limit > _offset) ? _limit - _offset : 0, \ __VA_ARGS__); \ } while(0) #define XPATH_MAX 1024 extern int find_nvpair_attr_delegate(cib_t * the_cib, const char *attr, const char *section, const char *node_uuid, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, gboolean to_console, char **value, const char *user_name) { int offset = 0; int rc = pcmk_ok; char *xpath_string = NULL; xmlNode *xml_search = NULL; const char *set_type = NULL; const char *node_type = NULL; if (attr_set_type) { set_type = attr_set_type; } else { set_type = XML_TAG_ATTR_SETS; } CRM_ASSERT(value != NULL); *value = NULL; if (pcmk__str_eq(section, XML_CIB_TAG_CRMCONFIG, pcmk__str_casei)) { node_uuid = NULL; set_type = XML_CIB_TAG_PROPSET; } else if (pcmk__strcase_any_of(section, XML_CIB_TAG_OPCONFIG, XML_CIB_TAG_RSCCONFIG, NULL)) { node_uuid = NULL; set_type = XML_TAG_META_SETS; } else if (pcmk__str_eq(section, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { node_uuid = NULL; section = XML_CIB_TAG_STATUS; node_type = XML_CIB_TAG_TICKETS; } else if (node_uuid == NULL) { return -EINVAL; } xpath_string = calloc(1, XPATH_MAX); if (xpath_string == NULL) { crm_perror(LOG_CRIT, "Could not create xpath"); return -ENOMEM; } attr_snprintf(xpath_string, offset, XPATH_MAX, "%.128s", get_object_path(section)); if (pcmk__str_eq(node_type, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { attr_snprintf(xpath_string, offset, XPATH_MAX, "//%s", node_type); } else if (node_uuid) { const char *node_type = XML_CIB_TAG_NODE; if (pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) { node_type = XML_CIB_TAG_STATE; set_type = XML_TAG_TRANSIENT_NODEATTRS; } attr_snprintf(xpath_string, offset, XPATH_MAX, "//%s[@id='%s']", node_type, node_uuid); } if (set_name) { attr_snprintf(xpath_string, offset, XPATH_MAX, "//%s[@id='%.128s']", set_type, set_name); } else { attr_snprintf(xpath_string, offset, XPATH_MAX, "//%s", set_type); } attr_snprintf(xpath_string, offset, XPATH_MAX, "//nvpair["); if (attr_id) { attr_snprintf(xpath_string, offset, XPATH_MAX, "@id='%s'", attr_id); } if (attr_name) { if (attr_id) { attr_snprintf(xpath_string, offset, XPATH_MAX, " and "); } attr_snprintf(xpath_string, offset, XPATH_MAX, "@name='%.128s'", attr_name); } attr_snprintf(xpath_string, offset, XPATH_MAX, "]"); CRM_LOG_ASSERT(offset > 0); rc = cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, cib_sync_call | cib_scope_local | cib_xpath, user_name); if (rc != pcmk_ok) { crm_trace("Query failed for attribute %s (section=%s, node=%s, set=%s, xpath=%s): %s", attr_name, section, crm_str(node_uuid), crm_str(set_name), xpath_string, pcmk_strerror(rc)); goto done; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -ENOTUNIQ; attr_msg(LOG_WARNING, "Multiple attributes match name=%s", attr_name); for (child = pcmk__xml_first_child(xml_search); child != NULL; child = pcmk__xml_next(child)) { attr_msg(LOG_INFO, " Value: %s \t(id=%s)", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } done: free(xpath_string); free_xml(xml_search); return rc; } int update_attr_delegate(cib_t * the_cib, int call_options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, gboolean to_console, const char *user_name, const char *node_type) { const char *tag = NULL; int rc = pcmk_ok; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; char *local_attr_id = NULL; char *local_set_name = NULL; CRM_CHECK(section != NULL, return -EINVAL); CRM_CHECK(attr_value != NULL, return -EINVAL); CRM_CHECK(attr_name != NULL || attr_id != NULL, return -EINVAL); rc = find_nvpair_attr_delegate(the_cib, XML_ATTR_ID, section, node_uuid, set_type, set_name, attr_id, attr_name, to_console, &local_attr_id, user_name); if (rc == pcmk_ok) { attr_id = local_attr_id; goto do_modify; } else if (rc != -ENXIO) { return rc; /* } else if(attr_id == NULL) { */ /* return -EINVAL; */ } else { crm_trace("%s does not exist, create it", attr_name); if (pcmk__str_eq(section, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { node_uuid = NULL; section = XML_CIB_TAG_STATUS; node_type = XML_CIB_TAG_TICKETS; xml_top = create_xml_node(xml_obj, XML_CIB_TAG_STATUS); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS); } else if (pcmk__str_eq(section, XML_CIB_TAG_NODES, pcmk__str_casei)) { if (node_uuid == NULL) { return -EINVAL; } if (pcmk__str_eq(node_type, "remote", pcmk__str_casei)) { xml_top = create_xml_node(xml_obj, XML_CIB_TAG_NODES); xml_obj = create_xml_node(xml_top, XML_CIB_TAG_NODE); crm_xml_add(xml_obj, XML_ATTR_TYPE, "remote"); crm_xml_add(xml_obj, XML_ATTR_ID, node_uuid); crm_xml_add(xml_obj, XML_ATTR_UNAME, node_uuid); } else { tag = XML_CIB_TAG_NODE; } } else if (pcmk__str_eq(section, XML_CIB_TAG_STATUS, pcmk__str_casei)) { tag = XML_TAG_TRANSIENT_NODEATTRS; if (node_uuid == NULL) { return -EINVAL; } xml_top = create_xml_node(xml_obj, XML_CIB_TAG_STATE); crm_xml_add(xml_top, XML_ATTR_ID, node_uuid); xml_obj = xml_top; } else { tag = section; node_uuid = NULL; } if (set_name == NULL) { if (pcmk__str_eq(section, XML_CIB_TAG_CRMCONFIG, pcmk__str_casei)) { local_set_name = strdup(CIB_OPTIONS_FIRST); } else if (pcmk__str_eq(node_type, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { local_set_name = crm_strdup_printf("%s-%s", section, XML_CIB_TAG_TICKETS); } else if (node_uuid) { local_set_name = crm_strdup_printf("%s-%s", section, node_uuid); if (set_type) { char *tmp_set_name = local_set_name; local_set_name = crm_strdup_printf("%s-%s", tmp_set_name, set_type); free(tmp_set_name); } } else { local_set_name = crm_strdup_printf("%s-options", section); } set_name = local_set_name; } if (attr_id == NULL) { local_attr_id = crm_strdup_printf("%s-%s", set_name, attr_name); crm_xml_sanitize_id(local_attr_id); attr_id = local_attr_id; } else if (attr_name == NULL) { attr_name = attr_id; } crm_trace("Creating %s/%s", section, tag); if (tag != NULL) { xml_obj = create_xml_node(xml_obj, tag); crm_xml_add(xml_obj, XML_ATTR_ID, node_uuid); if (xml_top == NULL) { xml_top = xml_obj; } } if (node_uuid == NULL && !pcmk__str_eq(node_type, XML_CIB_TAG_TICKETS, pcmk__str_casei)) { if (pcmk__str_eq(section, XML_CIB_TAG_CRMCONFIG, pcmk__str_casei)) { xml_obj = create_xml_node(xml_obj, XML_CIB_TAG_PROPSET); } else { xml_obj = create_xml_node(xml_obj, XML_TAG_META_SETS); } } else if (set_type) { xml_obj = create_xml_node(xml_obj, set_type); } else { xml_obj = create_xml_node(xml_obj, XML_TAG_ATTR_SETS); } crm_xml_add(xml_obj, XML_ATTR_ID, set_name); if (xml_top == NULL) { xml_top = xml_obj; } } do_modify: xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_trace(xml_top, "update_attr"); rc = cib_internal_op(the_cib, CIB_OP_MODIFY, NULL, section, xml_top, NULL, call_options | cib_quorum_override, user_name); if (rc < pcmk_ok) { attr_msg(LOG_ERR, "Error setting %s=%s (section=%s, set=%s): %s", attr_name, attr_value, section, crm_str(set_name), pcmk_strerror(rc)); crm_log_xml_info(xml_top, "Update"); } free(local_set_name); free(local_attr_id); free_xml(xml_top); return rc; } int read_attr_delegate(cib_t * the_cib, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **attr_value, gboolean to_console, const char *user_name) { int rc = pcmk_ok; CRM_ASSERT(attr_value != NULL); CRM_CHECK(section != NULL, return -EINVAL); CRM_CHECK(attr_name != NULL || attr_id != NULL, return -EINVAL); *attr_value = NULL; rc = find_nvpair_attr_delegate(the_cib, XML_NVPAIR_ATTR_VALUE, section, node_uuid, set_type, set_name, attr_id, attr_name, to_console, attr_value, user_name); if (rc != pcmk_ok) { crm_trace("Query failed for attribute %s (section=%s, node=%s, set=%s): %s", attr_name, section, crm_str(set_name), crm_str(node_uuid), pcmk_strerror(rc)); } return rc; } int delete_attr_delegate(cib_t * the_cib, int options, const char *section, const char *node_uuid, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, const char *attr_value, gboolean to_console, const char *user_name) { int rc = pcmk_ok; xmlNode *xml_obj = NULL; char *local_attr_id = NULL; CRM_CHECK(section != NULL, return -EINVAL); CRM_CHECK(attr_name != NULL || attr_id != NULL, return -EINVAL); if (attr_id == NULL) { rc = find_nvpair_attr_delegate(the_cib, XML_ATTR_ID, section, node_uuid, set_type, set_name, attr_id, attr_name, to_console, &local_attr_id, user_name); if (rc != pcmk_ok) { return rc; } attr_id = local_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, attr_value); rc = cib_internal_op(the_cib, CIB_OP_DELETE, NULL, section, xml_obj, NULL, options | cib_quorum_override, user_name); if (rc == pcmk_ok) { attr_msg(LOG_DEBUG, "Deleted %s %s: id=%s%s%s%s%s\n", section, node_uuid ? "attribute" : "option", local_attr_id, set_name ? " set=" : "", set_name ? set_name : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(local_attr_id); free_xml(xml_obj); return rc; } /*! * \internal * \brief Parse node UUID from search result * * \param[in] result XML search result * \param[out] uuid If non-NULL, where to store parsed UUID * \param[out] is_remote If non-NULL, set TRUE if result is remote node * * \return pcmk_ok if UUID was successfully parsed, -ENXIO otherwise */ static int get_uuid_from_result(xmlNode *result, char **uuid, int *is_remote) { int rc = -ENXIO; const char *tag; const char *parsed_uuid = NULL; int parsed_is_remote = FALSE; if (result == NULL) { return rc; } /* If there are multiple results, the first is sufficient */ tag = (const char *) (result->name); if (pcmk__str_eq(tag, "xpath-query", pcmk__str_casei)) { result = pcmk__xml_first_child(result); CRM_CHECK(result != NULL, return rc); tag = (const char *) (result->name); } if (pcmk__str_eq(tag, XML_CIB_TAG_NODE, pcmk__str_casei)) { /* Result is tag from section */ if (pcmk__str_eq(crm_element_value(result, XML_ATTR_TYPE), "remote", pcmk__str_casei)) { parsed_uuid = crm_element_value(result, XML_ATTR_UNAME); parsed_is_remote = TRUE; } else { parsed_uuid = ID(result); parsed_is_remote = FALSE; } } else if (pcmk__str_eq(tag, XML_CIB_TAG_RESOURCE, pcmk__str_casei)) { /* Result is for ocf:pacemaker:remote resource */ parsed_uuid = ID(result); parsed_is_remote = TRUE; } else if (pcmk__str_eq(tag, XML_CIB_TAG_NVPAIR, pcmk__str_casei)) { /* Result is remote-node parameter of for guest node */ parsed_uuid = crm_element_value(result, XML_NVPAIR_ATTR_VALUE); parsed_is_remote = TRUE; } else if (pcmk__str_eq(tag, XML_CIB_TAG_STATE, pcmk__str_casei)) { /* Result is tag from section */ parsed_uuid = crm_element_value(result, XML_ATTR_UNAME); - if (crm_is_true(crm_element_value(result, XML_NODE_IS_REMOTE))) { + if (pcmk__xe_attr_is_true(result, XML_NODE_IS_REMOTE)) { parsed_is_remote = TRUE; } } if (parsed_uuid) { if (uuid) { *uuid = strdup(parsed_uuid); } if (is_remote) { *is_remote = parsed_is_remote; } rc = pcmk_ok; } return rc; } /* Search string to find a node by name, as: * - cluster or remote node in nodes section * - remote node in resources section * - guest node in resources section * - orphaned remote node or bundle guest node in status section */ #define XPATH_UPPER_TRANS "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define XPATH_LOWER_TRANS "abcdefghijklmnopqrstuvwxyz" #define XPATH_NODE \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \ "/" XML_CIB_TAG_NODE "[translate(@" XML_ATTR_UNAME ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "/" XML_CIB_TAG_RESOURCE \ "[@class='ocf'][@provider='pacemaker'][@type='remote'][translate(@id,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "/" XML_CIB_TAG_RESOURCE "/" XML_TAG_META_SETS "/" XML_CIB_TAG_NVPAIR \ "[@name='" XML_RSC_ATTR_REMOTE_NODE "'][translate(@value,'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS "/" XML_CIB_TAG_STATE \ "[@" XML_NODE_IS_REMOTE "='true'][translate(@" XML_ATTR_UUID ",'" XPATH_UPPER_TRANS "','" XPATH_LOWER_TRANS "') ='%s']" int query_node_uuid(cib_t * the_cib, const char *uname, char **uuid, int *is_remote_node) { int rc = pcmk_ok; char *xpath_string; xmlNode *xml_search = NULL; char *host_lowercase = g_ascii_strdown(uname, -1); CRM_ASSERT(uname != NULL); if (uuid) { *uuid = NULL; } if (is_remote_node) { *is_remote_node = FALSE; } xpath_string = crm_strdup_printf(XPATH_NODE, host_lowercase, host_lowercase, host_lowercase, host_lowercase); if (cib_internal_op(the_cib, CIB_OP_QUERY, NULL, xpath_string, NULL, &xml_search, cib_sync_call|cib_scope_local|cib_xpath, NULL) == pcmk_ok) { rc = get_uuid_from_result(xml_search, uuid, is_remote_node); } else { rc = -ENXIO; } free(xpath_string); free_xml(xml_search); g_free(host_lowercase); if (rc != pcmk_ok) { crm_debug("Could not map node name '%s' to a UUID: %s", uname, pcmk_strerror(rc)); } else { crm_info("Mapped node name '%s' to UUID %s", uname, (uuid? *uuid : "")); } return rc; } int query_node_uname(cib_t * the_cib, const char *uuid, char **uname) { int rc = pcmk_ok; xmlNode *a_child = NULL; xmlNode *xml_obj = NULL; xmlNode *fragment = NULL; const char *child_name = NULL; CRM_ASSERT(uname != NULL); CRM_ASSERT(uuid != NULL); rc = the_cib->cmds->query(the_cib, XML_CIB_TAG_NODES, &fragment, cib_sync_call | cib_scope_local); if (rc != pcmk_ok) { return rc; } xml_obj = fragment; CRM_CHECK(pcmk__str_eq(crm_element_name(xml_obj), XML_CIB_TAG_NODES, pcmk__str_casei), return -ENOMSG); CRM_ASSERT(xml_obj != NULL); crm_log_xml_trace(xml_obj, "Result section"); rc = -ENXIO; *uname = NULL; for (a_child = pcmk__xml_first_child(xml_obj); a_child != NULL; a_child = pcmk__xml_next(a_child)) { if (pcmk__str_eq((const char *)a_child->name, XML_CIB_TAG_NODE, pcmk__str_none)) { child_name = ID(a_child); if (pcmk__str_eq(uuid, child_name, pcmk__str_casei)) { child_name = crm_element_value(a_child, XML_ATTR_UNAME); if (child_name != NULL) { *uname = strdup(child_name); rc = pcmk_ok; } break; } } } free_xml(fragment); return rc; } int set_standby(cib_t * the_cib, const char *uuid, const char *scope, const char *standby_value) { int rc = pcmk_ok; char *attr_id = NULL; CRM_CHECK(uuid != NULL, return -EINVAL); CRM_CHECK(standby_value != NULL, return -EINVAL); if (pcmk__strcase_any_of(scope, "reboot", XML_CIB_TAG_STATUS, NULL)) { scope = XML_CIB_TAG_STATUS; attr_id = crm_strdup_printf("transient-standby-%.256s", uuid); } else { scope = XML_CIB_TAG_NODES; attr_id = crm_strdup_printf("standby-%.256s", uuid); } rc = update_attr_delegate(the_cib, cib_sync_call, scope, uuid, NULL, NULL, attr_id, "standby", standby_value, TRUE, NULL, NULL); free(attr_id); return rc; } diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index c8f1bf0a8c..7aafa71073 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1,1308 +1,1306 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include #include #include #include #include "crmcluster_private.h" /* The peer cache remembers cluster nodes that have been seen. * This is managed mostly automatically by libcluster, based on * cluster membership events. * * Because cluster nodes can have conflicting names or UUIDs, * the hash table key is a uniquely generated ID. */ GHashTable *crm_peer_cache = NULL; /* * The remote peer cache tracks pacemaker_remote nodes. While the * value has the same type as the peer cache's, it is tracked separately for * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs, * so the name (which is also the UUID) is used as the hash table key; there * is no equivalent of membership events, so management is not automatic; and * most users of the peer cache need to exclude pacemaker_remote nodes. * * That said, using a single cache would be more logical and less error-prone, * so it would be a good idea to merge them one day. * * libcluster provides two avenues for populating the cache: * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it, * while crm_remote_peer_cache_refresh() populates it via the CIB. */ GHashTable *crm_remote_peer_cache = NULL; /* * The known node cache tracks cluster and remote nodes that have been seen in * the CIB. It is useful mainly when a caller needs to know about a node that * may no longer be in the membership, but doesn't want to add the node to the * main peer cache tables. */ static GHashTable *known_node_cache = NULL; unsigned long long crm_peer_seq = 0; gboolean crm_have_quorum = FALSE; static gboolean crm_autoreap = TRUE; // Flag setting and clearing for crm_node_t:flags #define set_peer_flags(peer, flags_to_set) do { \ (peer)->flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_set), \ #flags_to_set); \ } while (0) #define clear_peer_flags(peer, flags_to_clear) do { \ (peer)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Peer", (peer)->uname, \ (peer)->flags, (flags_to_clear), \ #flags_to_clear); \ } while (0) static void update_peer_uname(crm_node_t *node, const char *uname); int crm_remote_peer_cache_size(void) { if (crm_remote_peer_cache == NULL) { return 0; } return g_hash_table_size(crm_remote_peer_cache); } /*! * \brief Get a remote node peer cache entry, creating it if necessary * * \param[in] node_name Name of remote node * * \return Cache entry for node on success, NULL (and set errno) otherwise * * \note When creating a new entry, this will leave the node state undetermined, * so the caller should also call pcmk__update_peer_state() if the state * is known. */ crm_node_t * crm_remote_peer_get(const char *node_name) { crm_node_t *node; if (node_name == NULL) { errno = -EINVAL; return NULL; } /* Return existing cache entry if one exists */ node = g_hash_table_lookup(crm_remote_peer_cache, node_name); if (node) { return node; } /* Allocate a new entry */ node = calloc(1, sizeof(crm_node_t)); if (node == NULL) { return NULL; } /* Populate the essential information */ set_peer_flags(node, crm_remote_node); node->uuid = strdup(node_name); if (node->uuid == NULL) { free(node); errno = -ENOMEM; return NULL; } /* Add the new entry to the cache */ g_hash_table_replace(crm_remote_peer_cache, node->uuid, node); crm_trace("added %s to remote cache", node_name); /* Update the entry's uname, ensuring peer status callbacks are called */ update_peer_uname(node, node_name); return node; } void crm_remote_peer_cache_remove(const char *node_name) { if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { crm_trace("removed %s from remote peer cache", node_name); } } /*! * \internal * \brief Return node status based on a CIB status entry * * \param[in] node_state XML of node state * * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state, * CRM_NODE_MEMBER otherwise * \note Unlike most boolean XML attributes, this one defaults to true, for * backward compatibility with older controllers that don't set it. */ static const char * remote_state_from_cib(xmlNode *node_state) { - const char *status; + bool status = false; - status = crm_element_value(node_state, XML_NODE_IN_CLUSTER); - if (status && !crm_is_true(status)) { - status = CRM_NODE_LOST; + if (pcmk__xe_get_bool_attr(node_state, XML_NODE_IN_CLUSTER, &status) == pcmk_rc_ok && !status) { + return CRM_NODE_LOST; } else { - status = CRM_NODE_MEMBER; + return CRM_NODE_MEMBER; } - return status; } /* user data for looping through remote node xpath searches */ struct refresh_data { const char *field; /* XML attribute to check for node name */ gboolean has_state; /* whether to update node state based on XML */ }; /*! * \internal * \brief Process one pacemaker_remote node xpath search result * * \param[in] result XML search result * \param[in] user_data what to look for in the XML */ static void remote_cache_refresh_helper(xmlNode *result, void *user_data) { struct refresh_data *data = user_data; const char *remote = crm_element_value(result, data->field); const char *state = NULL; crm_node_t *node; CRM_CHECK(remote != NULL, return); /* Determine node's state, if the result has it */ if (data->has_state) { state = remote_state_from_cib(result); } /* Check whether cache already has entry for node */ node = g_hash_table_lookup(crm_remote_peer_cache, remote); if (node == NULL) { /* Node is not in cache, so add a new entry for it */ node = crm_remote_peer_get(remote); CRM_ASSERT(node); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } else if (pcmk_is_set(node->flags, crm_node_dirty)) { /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); if (state) { pcmk__update_peer_state(__func__, node, state, 0); } } } static void mark_dirty(gpointer key, gpointer value, gpointer user_data) { set_peer_flags((crm_node_t *) value, crm_node_dirty); } static gboolean is_dirty(gpointer key, gpointer value, gpointer user_data) { return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty); } /*! * \brief Repopulate the remote peer cache based on CIB XML * * \param[in] xmlNode CIB XML to parse */ void crm_remote_peer_cache_refresh(xmlNode *cib) { struct refresh_data data; crm_peer_init(); /* First, we mark all existing cache entries as dirty, * so that later we can remove any that weren't in the CIB. * We don't empty the cache, because we need to detect changes in state. */ g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL); /* Look for guest nodes and remote nodes in the status section */ data.field = "id"; data.has_state = TRUE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_STATUS, remote_cache_refresh_helper, &data); /* Look for guest nodes and remote nodes in the configuration section, * because they may have just been added and not have a status entry yet. * In that case, the cached node state will be left NULL, so that the * peer status callback isn't called until we're sure the node started * successfully. */ data.field = "value"; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_GUEST_NODE_CONFIG, remote_cache_refresh_helper, &data); data.field = "id"; data.has_state = FALSE; crm_foreach_xpath_result(cib, PCMK__XP_REMOTE_NODE_CONFIG, remote_cache_refresh_helper, &data); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL); } gboolean crm_is_peer_active(const crm_node_t * node) { if(node == NULL) { return FALSE; } if (pcmk_is_set(node->flags, crm_remote_node)) { /* remote nodes are never considered active members. This * guarantees they will never be considered for DC membership.*/ return FALSE; } #if SUPPORT_COROSYNC if (is_corosync_cluster()) { return crm_is_corosync_peer_active(node); } #endif crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type())); return FALSE; } static gboolean crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) { crm_node_t *node = value; crm_node_t *search = user_data; if (search == NULL) { return FALSE; } else if (search->id && node->id != search->id) { return FALSE; } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) { return FALSE; } else if (crm_is_peer_active(value) == FALSE) { crm_info("Removing node with name %s and id %u from membership cache", (node->uname? node->uname : "unknown"), node->id); return TRUE; } return FALSE; } /*! * \brief Remove all peer cache entries matching a node ID and/or uname * * \param[in] id ID of node to remove (or 0 to ignore) * \param[in] name Uname of node to remove (or NULL to ignore) * * \return Number of cache entries removed */ guint reap_crm_member(uint32_t id, const char *name) { int matches = 0; crm_node_t search; if (crm_peer_cache == NULL) { crm_trace("Membership cache not initialized, ignoring purge request"); return 0; } search.id = id; search.uname = name ? strdup(name) : NULL; matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search); if(matches) { crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache", matches, pcmk__plural_s(matches), search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } else { crm_info("No peers with id=%u%s%s to purge from the membership cache", search.id, (search.uname? " and/or uname=" : ""), (search.uname? search.uname : "")); } free(search.uname); return matches; } static void count_peer(gpointer key, gpointer value, gpointer user_data) { guint *count = user_data; crm_node_t *node = value; if (crm_is_peer_active(node)) { *count = *count + 1; } } guint crm_active_peers(void) { guint count = 0; if (crm_peer_cache) { g_hash_table_foreach(crm_peer_cache, count_peer, &count); } return count; } static void destroy_crm_node(gpointer data) { crm_node_t *node = data; crm_trace("Destroying entry for node %u: %s", node->id, node->uname); free(node->uname); free(node->state); free(node->uuid); free(node->expected); free(node); } void crm_peer_init(void) { if (crm_peer_cache == NULL) { crm_peer_cache = pcmk__strikey_table(free, destroy_crm_node); } if (crm_remote_peer_cache == NULL) { crm_remote_peer_cache = pcmk__strikey_table(NULL, destroy_crm_node); } if (known_node_cache == NULL) { known_node_cache = pcmk__strikey_table(free, destroy_crm_node); } } void crm_peer_destroy(void) { if (crm_peer_cache != NULL) { crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache)); g_hash_table_destroy(crm_peer_cache); crm_peer_cache = NULL; } if (crm_remote_peer_cache != NULL) { crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache)); g_hash_table_destroy(crm_remote_peer_cache); crm_remote_peer_cache = NULL; } if (known_node_cache != NULL) { crm_trace("Destroying known node cache with %d members", g_hash_table_size(known_node_cache)); g_hash_table_destroy(known_node_cache); known_node_cache = NULL; } } static void (*peer_status_callback)(enum crm_status_type, crm_node_t *, const void *) = NULL; /*! * \brief Set a client function that will be called after peer status changes * * \param[in] dispatch Pointer to function to use as callback * * \note Previously, client callbacks were responsible for peer cache * management. This is no longer the case, and client callbacks should do * only client-specific handling. Callbacks MUST NOT add or remove entries * in the peer caches. */ void crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *)) { peer_status_callback = dispatch; } /*! * \brief Tell the library whether to automatically reap lost nodes * * If TRUE (the default), calling crm_update_peer_proc() will also update the * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and pcmk__update_peer_state() * will reap peers whose state changes to anything other than CRM_NODE_MEMBER. * Callers should leave this enabled unless they plan to manage the cache * separately on their own. * * \param[in] autoreap TRUE to enable automatic reaping, FALSE to disable */ void crm_set_autoreap(gboolean autoreap) { crm_autoreap = autoreap; } static void dump_peer_hash(int level, const char *caller) { GHashTableIter iter; const char *id = NULL; crm_node_t *node = NULL; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) { do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id); } } static gboolean hash_find_by_data(gpointer key, gpointer value, gpointer user_data) { return value == user_data; } /*! * \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Bitmask of enum crm_get_peer_flags * * \return Node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { node = pcmk__search_cluster_node_cache(id, uname); } return node; } /*! * \brief Get a node cache entry (cluster or Pacemaker Remote) * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Bitmask of enum crm_get_peer_flags * * \return (Possibly newly created) node cache entry */ crm_node_t * crm_get_peer_full(unsigned int id, const char *uname, int flags) { crm_node_t *node = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { node = g_hash_table_lookup(crm_remote_peer_cache, uname); } if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { node = crm_get_peer(id, uname); } return node; } /*! * \internal * \brief Search cluster node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * * \return Cluster node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_cluster_node_cache(unsigned int id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); if (uname != NULL) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id > 0) { g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(node->id == id) { crm_trace("ID match: %u = %p", node->id, node); by_id = node; break; } } } node = by_id; /* Good default */ if(by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %u/%s", by_id, id, uname); } else if(by_id == NULL && by_name) { crm_trace("Only one: %p for %u/%s", by_name, id, uname); if(id && by_name->id) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node %u and %u share the same name '%s'", id, by_name->id, uname); node = NULL; /* Create a new one */ } else { node = by_name; } } else if(by_name == NULL && by_id) { crm_trace("Only one: %p for %u/%s", by_id, id, uname); if(uname && by_id->uname) { dump_peer_hash(LOG_WARNING, __func__); crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct", uname, by_id->uname, id, uname); } } else if(uname && by_id->uname) { if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } else { crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname); dump_peer_hash(LOG_INFO, __func__); crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE, TRUE); } } else if(id && by_name->id) { crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname); } else { /* Simple merge */ /* Only corosync-based clusters use node IDs. The functions that call * pcmk__update_peer_state() and crm_update_peer_proc() only know * nodeid, so 'by_id' is authoritative when merging. */ dump_peer_hash(LOG_DEBUG, __func__); crm_info("Merging %p into %p", by_name, by_id); g_hash_table_foreach_remove(crm_peer_cache, hash_find_by_data, by_name); } return node; } #if SUPPORT_COROSYNC static guint remove_conflicting_peer(crm_node_t *node) { int matches = 0; GHashTableIter iter; crm_node_t *existing_node = NULL; if (node->id == 0 || node->uname == NULL) { return 0; } if (!pcmk__corosync_has_nodelist()) { return 0; } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) { if (existing_node->id > 0 && existing_node->id != node->id && existing_node->uname != NULL && strcasecmp(existing_node->uname, node->uname) == 0) { if (crm_is_peer_active(existing_node)) { continue; } crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u", existing_node->id, existing_node->uname, node->id); g_hash_table_iter_remove(&iter); matches++; } } return matches; } #endif /*! * \brief Get a cluster node cache entry * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * * \return (Possibly newly created) cluster node cache entry */ /* coverity[-alloc] Memory is referenced in one or both hashtables */ crm_node_t * crm_get_peer(unsigned int id, const char *uname) { crm_node_t *node = NULL; char *uname_lookup = NULL; CRM_ASSERT(id > 0 || uname != NULL); crm_peer_init(); node = pcmk__search_cluster_node_cache(id, uname); /* if uname wasn't provided, and find_peer did not turn up a uname based on id. * we need to do a lookup of the node name using the id in the cluster membership. */ if ((node == NULL || node->uname == NULL) && (uname == NULL)) { uname_lookup = get_node_name(id); } if (uname_lookup) { uname = uname_lookup; crm_trace("Inferred a name of '%s' for node %u", uname, id); /* try to turn up the node one more time now that we know the uname. */ if (node == NULL) { node = pcmk__search_cluster_node_cache(id, uname); } } if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node); crm_info("Created entry %s/%p for node %s/%u (%d total)", uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache)); g_hash_table_replace(crm_peer_cache, uniqueid, node); } if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) { crm_info("Node %u is now known as %s", id, uname); } if(id > 0 && node->id == 0) { node->id = id; } if (uname && (node->uname == NULL)) { update_peer_uname(node, uname); } if(node->uuid == NULL) { const char *uuid = crm_peer_uuid(node); if (uuid) { crm_info("Node %u has uuid %s", id, uuid); } else { crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname); } } free(uname_lookup); return node; } /*! * \internal * \brief Update a node's uname * * \param[in] node Node object to update * \param[in] uname New name to set * * \note This function should not be called within a peer cache iteration, * because in some cases it can remove conflicting cache entries, * which would invalidate the iterator. */ static void update_peer_uname(crm_node_t *node, const char *uname) { CRM_CHECK(uname != NULL, crm_err("Bug: can't update node name without name"); return); CRM_CHECK(node != NULL, crm_err("Bug: can't update node name to %s without node", uname); return); if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { crm_debug("Node uname '%s' did not change", uname); return; } for (const char *c = uname; *c; ++c) { if ((*c >= 'A') && (*c <= 'Z')) { crm_warn("Node names with capitals are discouraged, consider changing '%s'", uname); break; } } free(node->uname); node->uname = strdup(uname); CRM_ASSERT(node->uname != NULL); if (peer_status_callback != NULL) { peer_status_callback(crm_status_uname, node, NULL); } #if SUPPORT_COROSYNC if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) { remove_conflicting_peer(node); } #endif } /*! * \internal * \brief Get log-friendly string equivalent of a process flag * * \param[in] proc Process flag * * \return Log-friendly string equivalent of \p proc */ static inline const char * proc2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } /*! * \internal * \brief Update a node's process information (and potentially state) * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] flag Bitmask of new process information * \param[in] status node status (online, offline, etc.) * * \return NULL if any node was reaped from peer caches, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, otherwise * reaping could invalidate the iterator. */ crm_node_t * crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status) { uint32_t last = 0; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL", source, proc2text(flag), status); return NULL); /* Pacemaker doesn't spawn processes on remote nodes */ if (pcmk_is_set(node->flags, crm_remote_node)) { return node; } last = node->processes; if (status == NULL) { node->processes = flag; if (node->processes != last) { changed = TRUE; } } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) { if ((node->processes & flag) != flag) { node->processes = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } } else if (node->processes & flag) { node->processes = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Peer process", node->uname, node->processes, flag, "processes"); changed = TRUE; } if (changed) { if (status == NULL && flag <= crm_proc_none) { crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname, node->id); } else { crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id, proc2text(flag), status); } /* Call the client callback first, then update the peer state, * in case the node will be reaped */ if (peer_status_callback != NULL) { peer_status_callback(crm_status_processes, node, &last); } /* The client callback shouldn't touch the peer caches, * but as a safety net, bail if the peer cache was destroyed. */ if (crm_peer_cache == NULL) { return NULL; } if (crm_autoreap) { const char *peer_state = NULL; if (pcmk_is_set(node->processes, crm_get_cluster_proc())) { peer_state = CRM_NODE_MEMBER; } else { peer_state = CRM_NODE_LOST; } node = pcmk__update_peer_state(__func__, node, peer_state, 0); } } else { crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id, proc2text(flag), status); } return node; } /*! * \internal * \brief Update a cluster node cache entry's expected join state * * \param[in] source Caller's function name (for logging) * \param[in,out] node Node to update * \param[in] expected Node's new join state */ void pcmk__update_peer_expected(const char *source, crm_node_t *node, const char *expected) { char *last = NULL; gboolean changed = FALSE; CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected); return); /* Remote nodes don't participate in joins */ if (pcmk_is_set(node->flags, crm_remote_node)) { return; } last = node->expected; if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) { node->expected = strdup(expected); changed = TRUE; } if (changed) { crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id, expected, last); free(last); } else { crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname, node->id, expected); } } /*! * \internal * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * \param[in] iter If not NULL, pointer to node's peer cache iterator * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function may be called from * within a peer cache iteration if the iterator is supplied. */ static crm_node_t * update_peer_state_iter(const char *source, crm_node_t *node, const char *state, uint64_t membership, GHashTableIter *iter) { gboolean is_member; CRM_CHECK(node != NULL, crm_err("Could not set state for unknown host to %s" CRM_XS " source=%s", state, source); return NULL); is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei); if (is_member) { node->when_lost = 0; if (membership) { node->last_seen = membership; } } if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) { char *last = node->state; node->state = strdup(state); crm_notice("Node %s state is now %s " CRM_XS " nodeid=%u previous=%s source=%s", node->uname, state, node->id, (last? last : "unknown"), source); if (peer_status_callback != NULL) { peer_status_callback(crm_status_nstate, node, last); } free(last); if (crm_autoreap && !is_member && !pcmk_is_set(node->flags, crm_remote_node)) { /* We only autoreap from the peer cache, not the remote peer cache, * because the latter should be managed only by * crm_remote_peer_cache_refresh(). */ if(iter) { crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname); g_hash_table_iter_remove(iter); } else { reap_crm_member(node->id, node->uname); } node = NULL; } } else { crm_trace("Node %s state is unchanged (%s) " CRM_XS " nodeid=%u source=%s", node->uname, state, node->id, source); } return node; } /*! * \brief Update a node's state and membership information * * \param[in] source Caller's function name (for log messages) * \param[in] node Node object to update * \param[in] state Node's new state * \param[in] membership Node's new membership ID * * \return NULL if any node was reaped, value of node otherwise * * \note If this function returns NULL, the supplied node object was likely * freed and should not be used again. This function should not be * called within a cache iteration if reaping is possible, * otherwise reaping could invalidate the iterator. */ crm_node_t * pcmk__update_peer_state(const char *source, crm_node_t *node, const char *state, uint64_t membership) { return update_peer_state_iter(source, node, state, membership, NULL); } /*! * \internal * \brief Reap all nodes from cache whose membership information does not match * * \param[in] membership Membership ID of nodes to keep */ void pcmk__reap_unseen_nodes(uint64_t membership) { GHashTableIter iter; crm_node_t *node = NULL; crm_trace("Reaping unseen nodes..."); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) { if (node->last_seen != membership) { if (node->state) { /* * Calling update_peer_state_iter() allows us to * remove the node from crm_peer_cache without * invalidating our iterator */ update_peer_state_iter(__func__, node, CRM_NODE_LOST, membership, &iter); } else { crm_info("State of node %s[%u] is still unknown", node->uname, node->id); } } } } static crm_node_t * find_known_node(const char *id, const char *uname) { GHashTableIter iter; crm_node_t *node = NULL; crm_node_t *by_id = NULL; crm_node_t *by_name = NULL; if (uname) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if (node->uname && strcasecmp(node->uname, uname) == 0) { crm_trace("Name match: %s = %p", node->uname, node); by_name = node; break; } } } if (id) { g_hash_table_iter_init(&iter, known_node_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { if(strcasecmp(node->uuid, id) == 0) { crm_trace("ID match: %s= %p", id, node); by_id = node; break; } } } node = by_id; /* Good default */ if (by_id == by_name) { /* Nothing to do if they match (both NULL counts) */ crm_trace("Consistent: %p for %s/%s", by_id, id, uname); } else if (by_id == NULL && by_name) { crm_trace("Only one: %p for %s/%s", by_name, id, uname); if (id) { node = NULL; } else { node = by_name; } } else if (by_name == NULL && by_id) { crm_trace("Only one: %p for %s/%s", by_id, id, uname); if (uname) { node = NULL; } } else if (uname && by_id->uname && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) { /* Multiple nodes have the same uname in the CIB. * Return by_id. */ } else if (id && by_name->uuid && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) { /* Multiple nodes have the same id in the CIB. * Return by_name. */ node = by_name; } else { node = NULL; } if (node == NULL) { crm_debug("Couldn't find node%s%s%s%s", id? " " : "", id? id : "", uname? " with name " : "", uname? uname : ""); } return node; } static void known_node_cache_refresh_helper(xmlNode *xml_node, void *user_data) { const char *id = crm_element_value(xml_node, XML_ATTR_ID); const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME); crm_node_t * node = NULL; CRM_CHECK(id != NULL && uname !=NULL, return); node = find_known_node(id, uname); if (node == NULL) { char *uniqueid = crm_generate_uuid(); node = calloc(1, sizeof(crm_node_t)); CRM_ASSERT(node != NULL); node->uname = strdup(uname); CRM_ASSERT(node->uname != NULL); node->uuid = strdup(id); CRM_ASSERT(node->uuid != NULL); g_hash_table_replace(known_node_cache, uniqueid, node); } else if (pcmk_is_set(node->flags, crm_node_dirty)) { if (!pcmk__str_eq(uname, node->uname, pcmk__str_casei)) { free(node->uname); node->uname = strdup(uname); CRM_ASSERT(node->uname != NULL); } /* Node is in cache and hasn't been updated already, so mark it clean */ clear_peer_flags(node, crm_node_dirty); } } static void refresh_known_node_cache(xmlNode *cib) { crm_peer_init(); g_hash_table_foreach(known_node_cache, mark_dirty, NULL); crm_foreach_xpath_result(cib, PCMK__XP_MEMBER_NODE_CONFIG, known_node_cache_refresh_helper, NULL); /* Remove all old cache entries that weren't seen in the CIB */ g_hash_table_foreach_remove(known_node_cache, is_dirty, NULL); } void pcmk__refresh_node_caches_from_cib(xmlNode *cib) { crm_remote_peer_cache_refresh(cib); refresh_known_node_cache(cib); } /*! * \internal * \brief Search known node cache * * \param[in] id If not 0, cluster node ID to search for * \param[in] uname If not NULL, node name to search for * \param[in] flags Bitmask of enum crm_get_peer_flags * * \return Known node cache entry if found, otherwise NULL */ crm_node_t * pcmk__search_known_node_cache(unsigned int id, const char *uname, uint32_t flags) { crm_node_t *node = NULL; char *id_str = NULL; CRM_ASSERT(id > 0 || uname != NULL); node = pcmk__search_node_caches(id, uname, flags); if (node || !(flags & CRM_GET_PEER_CLUSTER)) { return node; } if (id > 0) { id_str = crm_strdup_printf("%u", id); } node = find_known_node(id_str, uname); free(id_str); return node; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include int crm_terminate_member(int nodeid, const char *uname, void *unused) { return stonith_api_kick(nodeid, uname, 120, TRUE); } int crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) { return stonith_api_kick(nodeid, uname, 120, TRUE); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c index 5c60cdf4fc..08979bac04 100644 --- a/lib/common/ipc_controld.c +++ b/lib/common/ipc_controld.c @@ -1,615 +1,613 @@ /* * Copyright 2020-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" struct controld_api_private_s { char *client_uuid; unsigned int replies_expected; }; // \return Standard Pacemaker return code static int new_data(pcmk_ipc_api_t *api) { struct controld_api_private_s *private = NULL; api->api_data = calloc(1, sizeof(struct controld_api_private_s)); if (api->api_data == NULL) { return errno; } private = api->api_data; /* This is set to the PID because that's how it was always done, but PIDs * are not unique because clients can be remote. The value appears to be * unused other than as part of F_CRM_SYS_FROM in IPC requests, which is * only compared against the internal system names (CRM_SYSTEM_TENGINE, * etc.), so it shouldn't be a problem. */ private->client_uuid = pcmk__getpid_s(); /* @TODO Implement a call ID model similar to the CIB, executor, and fencer * IPC APIs, so that requests and replies can be matched, and * duplicate replies can be discarded. */ return pcmk_rc_ok; } static void free_data(void *data) { free(((struct controld_api_private_s *) data)->client_uuid); free(data); } // \return Standard Pacemaker return code static int post_connect(pcmk_ipc_api_t *api) { /* The controller currently requires clients to register via a hello * request, but does not reply back. */ struct controld_api_private_s *private = api->api_data; const char *client_name = crm_system_name? crm_system_name : "client"; xmlNode *hello; int rc; hello = create_hello_message(private->client_uuid, client_name, PCMK__CONTROLD_API_MAJOR, PCMK__CONTROLD_API_MINOR); rc = pcmk__send_ipc_request(api, hello); free_xml(hello); if (rc != pcmk_rc_ok) { crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); } else { crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true)); } return rc; } -#define xml_true(xml, field) crm_is_true(crm_element_value(xml, field)) - static void set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) { data->reply_type = pcmk_controld_reply_info; if (msg_data == NULL) { return; } - data->data.node_info.have_quorum = xml_true(msg_data, XML_ATTR_HAVE_QUORUM); - data->data.node_info.is_remote = xml_true(msg_data, XML_NODE_IS_REMOTE); + data->data.node_info.have_quorum = pcmk__xe_attr_is_true(msg_data, XML_ATTR_HAVE_QUORUM); + data->data.node_info.is_remote = pcmk__xe_attr_is_true(msg_data, XML_NODE_IS_REMOTE); crm_element_value_int(msg_data, XML_ATTR_ID, &(data->data.node_info.id)); data->data.node_info.uuid = crm_element_value(msg_data, XML_ATTR_UUID); data->data.node_info.uname = crm_element_value(msg_data, XML_ATTR_UNAME); data->data.node_info.state = crm_element_value(msg_data, XML_NODE_IS_PEER); } static void set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) { data->reply_type = pcmk_controld_reply_ping; if (msg_data == NULL) { return; } data->data.ping.sys_from = crm_element_value(msg_data, XML_PING_ATTR_SYSFROM); data->data.ping.fsa_state = crm_element_value(msg_data, XML_PING_ATTR_CRMDSTATE); data->data.ping.result = crm_element_value(msg_data, XML_PING_ATTR_STATUS); } static void set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data) { pcmk_controld_api_node_t *node_info; data->reply_type = pcmk_controld_reply_nodes; for (xmlNode *node = first_named_child(msg_data, XML_CIB_TAG_NODE); node != NULL; node = crm_next_same_xml(node)) { long long id_ll = 0; node_info = calloc(1, sizeof(pcmk_controld_api_node_t)); crm_element_value_ll(node, XML_ATTR_ID, &id_ll); if (id_ll > 0) { node_info->id = id_ll; } node_info->uname = crm_element_value(node, XML_ATTR_UNAME); node_info->state = crm_element_value(node, XML_NODE_IN_CLUSTER); data->data.nodes = g_list_prepend(data->data.nodes, node_info); } } static bool reply_expected(pcmk_ipc_api_t *api, xmlNode *request) { const char *command = crm_element_value(request, F_CRM_TASK); if (command == NULL) { return false; } // We only need to handle commands that functions in this file can send return !strcmp(command, CRM_OP_REPROBE) || !strcmp(command, CRM_OP_NODE_INFO) || !strcmp(command, CRM_OP_PING) || !strcmp(command, CRM_OP_LRM_FAIL) || !strcmp(command, CRM_OP_LRM_DELETE); } static void dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { struct controld_api_private_s *private = api->api_data; crm_exit_t status = CRM_EX_OK; xmlNode *msg_data = NULL; const char *value = NULL; pcmk_controld_api_reply_t reply_data = { pcmk_controld_reply_unknown, NULL, NULL, }; if (private->replies_expected > 0) { private->replies_expected--; } // Do some basic validation of the reply /* @TODO We should be able to verify that value is always a response, but * currently the controller doesn't always properly set the type. Even * if we fix the controller, we'll still need to handle replies from * old versions (feature set could be used to differentiate). */ value = crm_element_value(reply, F_CRM_MSG_TYPE); if ((value == NULL) || (strcmp(value, XML_ATTR_REQUEST) && strcmp(value, XML_ATTR_RESPONSE))) { crm_debug("Unrecognizable controller message: invalid message type '%s'", crm_str(value)); status = CRM_EX_PROTOCOL; goto done; } if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { crm_debug("Unrecognizable controller message: no reference"); status = CRM_EX_PROTOCOL; goto done; } value = crm_element_value(reply, F_CRM_TASK); if (value == NULL) { crm_debug("Unrecognizable controller message: no command name"); status = CRM_EX_PROTOCOL; goto done; } // Parse useful info from reply reply_data.feature_set = crm_element_value(reply, XML_ATTR_VERSION); reply_data.host_from = crm_element_value(reply, F_CRM_HOST_FROM); msg_data = get_message_xml(reply, F_CRM_DATA); if (!strcmp(value, CRM_OP_REPROBE)) { reply_data.reply_type = pcmk_controld_reply_reprobe; } else if (!strcmp(value, CRM_OP_NODE_INFO)) { set_node_info_data(&reply_data, msg_data); } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) { reply_data.reply_type = pcmk_controld_reply_resource; reply_data.data.resource.node_state = msg_data; } else if (!strcmp(value, CRM_OP_PING)) { set_ping_data(&reply_data, msg_data); } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) { set_nodes_data(&reply_data, msg_data); } else { crm_debug("Unrecognizable controller message: unknown command '%s'", value); status = CRM_EX_PROTOCOL; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); // Free any reply data that was allocated if (pcmk__str_eq(value, PCMK__CONTROLD_CMD_NODES, pcmk__str_casei)) { g_list_free_full(reply_data.data.nodes, free); } } pcmk__ipc_methods_t * pcmk__controld_api_methods() { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = new_data; cmds->free_data = free_data; cmds->post_connect = post_connect; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; } return cmds; } /*! * \internal * \brief Create XML for a controller IPC request * * \param[in] api Controller connection * \param[in] op Controller IPC command name * \param[in] node Node name to set as destination host * \param[in] msg_data XML to attach to request as message data * * \return Newly allocated XML for request */ static xmlNode * create_controller_request(pcmk_ipc_api_t *api, const char *op, const char *node, xmlNode *msg_data) { struct controld_api_private_s *private = NULL; const char *sys_to = NULL; if (api == NULL) { return NULL; } private = api->api_data; if ((node == NULL) && !strcmp(op, CRM_OP_PING)) { sys_to = CRM_SYSTEM_DC; } else { sys_to = CRM_SYSTEM_CRMD; } return create_request(op, msg_data, node, sys_to, (crm_system_name? crm_system_name : "client"), private->client_uuid); } // \return Standard Pacemaker return code static int send_controller_request(pcmk_ipc_api_t *api, xmlNode *request, bool reply_is_expected) { int rc; if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) { return EINVAL; } rc = pcmk__send_ipc_request(api, request); if ((rc == pcmk_rc_ok) && reply_is_expected) { struct controld_api_private_s *private = api->api_data; private->replies_expected++; } return rc; } static xmlNode * create_reprobe_message_data(const char *target_node, const char *router_node) { xmlNode *msg_data; msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); if ((router_node != NULL) && !pcmk__str_eq(router_node, target_node, pcmk__str_casei)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } return msg_data; } /*! * \brief Send a reprobe controller operation * * \param[in] api Controller connection * \param[in] target_node Name of node to reprobe * \param[in] router_node Router node for host * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_reprobe. */ int pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node, const char *router_node) { xmlNode *request; xmlNode *msg_data; int rc = pcmk_rc_ok; if (api == NULL) { return EINVAL; } if (router_node == NULL) { router_node = target_node; } crm_debug("Sending %s IPC request to reprobe %s via %s", pcmk_ipc_name(api, true), crm_str(target_node), crm_str(router_node)); msg_data = create_reprobe_message_data(target_node, router_node); request = create_controller_request(api, CRM_OP_REPROBE, router_node, msg_data); rc = send_controller_request(api, request, true); free_xml(msg_data); free_xml(request); return rc; } /*! * \brief Send a "node info" controller operation * * \param[in] api Controller connection * \param[in] nodeid ID of node to get info for (or 0 for local node) * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_info. */ int pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid) { xmlNode *request; int rc = pcmk_rc_ok; request = create_controller_request(api, CRM_OP_NODE_INFO, NULL, NULL); if (request == NULL) { return EINVAL; } if (nodeid > 0) { crm_xml_set_id(request, "%lu", (unsigned long) nodeid); } rc = send_controller_request(api, request, true); free_xml(request); return rc; } /*! * \brief Ask the controller for status * * \param[in] api Controller connection * \param[in] node_name Name of node whose status is desired (or NULL for DC) * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_ping. */ int pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name) { xmlNode *request; int rc = pcmk_rc_ok; request = create_controller_request(api, CRM_OP_PING, node_name, NULL); if (request == NULL) { return EINVAL; } rc = send_controller_request(api, request, true); free_xml(request); return rc; } /*! * \brief Ask the controller for cluster information * * \param[in] api Controller connection * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_nodes. */ int pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api) { xmlNode *request; int rc = EINVAL; request = create_controller_request(api, PCMK__CONTROLD_CMD_NODES, NULL, NULL); if (request != NULL) { rc = send_controller_request(api, request, true); free_xml(request); } return rc; } // \return Standard Pacemaker return code static int controller_resource_op(pcmk_ipc_api_t *api, const char *op, const char *target_node, const char *router_node, bool cib_only, const char *rsc_id, const char *rsc_long_id, const char *standard, const char *provider, const char *type) { int rc = pcmk_rc_ok; char *key; xmlNode *request, *msg_data, *xml_rsc, *params; if (api == NULL) { return EINVAL; } if (router_node == NULL) { router_node = target_node; } msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); /* The controller logs the transition key from resource op requests, so we * need to have *something* for it. * @TODO don't use "crm-resource" */ key = pcmk__transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); if (!pcmk__str_eq(router_node, target_node, pcmk__str_casei)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } if (cib_only) { // Indicate that only the CIB needs to be cleaned crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); // The controller parses the timeout from the request key = crm_meta_name(XML_ATTR_TIMEOUT); crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg free(key); request = create_controller_request(api, op, router_node, msg_data); rc = send_controller_request(api, request, true); free_xml(msg_data); free_xml(request); return rc; } /*! * \brief Ask the controller to fail a resource * * \param[in] api Controller connection * \param[in] target_node Name of node resource is on * \param[in] router_node Router node for target * \param[in] rsc_id ID of resource to fail * \param[in] rsc_long_id Long ID of resource (if any) * \param[in] standard Standard of resource * \param[in] provider Provider of resource (if any) * \param[in] type Type of resource to fail * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_resource. */ int pcmk_controld_api_fail(pcmk_ipc_api_t *api, const char *target_node, const char *router_node, const char *rsc_id, const char *rsc_long_id, const char *standard, const char *provider, const char *type) { crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), crm_str(target_node), crm_str(router_node)); return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, router_node, false, rsc_id, rsc_long_id, standard, provider, type); } /*! * \brief Ask the controller to refresh a resource * * \param[in] api Controller connection * \param[in] target_node Name of node resource is on * \param[in] router_node Router node for target * \param[in] rsc_id ID of resource to refresh * \param[in] rsc_long_id Long ID of resource (if any) * \param[in] standard Standard of resource * \param[in] provider Provider of resource (if any) * \param[in] type Type of resource * \param[in] cib_only If true, clean resource from CIB only * * \return Standard Pacemaker return code * \note Event callback will get a reply of type pcmk_controld_reply_resource. */ int pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node, const char *router_node, const char *rsc_id, const char *rsc_long_id, const char *standard, const char *provider, const char *type, bool cib_only) { crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", pcmk_ipc_name(api, true), crm_str(rsc_id), crm_str(rsc_long_id), crm_str(target_node), crm_str(router_node)); return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, router_node, cib_only, rsc_id, rsc_long_id, standard, provider, type); } /*! * \brief Get the number of IPC replies currently expected from the controller * * \param[in] api Controller IPC API connection * * \return Number of replies expected */ unsigned int pcmk_controld_api_replies_expected(pcmk_ipc_api_t *api) { struct controld_api_private_s *private = api->api_data; return private->replies_expected; } /*! * \brief Create XML for a controller IPC "hello" message * * \deprecated This function is deprecated as part of the public C API. */ // \todo make this static to this file when breaking API backward compatibility xmlNode * create_hello_message(const char *uuid, const char *client_name, const char *major_version, const char *minor_version) { xmlNode *hello_node = NULL; xmlNode *hello = NULL; if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { crm_err("Could not create IPC hello message from %s (UUID %s): " "missing information", client_name? client_name : "unknown client", uuid? uuid : "unknown"); return NULL; } hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); if (hello_node == NULL) { crm_err("Could not create IPC hello message from %s (UUID %s): " "Message data creation failed", client_name, uuid); return NULL; } crm_xml_add(hello_node, "major_version", major_version); crm_xml_add(hello_node, "minor_version", minor_version); crm_xml_add(hello_node, "client_name", client_name); crm_xml_add(hello_node, "client_uuid", uuid); hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); if (hello == NULL) { crm_err("Could not create IPC hello message from %s (UUID %s): " "Request creation failed", client_name, uuid); return NULL; } free_xml(hello_node); crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); return hello; } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 20ac67e69b..3698619ca8 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2397 +1,2397 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # undef KEYFILE # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(event != NULL); if (rsc_id != NULL) { event->rsc_id = strdup(rsc_id); CRM_ASSERT(event->rsc_id != NULL); } if (task != NULL) { event->op_type = strdup(task); CRM_ASSERT(event->op_type != NULL); } event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); /* This will get all the int values. * we just have to be careful not to leave any * dangling pointers to strings. */ memcpy(copy, event, sizeof(lrmd_event_data_t)); copy->rsc_id = event->rsc_id ? strdup(event->rsc_id) : NULL; copy->op_type = event->op_type ? strdup(event->op_type) : NULL; copy->user_data = event->user_data ? strdup(event->user_data) : NULL; copy->output = event->output ? strdup(event->output) : NULL; copy->exit_reason = event->exit_reason ? strdup(event->exit_reason) : NULL; copy->remote_nodename = event->remote_nodename ? strdup(event->remote_nodename) : NULL; copy->params = pcmk__str_table_dup(event->params); return copy; } /*! * \brief Free an executor event * * \param[in] Executor event object to free */ void lrmd_free_event(lrmd_event_data_t *event) { if (event == NULL) { return; } // @TODO Why are these const char *? free((void *) event->rsc_id); free((void *) event->op_type); free((void *) event->user_data); free((void *) event->remote_nodename); lrmd__reset_result(event); if (event->params != NULL) { g_hash_table_destroy(event->params); } free(event); } static void lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { time_t epoch = 0; crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_ms(msg, F_LRMD_RSC_INTERVAL, &event.interval_ms); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_epoch(msg, F_LRMD_RSC_RUN_TIME, &epoch); event.t_run = (unsigned int) epoch; crm_element_value_epoch(msg, F_LRMD_RSC_RCCHANGE_TIME, &epoch); event.t_rcchange = (unsigned int) epoch; crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.type = lrmd_event_exec_complete; // No need to duplicate the memory, so don't use setter functions event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT); event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON); event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } } // \return Always 0, to indicate that IPC mainloop source should be kept static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->callback != NULL) { xmlNode *msg = string2xml(buffer); lrmd_dispatch_internal(lrmd, msg); free_xml(msg); } return 0; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static bool remote_executor_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; return (native->remote->tls_session != NULL); } /*! * \internal * \brief TLS dispatch function (for both trigger and file descriptor sources) * * \param[in] userdata API connection * * \return Always return a nonnegative value, which as a file descriptor * dispatch function means keep the mainloop source, and as a * trigger dispatch function, 0 means remove the trigger from the * mainloop while 1 means keep it (and job completed) */ static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (!remote_executor_connected(lrmd)) { crm_trace("TLS dispatch triggered after disconnect"); return 0; } crm_trace("TLS dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ switch (pcmk__remote_ready(native->remote, 0)) { case pcmk_rc_ok: rc = pcmk__read_remote_message(native->remote, -1); xml = pcmk__remote_message_xml(native->remote); break; case ETIME: // Nothing to read, check if a full message is already in buffer xml = pcmk__remote_message_xml(native->remote); break; default: rc = ENOTCONN; break; } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(lrmd, xml); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } free_xml(xml); xml = pcmk__remote_message_xml(native->remote); } if (rc == ENOTCONN) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return -EPROTONOSUPPORT; } } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("IPC connection destroyed"); /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); native->remote->buffer = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } // \return Standard Pacemaker return code int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } // \return Standard Pacemaker return code static int read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, xmlNode **reply) { lrmd_private_t *native = lrmd->lrmd_private; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; int rc = pcmk_rc_ok; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } for (*reply = NULL; *reply == NULL; ) { *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return ETIME; } rc = pcmk__read_remote_message(native->remote, remaining_timeout); if (rc != pcmk_rc_ok) { return rc; } *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { return ENOMSG; } } crm_element_value_int(*reply, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(*reply, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(*reply); *reply = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, *reply); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } *reply = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(*reply); *reply = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(*reply); *reply = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return rc; } // \return Standard Pacemaker return code static int send_remote_message(lrmd_t *lrmd, xmlNode *msg) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd__remote_send_xml(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); } return rc; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; xmlNode *xml = NULL; if (!remote_executor_connected(lrmd)) { return -ENOTCONN; } rc = send_remote_message(lrmd, msg); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &xml); if (rc != pcmk_rc_ok) { crm_err("Disconnecting remote after request %d reply not received: %s " CRM_XS " rc=%d timeout=%dms", global_remote_msg_id, pcmk_rc_str(rc), rc, timeout); lrmd_tls_disconnect(lrmd); } if (reply) { *reply = xml; } else { free_xml(xml); } return pcmk_rc2legacy(rc); } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = send_remote_message(lrmd, msg); if (rc == pcmk_rc_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } rc = pcmk_rc2legacy(rc); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: return remote_executor_connected(lrmd); #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return 0; } } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If TRUE, wait for a reply from the server; * must be TRUE for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, gboolean expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return rc < 0 ? rc : pcmk_ok; } // \return Standard Pacemaker return code int lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) { int rc = pcmk_rc_ok; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __func__); value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); if ((value) && (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { crm_xml_add(data, F_LRMD_WATCHDOG, value); } rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return (rc < 0)? pcmk_legacy2rc(rc) : pcmk_rc_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { - crm_xml_add(hello, F_LRMD_IS_IPC_PROVIDER, "true"); + pcmk__xe_set_bool_attr(hello, F_LRMD_IS_IPC_PROVIDER, true); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); crm_element_value_int(reply, F_LRMD_RC, &rc); if (rc == -EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_perror(LOG_ERR, "Connection to executor failed"); rc = -ENOTCONN; } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { CRM_ASSERT((dest != NULL) && (source != NULL) && (source->data != NULL)); dest->data = gnutls_malloc(source->size); CRM_ASSERT(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 // Chunk size for reading key from file // \return Standard Pacemaker return code static int read_gnutls_key(const char *location, gnutls_datum_t *key) { FILE *stream = NULL; size_t buf_len = KEY_READ_LEN; if ((location == NULL) || (key == NULL)) { return EINVAL; } stream = fopen(location, "r"); if (stream == NULL) { return errno; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_warn("Pacemaker Remote key read was partially successful " "(copy in memory may be corrupted)"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); CRM_ASSERT(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return ENOKEY; } return pcmk_rc_ok; } // Cache the most recently used Pacemaker Remote authentication key struct key_cache_s { time_t updated; // When cached key was read (valid for 1 minute) const char *location; // Where cached key was read from gnutls_datum_t key; // Cached key }; static bool key_is_cached(struct key_cache_s *key_cache) { return key_cache->updated != 0; } static bool key_cache_expired(struct key_cache_s *key_cache) { return (time(NULL) - key_cache->updated) >= 60; } static void clear_key_cache(struct key_cache_s *key_cache) { clear_gnutls_datum(&(key_cache->key)); if ((key_cache->updated != 0) || (key_cache->location != NULL)) { key_cache->updated = 0; key_cache->location = NULL; crm_debug("Cleared Pacemaker Remote key cache"); } } static void get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key) { copy_gnutls_datum(key, &(key_cache->key)); crm_debug("Using cached Pacemaker Remote key from %s", crm_str(key_cache->location)); } static void cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key, const char *location) { key_cache->updated = time(NULL); key_cache->location = location; copy_gnutls_datum(&(key_cache->key), key); crm_debug("Using (and cacheing) Pacemaker Remote key from %s", crm_str(location)); } /*! * \internal * \brief Get Pacemaker Remote authentication key from file or cache * * \param[in] location Path to key file to try (this memory must * persist across all calls of this function) * \param[out] key Key from location or cache * * \return Standard Pacemaker return code */ static int get_remote_key(const char *location, gnutls_datum_t *key) { static struct key_cache_s key_cache = { 0, }; int rc = pcmk_rc_ok; if ((location == NULL) || (key == NULL)) { return EINVAL; } if (key_is_cached(&key_cache)) { if (key_cache_expired(&key_cache)) { clear_key_cache(&key_cache); } else { get_cached_key(&key_cache, key); return pcmk_rc_ok; } } rc = read_gnutls_key(location, key); if (rc != pcmk_rc_ok) { return rc; } cache_key(&key_cache, key, location); return pcmk_rc_ok; } /*! * \internal * \brief Initialize the Pacemaker Remote authentication key * * Try loading the Pacemaker Remote authentication key from cache if available, * otherwise from these locations, in order of preference: the value of the * PCMK_authkey_location environment variable, if set; the Pacemaker default key * file location; or (for historical reasons) /etc/corosync/authkey. * * \param[out] key Where to store key * * \return Standard Pacemaker return code */ int lrmd__init_remote_key(gnutls_datum_t *key) { static const char *env_location = NULL; static bool need_env = true; int env_rc = pcmk_rc_ok; int default_rc = pcmk_rc_ok; int alt_rc = pcmk_rc_ok; bool env_is_default = false; bool env_is_fallback = false; if (need_env) { env_location = getenv("PCMK_authkey_location"); need_env = false; } // Try location in environment variable, if set if (env_location != NULL) { env_rc = get_remote_key(env_location, key); if (env_rc == pcmk_rc_ok) { return pcmk_rc_ok; } env_is_default = !strcmp(env_location, DEFAULT_REMOTE_KEY_LOCATION); env_is_fallback = !strcmp(env_location, ALT_REMOTE_KEY_LOCATION); /* @TODO It would be more secure to fail, rather than fall back to the * default, if an explicitly set key location is not readable, and it * would be better to never use the Corosync location as a fallback. * However, that would break any deployments currently working with the * fallbacks. */ } // Try default location, if environment wasn't explicitly set to it if (env_is_default) { default_rc = env_rc; } else { default_rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key); } // Try fallback location, if environment wasn't set to it and default failed if (env_is_fallback) { alt_rc = env_rc; } else if (default_rc != pcmk_rc_ok) { alt_rc = get_remote_key(ALT_REMOTE_KEY_LOCATION, key); } // We have all results, so log and return if ((env_rc != pcmk_rc_ok) && (default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment set, everything failed crm_warn("Could not read Pacemaker Remote key from %s (%s%s%s%s%s): %s", env_location, env_is_default? "" : "or default location ", env_is_default? "" : DEFAULT_REMOTE_KEY_LOCATION, !env_is_default && !env_is_fallback? " " : "", env_is_fallback? "" : "or fallback location ", env_is_fallback? "" : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return ENOKEY; } if (env_rc != pcmk_rc_ok) { // Environment set but failed, using a default crm_warn("Could not read Pacemaker Remote key from %s " "(using %s location %s instead): %s", env_location, (default_rc == pcmk_rc_ok)? "default" : "fallback", (default_rc == pcmk_rc_ok)? DEFAULT_REMOTE_KEY_LOCATION : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return pcmk_rc_ok; } if ((default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment unset, defaults failed crm_warn("Could not read Pacemaker Remote key from default location %s" " (or fallback location %s): %s", DEFAULT_REMOTE_KEY_LOCATION, ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(default_rc)); return ENOKEY; } return pcmk_rc_ok; // Environment variable unset, a default worked } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static inline int lrmd__tls_client_handshake(pcmk__remote_t *remote) { return pcmk__tls_client_handshake(remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT); } /*! * \internal * \brief Add trigger and file descriptor mainloop sources for TLS * * \param[in] lrmd API connection with established TLS session * \param[in] do_handshake Whether to perform executor handshake * * \return Standard Pacemaker return code */ static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_handshake) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_rc_ok; char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); struct mainloop_fd_callbacks tls_fd_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &tls_fd_callbacks); /* Async connections lose the client name provided by the API caller, so we * have to use our generated name here to perform the executor handshake. * * @TODO Keep track of the caller-provided name. Perhaps we should be using * that name in this function instead of generating one anyway. */ if (do_handshake) { rc = lrmd_handshake(lrmd, name); rc = pcmk_legacy2rc(rc); } free(name); return rc; } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. * @TODO make this async to avoid blocking code in client */ native->sock = sock; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_warn("Disconnecting after TLS handshake with Pacemaker Remote server %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EKEYREJECTED); return; } crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); rc = add_tls_to_mainloop(lrmd, true); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } native->async_timer = timer_id; return pcmk_ok; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return pcmk_rc2legacy(rc); } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(native->sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return -EPROTO; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { add_tls_to_mainloop(lrmd, false); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -EINVAL); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_ok; crm_info("Disconnecting %s %s executor connection", pcmk__client_type_str(native->type), (native->remote_nodename? native->remote_nodename : "local")); switch (native->type) { case pcmk__client_ipc: lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return rc; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); CRM_ASSERT(rsc_info); if (rsc_id) { rsc_info->id = strdup(rsc_id); CRM_ASSERT(rsc_info->id); } if (standard) { rsc_info->standard = strdup(standard); CRM_ASSERT(rsc_info->standard); } if (provider) { rsc_info->provider = strdup(provider); CRM_ASSERT(rsc_info->provider); } if (type) { rsc_info->type = strdup(type); CRM_ASSERT(rsc_info->type); } return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { free_xml(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); free_xml(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, TRUE); if (data) { free_xml(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (xmlNode *rsc_xml = first_named_child(output_xml, F_LRMD_RSC); (rsc_xml != NULL) && (rc == pcmk_ok); rsc_xml = crm_next_same_xml(rsc_xml)) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (xmlNode *op_xml = first_named_child(rsc_xml, T_LRMD_RSC_OP); op_xml != NULL; op_xml = crm_next_same_xml(op_xml)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); if (op_info == NULL) { rc = -ENOMEM; break; } op_info->rsc_id = strdup(rsc_id); op_info->action = crm_element_value_copy(op_xml, F_LRMD_RSC_ACTION); op_info->interval_ms_s = crm_element_value_copy(op_xml, F_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = crm_element_value_copy(op_xml, F_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } free_xml(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { g_hash_table_insert(params_table, strdup(param->key), strdup(param->value)); } action = services__create_resource_action(type, standard, provider, type, CRMD_ACTION_METADATA, 0, CRMD_METADATA_CALL_TIMEOUT, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { return -ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services_action_free(action); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_ALERT_ID, alert_id); crm_xml_add(data, F_LRMD_ALERT_PATH, alert_path); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return -ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static bool does_provider_have_agent(const char *agent, const char *provider, const char *class) { bool found = false; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = true; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } /*! * \internal * \brief Create an executor API object * * \param[out] api Will be set to newly created API object (it is the * caller's responsibility to free this value with * lrmd_api_delete() if this function succeeds) * \param[in] nodename If the object will be used for a remote connection, * the node name to use in cluster for remote executor * \param[in] server If the object will be used for a remote connection, * the resolvable host name to connect to * \param[in] port If the object will be used for a remote connection, * port number on \p server to connect to * * \return Standard Pacemaker return code * \note If the caller leaves one of \p nodename or \p server NULL, the other's * value will be used for both. If the caller leaves both NULL, an API * object will be created for a local executor connection. */ int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port) { lrmd_private_t *pvt = NULL; if (api == NULL) { return EINVAL; } *api = NULL; // Allocate all memory needed *api = calloc(1, sizeof(lrmd_t)); if (*api == NULL) { return ENOMEM; } pvt = calloc(1, sizeof(lrmd_private_t)); if (pvt == NULL) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } (*api)->lrmd_private = pvt; // @TODO Do we need to do this for local connections? pvt->remote = calloc(1, sizeof(pcmk__remote_t)); (*api)->cmds = calloc(1, sizeof(lrmd_api_operations_t)); if ((pvt->remote == NULL) || ((*api)->cmds == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } // Set methods (*api)->cmds->connect = lrmd_api_connect; (*api)->cmds->connect_async = lrmd_api_connect_async; (*api)->cmds->is_connected = lrmd_api_is_connected; (*api)->cmds->poke_connection = lrmd_api_poke_connection; (*api)->cmds->disconnect = lrmd_api_disconnect; (*api)->cmds->register_rsc = lrmd_api_register_rsc; (*api)->cmds->unregister_rsc = lrmd_api_unregister_rsc; (*api)->cmds->get_rsc_info = lrmd_api_get_rsc_info; (*api)->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; (*api)->cmds->set_callback = lrmd_api_set_callback; (*api)->cmds->get_metadata = lrmd_api_get_metadata; (*api)->cmds->exec = lrmd_api_exec; (*api)->cmds->cancel = lrmd_api_cancel; (*api)->cmds->list_agents = lrmd_api_list_agents; (*api)->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; (*api)->cmds->list_standards = lrmd_api_list_standards; (*api)->cmds->exec_alert = lrmd_api_exec_alert; (*api)->cmds->get_metadata_params = lrmd_api_get_metadata_params; if ((nodename == NULL) && (server == NULL)) { pvt->type = pcmk__client_ipc; } else { #ifdef HAVE_GNUTLS_GNUTLS_H if (nodename == NULL) { nodename = server; } else if (server == NULL) { server = nodename; } pvt->type = pcmk__client_tls; pvt->remote_nodename = strdup(nodename); pvt->server = strdup(server); if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } pvt->port = port; if (pvt->port == 0) { pvt->port = crm_default_remote_port(); } #else crm_err("Cannot communicate with Pacemaker Remote " "because GnuTLS is not enabled for this build"); lrmd_api_delete(*api); *api = NULL; return EOPNOTSUPP; #endif } return pcmk_rc_ok; } lrmd_t * lrmd_api_new(void) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, NULL, NULL, 0) == pcmk_rc_ok); return api; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, nodename, server, port) == pcmk_rc_ok); return api; } void lrmd_api_delete(lrmd_t * lrmd) { if (lrmd == NULL) { return; } if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy lrmd->cmds->disconnect(lrmd); // No-op if already disconnected free(lrmd->cmds); } if (lrmd->lrmd_private != NULL) { lrmd_private_t *native = lrmd->lrmd_private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); free(lrmd->lrmd_private); } free(lrmd); } /*! * \internal * \brief Set the result of an executor event * * \param[in,out] event Executor event to set * \param[in] rc OCF exit status of event * \param[in] op_status Executor status of event * \param[in] exit_reason Human-friendly description of event */ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status, const char *exit_reason) { if (event == NULL) { return; } event->rc = rc; event->op_status = op_status; if (!pcmk__str_eq(event->exit_reason, exit_reason, pcmk__str_none)) { free((void *) event->exit_reason); event->exit_reason = (exit_reason == NULL)? NULL : strdup(exit_reason); } } /*! * \internal * \brief Clear an executor event's exit reason, output, and error output * * \param[in] event Executor event to reset */ void lrmd__reset_result(lrmd_event_data_t *event) { if (event == NULL) { return; } free((void *) event->exit_reason); event->exit_reason = NULL; free((void *) event->output); event->output = NULL; } diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c index 2ecc1f0f96..d9672fcc9e 100644 --- a/lib/pacemaker/pcmk_output.c +++ b/lib/pacemaker/pcmk_output.c @@ -1,1846 +1,1846 @@ /* * Copyright 2019-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include static char * colocations_header(pe_resource_t *rsc, pcmk__colocation_t *cons, gboolean dependents) { char *score = NULL; char *retval = NULL; score = score2char(cons->score); if (cons->primary_role > RSC_ROLE_STARTED) { retval = crm_strdup_printf("%s (score=%s, %s role=%s, id=%s)", rsc->id, score, dependents ? "needs" : "with", role2text(cons->primary_role), cons->id); } else { retval = crm_strdup_printf("%s (score=%s, id=%s)", rsc->id, score, cons->id); } free(score); return retval; } static void colocations_xml_node(pcmk__output_t *out, pe_resource_t *rsc, pcmk__colocation_t *cons) { char *score = NULL; xmlNodePtr node = NULL; score = score2char(cons->score); node = pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_DEPEND, "id", cons->id, "rsc", cons->dependent->id, "with-rsc", cons->primary->id, "score", score, NULL); if (cons->node_attribute) { xmlSetProp(node, (pcmkXmlStr) "node-attribute", (pcmkXmlStr) cons->node_attribute); } if (cons->dependent_role != RSC_ROLE_UNKNOWN) { xmlSetProp(node, (pcmkXmlStr) "rsc-role", (pcmkXmlStr) role2text(cons->dependent_role)); } if (cons->primary_role != RSC_ROLE_UNKNOWN) { xmlSetProp(node, (pcmkXmlStr) "with-rsc-role", (pcmkXmlStr) role2text(cons->primary_role)); } free(score); } static int do_locations_list_xml(pcmk__output_t *out, pe_resource_t *rsc, bool add_header) { GList *lpc = NULL; GList *list = rsc->rsc_location; int rc = pcmk_rc_no_output; for (lpc = list; lpc != NULL; lpc = lpc->next) { pe__location_t *cons = lpc->data; GList *lpc2 = NULL; for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { pe_node_t *node = (pe_node_t *) lpc2->data; char *score = score2char(node->weight); if (add_header) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "locations"); } pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_LOCATION, "node", node->details->uname, "rsc", rsc->id, "id", cons->id, "score", score, NULL); free(score); } } if (add_header) { PCMK__OUTPUT_LIST_FOOTER(out, rc); } return rc; } PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *", "pe_node_t *", "pe_node_t *", "pe_action_t *", "pe_action_t *") static int rsc_action_item(pcmk__output_t *out, va_list args) { const char *change = va_arg(args, const char *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *origin = va_arg(args, pe_node_t *); pe_node_t *destination = va_arg(args, pe_node_t *); pe_action_t *action = va_arg(args, pe_action_t *); pe_action_t *source = va_arg(args, pe_action_t *); int len = 0; char *reason = NULL; char *details = NULL; bool same_host = FALSE; bool same_role = FALSE; bool need_role = FALSE; static int rsc_width = 5; static int detail_width = 5; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if(source == NULL) { source = action; } len = strlen(rsc->id); if(len > rsc_width) { rsc_width = len + 2; } if ((rsc->role > RSC_ROLE_STARTED) || (rsc->next_role > RSC_ROLE_UNPROMOTED)) { need_role = TRUE; } if(origin != NULL && destination != NULL && origin->details == destination->details) { same_host = TRUE; } if(rsc->role == rsc->next_role) { same_role = TRUE; } if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), destination->details->uname); } else if (origin == NULL) { /* Starting a resource */ details = crm_strdup_printf("%s", destination->details->uname); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (destination == NULL) { /* Stopping a resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ details = crm_strdup_printf("%s", origin->details->uname); } else if (need_role && same_role) { /* Moving a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", origin->details->uname, destination->details->uname, role2text(rsc->role)); } else if (same_role) { /* Moving a normal resource */ details = crm_strdup_printf("%s -> %s", origin->details->uname, destination->details->uname); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), origin->details->uname); } else { /* Moving and promoting/demoting */ details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role), origin->details->uname, role2text(rsc->next_role), destination->details->uname); } len = strlen(details); if(len > detail_width) { detail_width = len; } if(source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) { reason = crm_strdup_printf("due to %s (blocked)", source->reason); } else if(source->reason) { reason = crm_strdup_printf("due to %s", source->reason); } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { reason = strdup("blocked"); } out->list_item(out, NULL, "%-8s %-*s ( %*s )%s%s", change, rsc_width, rsc->id, detail_width, details, reason ? " " : "", reason ? reason : ""); free(details); free(reason); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *", "pe_node_t *", "pe_node_t *", "pe_action_t *", "pe_action_t *") static int rsc_action_item_xml(pcmk__output_t *out, va_list args) { const char *change = va_arg(args, const char *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *origin = va_arg(args, pe_node_t *); pe_node_t *destination = va_arg(args, pe_node_t *); pe_action_t *action = va_arg(args, pe_action_t *); pe_action_t *source = va_arg(args, pe_action_t *); char *change_str = NULL; bool same_host = FALSE; bool same_role = FALSE; bool need_role = FALSE; xmlNode *xml = NULL; CRM_ASSERT(action); CRM_ASSERT(destination != NULL || origin != NULL); if (source == NULL) { source = action; } if ((rsc->role > RSC_ROLE_STARTED) || (rsc->next_role > RSC_ROLE_UNPROMOTED)) { need_role = TRUE; } if(origin != NULL && destination != NULL && origin->details == destination->details) { same_host = TRUE; } if(rsc->role == rsc->next_role) { same_role = TRUE; } change_str = g_ascii_strdown(change, -1); xml = pcmk__output_create_xml_node(out, "rsc_action", "action", change_str, "resource", rsc->id, NULL); g_free(change_str); if (need_role && (origin == NULL)) { /* Starting and promoting a promotable clone instance */ pcmk__xe_set_props(xml, "role", role2text(rsc->role), "next-role", role2text(rsc->next_role), "dest", destination->details->uname, NULL); } else if (origin == NULL) { /* Starting a resource */ crm_xml_add(xml, "node", destination->details->uname); } else if (need_role && (destination == NULL)) { /* Stopping a promotable clone instance */ pcmk__xe_set_props(xml, "role", role2text(rsc->role), "node", origin->details->uname, NULL); } else if (destination == NULL) { /* Stopping a resource */ crm_xml_add(xml, "node", origin->details->uname); } else if (need_role && same_role && same_host) { /* Recovering, restarting or re-promoting a promotable clone instance */ pcmk__xe_set_props(xml, "role", role2text(rsc->role), "source", origin->details->uname, NULL); } else if (same_role && same_host) { /* Recovering or Restarting a normal resource */ crm_xml_add(xml, "source", origin->details->uname); } else if (need_role && same_role) { /* Moving a promotable clone instance */ pcmk__xe_set_props(xml, "source", origin->details->uname, "dest", destination->details->uname, "role", role2text(rsc->role), NULL); } else if (same_role) { /* Moving a normal resource */ pcmk__xe_set_props(xml, "source", origin->details->uname, "dest", destination->details->uname, NULL); } else if (same_host) { /* Promoting or demoting a promotable clone instance */ pcmk__xe_set_props(xml, "role", role2text(rsc->role), "next-role", role2text(rsc->next_role), "source", origin->details->uname, NULL); } else { /* Moving and promoting/demoting */ pcmk__xe_set_props(xml, "role", role2text(rsc->role), "source", origin->details->uname, "next-role", role2text(rsc->next_role), "dest", destination->details->uname, NULL); } if (source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) { pcmk__xe_set_props(xml, "reason", source->reason, "blocked", "true", NULL); } else if(source->reason) { crm_xml_add(xml, "reason", source->reason); } else if (!pcmk_is_set(action->flags, pe_action_runnable)) { - crm_xml_add(xml, "blocked", "true"); + pcmk__xe_set_bool_attr(xml, "blocked", true); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "gboolean") static int rsc_is_colocated_with_list(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean recursive = va_arg(args, gboolean); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { return rc; } pe__set_resource_flags(rsc, pe_rsc_allocating); for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; char *hdr = NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Resources %s is colocated with", rsc->id); if (pcmk_is_set(cons->primary->flags, pe_rsc_allocating)) { out->list_item(out, NULL, "%s (id=%s - loop)", cons->primary->id, cons->id); continue; } hdr = colocations_header(cons->primary, cons, FALSE); out->list_item(out, NULL, "%s", hdr); free(hdr); /* Empty list header just for indentation of information about this resource. */ out->begin_list(out, NULL, NULL, NULL); out->message(out, "locations-list", cons->primary); if (recursive) { out->message(out, "rsc-is-colocated-with-list", cons->primary, recursive); } out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "gboolean") static int rsc_is_colocated_with_list_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean recursive = va_arg(args, gboolean); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { return rc; } pe__set_resource_flags(rsc, pe_rsc_allocating); for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; if (pcmk_is_set(cons->primary->flags, pe_rsc_allocating)) { colocations_xml_node(out, cons->primary, cons); continue; } colocations_xml_node(out, cons->primary, cons); do_locations_list_xml(out, cons->primary, false); if (recursive) { out->message(out, "rsc-is-colocated-with-list", cons->primary, recursive); } } return rc; } PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "gboolean") static int rscs_colocated_with_list(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean recursive = va_arg(args, gboolean); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { return rc; } pe__set_resource_flags(rsc, pe_rsc_allocating); for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; char *hdr = NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Resources colocated with %s", rsc->id); if (pcmk_is_set(cons->dependent->flags, pe_rsc_allocating)) { out->list_item(out, NULL, "%s (id=%s - loop)", cons->dependent->id, cons->id); continue; } hdr = colocations_header(cons->dependent, cons, TRUE); out->list_item(out, NULL, "%s", hdr); free(hdr); /* Empty list header just for indentation of information about this resource. */ out->begin_list(out, NULL, NULL, NULL); out->message(out, "locations-list", cons->dependent); if (recursive) { out->message(out, "rscs-colocated-with-list", cons->dependent, recursive); } out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "gboolean") static int rscs_colocated_with_list_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean recursive = va_arg(args, gboolean); int rc = pcmk_rc_no_output; if (pcmk_is_set(rsc->flags, pe_rsc_allocating)) { return rc; } pe__set_resource_flags(rsc, pe_rsc_allocating); for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; if (pcmk_is_set(cons->dependent->flags, pe_rsc_allocating)) { colocations_xml_node(out, cons->dependent, cons); continue; } colocations_xml_node(out, cons->dependent, cons); do_locations_list_xml(out, cons->dependent, false); if (recursive) { out->message(out, "rscs-colocated-with-list", cons->dependent, recursive); } } return rc; } PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *") static int locations_list(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); GList *lpc = NULL; GList *list = rsc->rsc_location; int rc = pcmk_rc_no_output; for (lpc = list; lpc != NULL; lpc = lpc->next) { pe__location_t *cons = lpc->data; GList *lpc2 = NULL; for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) { pe_node_t *node = (pe_node_t *) lpc2->data; char *score = score2char(node->weight); PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Locations"); out->list_item(out, NULL, "Node %s (score=%s, id=%s, rsc=%s)", node->details->uname, score, cons->id, rsc->id); free(score); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *") static int locations_list_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); return do_locations_list_xml(out, rsc, true); } PCMK__OUTPUT_ARGS("stacks-constraints", "pe_resource_t *", "pe_working_set_t *", "gboolean") static int stacks_and_constraints(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); gboolean recursive = va_arg(args, gboolean); pcmk__unpack_constraints(data_set); // Constraints apply to group/clone, not member/instance rsc = uber_parent(rsc); out->message(out, "locations-list", rsc); pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating); out->message(out, "rscs-colocated-with-list", rsc, recursive); pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating); out->message(out, "rsc-is-colocated-with-list", rsc, recursive); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("stacks-constraints", "pe_resource_t *", "pe_working_set_t *", "gboolean") static int stacks_and_constraints_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); gboolean recursive = va_arg(args, gboolean); pcmk__unpack_constraints(data_set); // Constraints apply to group/clone, not member/instance rsc = uber_parent(rsc); pcmk__output_xml_create_parent(out, "constraints", NULL); do_locations_list_xml(out, rsc, false); pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating); out->message(out, "rscs-colocated-with-list", rsc, recursive); pe__clear_resource_flags_on_all(data_set, pe_rsc_allocating); out->message(out, "rsc-is-colocated-with-list", rsc, recursive); pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") static int health_text(pcmk__output_t *out, va_list args) { const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *); const char *host_from = va_arg(args, const char *); const char *fsa_state = va_arg(args, const char *); const char *result = va_arg(args, const char *); if (!out->is_quiet(out)) { return out->info(out, "Controller on %s in state %s: %s", crm_str(host_from), crm_str(fsa_state), crm_str(result)); } else if (fsa_state != NULL) { pcmk__formatted_printf(out, "%s\n", fsa_state); return pcmk_rc_ok; } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *", "const char *") static int health_xml(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); const char *host_from = va_arg(args, const char *); const char *fsa_state = va_arg(args, const char *); const char *result = va_arg(args, const char *); pcmk__output_create_xml_node(out, crm_str(sys_from), "node_name", crm_str(host_from), "state", crm_str(fsa_state), "result", crm_str(result), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") static int pacemakerd_health_text(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); const char *state = va_arg(args, const char *); const char *last_updated = va_arg(args, const char *); if (!out->is_quiet(out)) { return out->info(out, "Status of %s: '%s' %s %s", crm_str(sys_from), crm_str(state), (!pcmk__str_empty(last_updated))? "last updated":"", crm_str(last_updated)); } else { pcmk__formatted_printf(out, "%s\n", crm_str(state)); return pcmk_rc_ok; } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *", "const char *", "const char *") static int pacemakerd_health_xml(pcmk__output_t *out, va_list args) { const char *sys_from = va_arg(args, const char *); const char *state = va_arg(args, const char *); const char *last_updated = va_arg(args, const char *); pcmk__output_create_xml_node(out, crm_str(sys_from), "state", crm_str(state), "last_updated", crm_str(last_updated), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_default(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); out->list_item(out, NULL, "Testing %s ... %.2f secs", xml_file, (end - start) / (float) CLOCKS_PER_SEC); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t") static int profile_xml(pcmk__output_t *out, va_list args) { const char *xml_file = va_arg(args, const char *); clock_t start = va_arg(args, clock_t); clock_t end = va_arg(args, clock_t); char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC); pcmk__output_create_xml_node(out, "timing", "file", xml_file, "duration", duration, NULL); free(duration); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("dc", "const char *") static int dc_text(pcmk__output_t *out, va_list args) { const char *dc = va_arg(args, const char *); if (!out->is_quiet(out)) { return out->info(out, "Designated Controller is: %s", crm_str(dc)); } else if (dc != NULL) { pcmk__formatted_printf(out, "%s\n", dc); return pcmk_rc_ok; } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("dc", "const char *") static int dc_xml(pcmk__output_t *out, va_list args) { const char *dc = va_arg(args, const char *); pcmk__output_create_xml_node(out, "dc", "node_name", crm_str(dc), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "gboolean") static int crmadmin_node_text(pcmk__output_t *out, va_list args) { const char *type = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *id = va_arg(args, const char *); gboolean BASH_EXPORT = va_arg(args, gboolean); if (out->is_quiet(out)) { pcmk__formatted_printf(out, "%s\n", crm_str(name)); return pcmk_rc_ok; } else if (BASH_EXPORT) { return out->info(out, "export %s=%s", crm_str(name), crm_str(id)); } else { return out->info(out, "%s node: %s (%s)", type ? type : "cluster", crm_str(name), crm_str(id)); } } PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *", "const char *", "gboolean") static int crmadmin_node_xml(pcmk__output_t *out, va_list args) { const char *type = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *id = va_arg(args, const char *); pcmk__output_create_xml_node(out, "node", "type", type ? type : "cluster", "name", crm_str(name), "id", crm_str(id), NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("digests", "pe_resource_t *", "pe_node_t *", "const char *", "guint", "op_digest_cache_t *") static int digests_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *task = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); op_digest_cache_t *digests = va_arg(args, op_digest_cache_t *); char *action_desc = NULL; const char *rsc_desc = "unknown resource"; const char *node_desc = "unknown node"; if (interval_ms != 0) { action_desc = crm_strdup_printf("%ums-interval %s action", interval_ms, ((task == NULL)? "unknown" : task)); } else if (pcmk__str_eq(task, "monitor", pcmk__str_none)) { action_desc = strdup("probe action"); } else { action_desc = crm_strdup_printf("%s action", ((task == NULL)? "unknown" : task)); } if ((rsc != NULL) && (rsc->id != NULL)) { rsc_desc = rsc->id; } if ((node != NULL) && (node->details->uname != NULL)) { node_desc = node->details->uname; } out->begin_list(out, NULL, NULL, "Digests for %s %s on %s", rsc_desc, action_desc, node_desc); free(action_desc); if (digests == NULL) { out->list_item(out, NULL, "none"); out->end_list(out); return pcmk_rc_ok; } if (digests->digest_all_calc != NULL) { out->list_item(out, NULL, "%s (all parameters)", digests->digest_all_calc); } if (digests->digest_secure_calc != NULL) { out->list_item(out, NULL, "%s (non-private parameters)", digests->digest_secure_calc); } if (digests->digest_restart_calc != NULL) { out->list_item(out, NULL, "%s (non-reloadable parameters)", digests->digest_restart_calc); } out->end_list(out); return pcmk_rc_ok; } static void add_digest_xml(xmlNode *parent, const char *type, const char *digest, xmlNode *digest_source) { if (digest != NULL) { xmlNodePtr digest_xml = create_xml_node(parent, "digest"); crm_xml_add(digest_xml, "type", ((type == NULL)? "unspecified" : type)); crm_xml_add(digest_xml, "hash", digest); if (digest_source != NULL) { add_node_copy(digest_xml, digest_source); } } } PCMK__OUTPUT_ARGS("digests", "pe_resource_t *", "pe_node_t *", "const char *", "guint", "op_digest_cache_t *") static int digests_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *task = va_arg(args, const char *); guint interval_ms = va_arg(args, guint); op_digest_cache_t *digests = va_arg(args, op_digest_cache_t *); char *interval_s = crm_strdup_printf("%ums", interval_ms); xmlNode *xml = NULL; xml = pcmk__output_create_xml_node(out, "digests", "resource", crm_str(rsc->id), "node", crm_str(node->details->uname), "task", crm_str(task), "interval", interval_s, NULL); free(interval_s); if (digests != NULL) { add_digest_xml(xml, "all", digests->digest_all_calc, digests->params_all); add_digest_xml(xml, "nonprivate", digests->digest_secure_calc, digests->params_secure); add_digest_xml(xml, "nonreloadable", digests->digest_restart_calc, digests->params_restart); } return pcmk_rc_ok; } #define STOP_SANITY_ASSERT(lineno) do { \ if(current && current->details->unclean) { \ /* It will be a pseudo op */ \ } else if(stop == NULL) { \ crm_err("%s:%d: No stop action exists for %s", \ __func__, lineno, rsc->id); \ CRM_ASSERT(stop != NULL); \ } else if (pcmk_is_set(stop->flags, pe_action_optional)) { \ crm_err("%s:%d: Action %s is still optional", \ __func__, lineno, stop->uuid); \ CRM_ASSERT(!pcmk_is_set(stop->flags, pe_action_optional)); \ } \ } while(0) PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *", "gboolean") static int rsc_action_default(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *current = va_arg(args, pe_node_t *); pe_node_t *next = va_arg(args, pe_node_t *); gboolean moving = va_arg(args, gboolean); GList *possible_matches = NULL; char *key = NULL; int rc = pcmk_rc_no_output; pe_node_t *start_node = NULL; pe_action_t *start = NULL; pe_action_t *stop = NULL; pe_action_t *promote = NULL; pe_action_t *demote = NULL; if (!pcmk_is_set(rsc->flags, pe_rsc_managed) || (current == NULL && next == NULL)) { pe_rsc_info(rsc, "Leave %s\t(%s%s)", rsc->id, role2text(rsc->role), !pcmk_is_set(rsc->flags, pe_rsc_managed)? " unmanaged" : ""); return rc; } if (current != NULL && next != NULL && !pcmk__str_eq(current->details->id, next->details->id, pcmk__str_casei)) { moving = TRUE; } possible_matches = pe__resource_actions(rsc, next, RSC_START, FALSE); if (possible_matches) { start = possible_matches->data; g_list_free(possible_matches); } if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) { start_node = NULL; } else { start_node = current; } possible_matches = pe__resource_actions(rsc, start_node, RSC_STOP, FALSE); if (possible_matches) { stop = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_PROMOTE, FALSE); if (possible_matches) { promote = possible_matches->data; g_list_free(possible_matches); } possible_matches = pe__resource_actions(rsc, next, RSC_DEMOTE, FALSE); if (possible_matches) { demote = possible_matches->data; g_list_free(possible_matches); } if (rsc->role == rsc->next_role) { pe_action_t *migrate_op = NULL; possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, FALSE); if (possible_matches) { migrate_op = possible_matches->data; } CRM_CHECK(next != NULL,); if (next == NULL) { } else if ((migrate_op != NULL) && (current != NULL) && pcmk_is_set(migrate_op->flags, pe_action_runnable)) { rc = out->message(out, "rsc-action-item", "Migrate", rsc, current, next, start, NULL); } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next, start, NULL); } else if (start == NULL || pcmk_is_set(start->flags, pe_action_optional)) { if ((demote != NULL) && (promote != NULL) && !pcmk_is_set(demote->flags, pe_action_optional) && !pcmk_is_set(promote->flags, pe_action_optional)) { rc = out->message(out, "rsc-action-item", "Re-promote", rsc, current, next, promote, demote); } else { pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role), next->details->uname); } } else if (!pcmk_is_set(start->flags, pe_action_runnable)) { rc = out->message(out, "rsc-action-item", "Stop", rsc, current, NULL, stop, (stop && stop->reason)? stop : start); STOP_SANITY_ASSERT(__LINE__); } else if (moving && current) { rc = out->message(out, "rsc-action-item", pcmk_is_set(rsc->flags, pe_rsc_failed)? "Recover" : "Move", rsc, current, next, stop, NULL); } else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) { rc = out->message(out, "rsc-action-item", "Recover", rsc, current, NULL, stop, NULL); STOP_SANITY_ASSERT(__LINE__); } else { rc = out->message(out, "rsc-action-item", "Restart", rsc, current, next, start, NULL); /* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */ } g_list_free(possible_matches); return rc; } if(stop && (rsc->next_role == RSC_ROLE_STOPPED || (start && !pcmk_is_set(start->flags, pe_action_runnable)))) { GList *gIter = NULL; key = stop_key(rsc); for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; pe_action_t *stop_op = NULL; possible_matches = find_actions(rsc->actions, key, node); if (possible_matches) { stop_op = possible_matches->data; g_list_free(possible_matches); } if (stop_op && (stop_op->flags & pe_action_runnable)) { STOP_SANITY_ASSERT(__LINE__); } if (out->message(out, "rsc-action-item", "Stop", rsc, node, NULL, stop_op, (stop_op && stop_op->reason)? stop_op : start) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } free(key); } else if ((stop != NULL) && pcmk_all_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_stop)) { /* 'stop' may be NULL if the failure was ignored */ rc = out->message(out, "rsc-action-item", "Recover", rsc, current, next, stop, start); STOP_SANITY_ASSERT(__LINE__); } else if (moving) { rc = out->message(out, "rsc-action-item", "Move", rsc, current, next, stop, NULL); STOP_SANITY_ASSERT(__LINE__); } else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) { rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next, start, NULL); } else if (stop != NULL && !pcmk_is_set(stop->flags, pe_action_optional)) { rc = out->message(out, "rsc-action-item", "Restart", rsc, current, next, start, NULL); STOP_SANITY_ASSERT(__LINE__); } else if (rsc->role == RSC_ROLE_PROMOTED) { CRM_LOG_ASSERT(current != NULL); rc = out->message(out, "rsc-action-item", "Demote", rsc, current, next, demote, NULL); } else if (rsc->next_role == RSC_ROLE_PROMOTED) { CRM_LOG_ASSERT(next); rc = out->message(out, "rsc-action-item", "Promote", rsc, current, next, promote, NULL); } else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) { rc = out->message(out, "rsc-action-item", "Start", rsc, current, next, start, NULL); } return rc; } PCMK__OUTPUT_ARGS("node-action", "char *", "char *", "char *") static int node_action(pcmk__output_t *out, va_list args) { char *task = va_arg(args, char *); char *node_name = va_arg(args, char *); char *reason = va_arg(args, char *); if (task == NULL) { return pcmk_rc_no_output; } else if (reason) { out->list_item(out, NULL, "%s %s '%s'", task, node_name, reason); } else { crm_notice(" * %s %s", task, node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-action", "char *", "char *", "char *") static int node_action_xml(pcmk__output_t *out, va_list args) { char *task = va_arg(args, char *); char *node_name = va_arg(args, char *); char *reason = va_arg(args, char *); if (task == NULL) { return pcmk_rc_no_output; } else if (reason) { pcmk__output_create_xml_node(out, "node_action", "task", task, "node", node_name, "reason", reason, NULL); } else { crm_notice(" * %s %s", task, node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr") static int inject_cluster_action(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr rsc = va_arg(args, xmlNodePtr); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if(rsc) { out->list_item(out, NULL, "Cluster action: %s for %s on %s", task, ID(rsc), node); } else { out->list_item(out, NULL, "Cluster action: %s on %s", task, node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *", "xmlNodePtr") static int inject_cluster_action_xml(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr rsc = va_arg(args, xmlNodePtr); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, "cluster_action", "task", task, "node", node, NULL); if (rsc) { crm_xml_add(xml_node, "id", ID(rsc)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-fencing-action", "char *", "const char *") static int inject_fencing_action(pcmk__output_t *out, va_list args) { char *target = va_arg(args, char *); const char *op = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Fencing %s (%s)", target, op); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-fencing-action", "char *", "const char *") static int inject_fencing_action_xml(pcmk__output_t *out, va_list args) { char *target = va_arg(args, char *); const char *op = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, "fencing_action", "target", target, "op", op, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr") static int inject_attr(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); xmlNodePtr cib_node = va_arg(args, xmlNodePtr); xmlChar *node_path = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node_path = xmlGetNodePath(cib_node); out->list_item(out, NULL, "Injecting attribute %s=%s into %s '%s'", name, value, node_path, ID(cib_node)); free(node_path); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr") static int inject_attr_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); xmlNodePtr cib_node = va_arg(args, xmlNodePtr); xmlChar *node_path = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node_path = xmlGetNodePath(cib_node); pcmk__output_create_xml_node(out, "inject_attr", "name", name, "value", value, "node_path", node_path, "cib_node", ID(cib_node), NULL); free(node_path); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-spec", "char *") static int inject_spec(pcmk__output_t *out, va_list args) { char *spec = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Injecting %s into the configuration", spec); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-spec", "char *") static int inject_spec_xml(pcmk__output_t *out, va_list args) { char *spec = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, "inject_spec", "spec", spec, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") static int inject_modify_config(pcmk__output_t *out, va_list args) { const char *quorum = va_arg(args, const char *); const char *watchdog = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->begin_list(out, NULL, NULL, "Performing Requested Modifications"); if (quorum) { out->list_item(out, NULL, "Setting quorum: %s", quorum); } if (watchdog) { out->list_item(out, NULL, "Setting watchdog: %s", watchdog); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *") static int inject_modify_config_xml(pcmk__output_t *out, va_list args) { const char *quorum = va_arg(args, const char *); const char *watchdog = va_arg(args, const char *); xmlNodePtr node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } node = pcmk__output_xml_create_parent(out, "modifications", NULL); if (quorum) { crm_xml_add(node, "quorum", quorum); } if (watchdog) { crm_xml_add(node, "watchdog", watchdog); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "char *") static int inject_modify_node(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); char *node = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (pcmk__str_eq(action, "Online", pcmk__str_none)) { out->list_item(out, NULL, "Bringing node %s online", node); return pcmk_rc_ok; } else if (pcmk__str_eq(action, "Offline", pcmk__str_none)) { out->list_item(out, NULL, "Taking node %s offline", node); return pcmk_rc_ok; } else if (pcmk__str_eq(action, "Failing", pcmk__str_none)) { out->list_item(out, NULL, "Failing node %s", node); return pcmk_rc_ok; } return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "char *") static int inject_modify_node_xml(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); char *node = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, "modify_node", "action", action, "node", node, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "char *") static int inject_modify_ticket(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); char *ticket = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (pcmk__str_eq(action, "Standby", pcmk__str_none)) { out->list_item(out, NULL, "Making ticket %s standby", ticket); } else { out->list_item(out, NULL, "%s ticket %s", action, ticket); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "char *") static int inject_modify_ticket_xml(pcmk__output_t *out, va_list args) { const char *action = va_arg(args, const char *); char *ticket = va_arg(args, char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } pcmk__output_create_xml_node(out, "modify_ticket", "action", action, "ticket", ticket, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *") static int inject_pseudo_action(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); if (out->is_quiet(out)) { return pcmk_rc_no_output; } out->list_item(out, NULL, "Pseudo action: %s%s%s", task, node ? " on " : "", node ? node : ""); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *") static int inject_pseudo_action_xml(pcmk__output_t *out, va_list args) { const char *node = va_arg(args, const char *); const char *task = va_arg(args, const char *); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, "pseudo_action", "task", task, NULL); if (node) { crm_xml_add(xml_node, "node", node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *", "char *", "guint") static int inject_rsc_action(pcmk__output_t *out, va_list args) { const char *rsc = va_arg(args, const char *); const char *operation = va_arg(args, const char *); char *node = va_arg(args, char *); guint interval_ms = va_arg(args, guint); if (out->is_quiet(out)) { return pcmk_rc_no_output; } if (interval_ms) { out->list_item(out, NULL, "Resource action: %-15s %s=%u on %s", rsc, operation, interval_ms, node); } else { out->list_item(out, NULL, "Resource action: %-15s %s on %s", rsc, operation, node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *", "char *", "guint") static int inject_rsc_action_xml(pcmk__output_t *out, va_list args) { const char *rsc = va_arg(args, const char *); const char *operation = va_arg(args, const char *); char *node = va_arg(args, char *); guint interval_ms = va_arg(args, guint); xmlNodePtr xml_node = NULL; if (out->is_quiet(out)) { return pcmk_rc_no_output; } xml_node = pcmk__output_create_xml_node(out, "rsc_action", "resource", rsc, "op", operation, "node", node, NULL); if (interval_ms) { char *interval_s = pcmk__itoa(interval_ms); crm_xml_add(xml_node, "interval", interval_s); free(interval_s); } return pcmk_rc_ok; } #define CHECK_RC(retcode, retval) \ if (retval == pcmk_rc_ok) { \ retcode = pcmk_rc_ok; \ } PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *", "crm_exit_t", "stonith_history_t *", "gboolean", "unsigned int", "unsigned int", "const char *", "GList *", "GList *") int pcmk__cluster_status_text(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); gboolean fence_history = va_arg(args, gboolean); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); int rc = pcmk_rc_no_output; bool already_printed_failure = false; CHECK_RC(rc, out->message(out, "cluster-summary", data_set, section_opts, show_opts)); if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames, resources, show_opts, rc == pcmk_rc_ok)); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { CHECK_RC(rc, out->message(out, "resource-list", data_set, show_opts, TRUE, unames, resources, rc == pcmk_rc_ok)); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { CHECK_RC(rc, out->message(out, "node-attribute-list", data_set, show_opts, rc == pcmk_rc_ok, unames, resources)); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { CHECK_RC(rc, out->message(out, "node-summary", data_set, unames, resources, section_opts, show_opts, rc == pcmk_rc_ok)); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && xml_has_children(data_set->failed)) { CHECK_RC(rc, out->message(out, "failed-action-list", data_set, unames, resources, show_opts, rc == pcmk_rc_ok)); } /* Print failed stonith actions */ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && fence_history) { if (history_rc == 0) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "failed-fencing-list", stonith_history, unames, section_opts, rc == pcmk_rc_ok)); } } else { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); already_printed_failure = true; } } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { CHECK_RC(rc, out->message(out, "ticket-list", data_set, rc == pcmk_rc_ok)); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { CHECK_RC(rc, out->message(out, "ban-list", data_set, prefix, resources, show_opts, rc == pcmk_rc_ok)); } /* Print stonith history */ if (fence_history && pcmk_any_flags_set(section_opts, pcmk_section_fencing_all)) { if (history_rc != 0) { if (!already_printed_failure) { PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { CHECK_RC(rc, out->message(out, "fencing-list", hp, unames, section_opts, rc == pcmk_rc_ok)); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { CHECK_RC(rc, out->message(out, "pending-fencing-list", hp, unames, section_opts, rc == pcmk_rc_ok)); } } } return rc; } PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *", "crm_exit_t", "stonith_history_t *", "gboolean", "unsigned int", "unsigned int", "const char *", "GList *", "GList *") static int cluster_status_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); gboolean fence_history = va_arg(args, gboolean); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); out->message(out, "cluster-summary", data_set, section_opts, show_opts); /*** NODES ***/ if (pcmk_is_set(section_opts, pcmk_section_nodes)) { out->message(out, "node-list", data_set->nodes, unames, resources, show_opts, FALSE); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { /* XML output always displays full details. */ unsigned int full_show_opts = show_opts & ~pcmk_show_brief; out->message(out, "resource-list", data_set, full_show_opts, FALSE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { out->message(out, "node-attribute-list", data_set, show_opts, FALSE, unames, resources); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { out->message(out, "node-summary", data_set, unames, resources, section_opts, show_opts, FALSE); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && xml_has_children(data_set->failed)) { out->message(out, "failed-action-list", data_set, unames, resources, show_opts, FALSE); } /* Print stonith history */ if (pcmk_is_set(section_opts, pcmk_section_fencing_all) && fence_history) { out->message(out, "full-fencing-list", history_rc, stonith_history, unames, section_opts, FALSE); } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { out->message(out, "ticket-list", data_set, FALSE); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { out->message(out, "ban-list", data_set, prefix, resources, show_opts, FALSE); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *", "crm_exit_t", "stonith_history_t *", "gboolean", "unsigned int", "unsigned int", "const char *", "GList *", "GList *") static int cluster_status_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); crm_exit_t history_rc = va_arg(args, crm_exit_t); stonith_history_t *stonith_history = va_arg(args, stonith_history_t *); gboolean fence_history = va_arg(args, gboolean); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); const char *prefix = va_arg(args, const char *); GList *unames = va_arg(args, GList *); GList *resources = va_arg(args, GList *); bool already_printed_failure = false; out->message(out, "cluster-summary", data_set, section_opts, show_opts); /*** NODE LIST ***/ if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) { out->message(out, "node-list", data_set->nodes, unames, resources, show_opts, FALSE); } /* Print resources section, if needed */ if (pcmk_is_set(section_opts, pcmk_section_resources)) { out->message(out, "resource-list", data_set, show_opts, TRUE, unames, resources, FALSE); } /* print Node Attributes section if requested */ if (pcmk_is_set(section_opts, pcmk_section_attributes)) { out->message(out, "node-attribute-list", data_set, show_opts, FALSE, unames, resources); } /* If requested, print resource operations (which includes failcounts) * or just failcounts */ if (pcmk_any_flags_set(section_opts, pcmk_section_operations | pcmk_section_failcounts)) { out->message(out, "node-summary", data_set, unames, resources, section_opts, show_opts, FALSE); } /* If there were any failed actions, print them */ if (pcmk_is_set(section_opts, pcmk_section_failures) && xml_has_children(data_set->failed)) { out->message(out, "failed-action-list", data_set, unames, resources, show_opts, FALSE); } /* Print failed stonith actions */ if (pcmk_is_set(section_opts, pcmk_section_fence_failed) && fence_history) { if (history_rc == 0) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_eq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "failed-fencing-list", stonith_history, unames, section_opts, FALSE); } } else { out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } /* Print stonith history */ if (fence_history && pcmk_any_flags_set(section_opts, pcmk_section_fencing_all)) { if (history_rc != 0) { if (!already_printed_failure) { out->begin_list(out, NULL, NULL, "Failed Fencing Actions"); out->list_item(out, NULL, "Failed to get fencing history: %s", crm_exit_str(history_rc)); out->end_list(out); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_neq, GINT_TO_POINTER(st_failed)); if (hp) { out->message(out, "fencing-list", hp, unames, section_opts, FALSE); } } else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) { stonith_history_t *hp = stonith__first_matching_event(stonith_history, stonith__event_state_pending, NULL); if (hp) { out->message(out, "pending-fencing-list", hp, unames, section_opts, FALSE); } } } /* Print tickets if requested */ if (pcmk_is_set(section_opts, pcmk_section_tickets)) { out->message(out, "ticket-list", data_set, FALSE); } /* Print negative location constraints if requested */ if (pcmk_is_set(section_opts, pcmk_section_bans)) { out->message(out, "ban-list", data_set, prefix, resources, show_opts, FALSE); } return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "cluster-status", "default", pcmk__cluster_status_text }, { "cluster-status", "html", cluster_status_html }, { "cluster-status", "xml", cluster_status_xml }, { "crmadmin-node", "default", crmadmin_node_text }, { "crmadmin-node", "xml", crmadmin_node_xml }, { "dc", "default", dc_text }, { "dc", "xml", dc_xml }, { "digests", "default", digests_text }, { "digests", "xml", digests_xml }, { "health", "default", health_text }, { "health", "xml", health_xml }, { "inject-attr", "default", inject_attr }, { "inject-attr", "xml", inject_attr_xml }, { "inject-cluster-action", "default", inject_cluster_action }, { "inject-cluster-action", "xml", inject_cluster_action_xml }, { "inject-fencing-action", "default", inject_fencing_action }, { "inject-fencing-action", "xml", inject_fencing_action_xml }, { "inject-modify-config", "default", inject_modify_config }, { "inject-modify-config", "xml", inject_modify_config_xml }, { "inject-modify-node", "default", inject_modify_node }, { "inject-modify-node", "xml", inject_modify_node_xml }, { "inject-modify-ticket", "default", inject_modify_ticket }, { "inject-modify-ticket", "xml", inject_modify_ticket_xml }, { "inject-pseudo-action", "default", inject_pseudo_action }, { "inject-pseudo-action", "xml", inject_pseudo_action_xml }, { "inject-rsc-action", "default", inject_rsc_action }, { "inject-rsc-action", "xml", inject_rsc_action_xml }, { "inject-spec", "default", inject_spec }, { "inject-spec", "xml", inject_spec_xml }, { "locations-list", "default", locations_list }, { "locations-list", "xml", locations_list_xml }, { "node-action", "default", node_action }, { "node-action", "xml", node_action_xml }, { "pacemakerd-health", "default", pacemakerd_health_text }, { "pacemakerd-health", "xml", pacemakerd_health_xml }, { "profile", "default", profile_default, }, { "profile", "xml", profile_xml }, { "rsc-action", "default", rsc_action_default }, { "rsc-action-item", "default", rsc_action_item }, { "rsc-action-item", "xml", rsc_action_item_xml }, { "rsc-is-colocated-with-list", "default", rsc_is_colocated_with_list }, { "rsc-is-colocated-with-list", "xml", rsc_is_colocated_with_list_xml }, { "rscs-colocated-with-list", "default", rscs_colocated_with_list }, { "rscs-colocated-with-list", "xml", rscs_colocated_with_list_xml }, { "stacks-constraints", "default", stacks_and_constraints }, { "stacks-constraints", "xml", stacks_and_constraints_xml }, { NULL, NULL, NULL } }; void pcmk__register_lib_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } diff --git a/lib/pacemaker/pcmk_sched_colocation.c b/lib/pacemaker/pcmk_sched_colocation.c index 610ae33935..5c2f2b4f4e 100644 --- a/lib/pacemaker/pcmk_sched_colocation.c +++ b/lib/pacemaker/pcmk_sched_colocation.c @@ -1,1059 +1,1064 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include +#include "crm/common/util.h" +#include "crm/common/xml_internal.h" +#include "crm/msg_xml.h" #include "libpacemaker_private.h" #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \ if (__rsc == NULL) { \ pcmk__config_err("%s: No resource found for %s", __set, __name); \ return; \ } \ } while(0) static gint cmp_dependent_priority(gconstpointer a, gconstpointer b) { const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a; const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->dependent != NULL); CRM_ASSERT(rsc_constraint1->primary != NULL); if (rsc_constraint1->dependent->priority > rsc_constraint2->dependent->priority) { return -1; } if (rsc_constraint1->dependent->priority < rsc_constraint2->dependent->priority) { return 1; } /* Process clones before primitives and groups */ if (rsc_constraint1->dependent->variant > rsc_constraint2->dependent->variant) { return -1; } if (rsc_constraint1->dependent->variant < rsc_constraint2->dependent->variant) { return 1; } /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable * clones (probably unnecessary, but avoids having to update regression * tests) */ if (rsc_constraint1->dependent->variant == pe_clone) { if (pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable) && !pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) { return -1; } else if (!pcmk_is_set(rsc_constraint1->dependent->flags, pe_rsc_promotable) && pcmk_is_set(rsc_constraint2->dependent->flags, pe_rsc_promotable)) { return 1; } } return strcmp(rsc_constraint1->dependent->id, rsc_constraint2->dependent->id); } static gint cmp_primary_priority(gconstpointer a, gconstpointer b) { const pcmk__colocation_t *rsc_constraint1 = (const pcmk__colocation_t *) a; const pcmk__colocation_t *rsc_constraint2 = (const pcmk__colocation_t *) b; if (a == NULL) { return 1; } if (b == NULL) { return -1; } CRM_ASSERT(rsc_constraint1->dependent != NULL); CRM_ASSERT(rsc_constraint1->primary != NULL); if (rsc_constraint1->primary->priority > rsc_constraint2->primary->priority) { return -1; } if (rsc_constraint1->primary->priority < rsc_constraint2->primary->priority) { return 1; } /* Process clones before primitives and groups */ if (rsc_constraint1->primary->variant > rsc_constraint2->primary->variant) { return -1; } else if (rsc_constraint1->primary->variant < rsc_constraint2->primary->variant) { return 1; } /* @COMPAT scheduler <2.0.0: Process promotable clones before nonpromotable * clones (probably unnecessary, but avoids having to update regression * tests) */ if (rsc_constraint1->primary->variant == pe_clone) { if (pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable) && !pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) { return -1; } else if (!pcmk_is_set(rsc_constraint1->primary->flags, pe_rsc_promotable) && pcmk_is_set(rsc_constraint2->primary->flags, pe_rsc_promotable)) { return 1; } } return strcmp(rsc_constraint1->primary->id, rsc_constraint2->primary->id); } /*! * \internal * \brief Add orderings necessary for an anti-colocation constraint */ static void anti_colocation_order(pe_resource_t *first_rsc, int first_role, pe_resource_t *then_rsc, int then_role, pe_working_set_t *data_set) { const char *first_tasks[] = { NULL, NULL }; const char *then_tasks[] = { NULL, NULL }; /* Actions to make first_rsc lose first_role */ if (first_role == RSC_ROLE_PROMOTED) { first_tasks[0] = CRMD_ACTION_DEMOTE; } else { first_tasks[0] = CRMD_ACTION_STOP; if (first_role == RSC_ROLE_UNPROMOTED) { first_tasks[1] = CRMD_ACTION_PROMOTE; } } /* Actions to make then_rsc gain then_role */ if (then_role == RSC_ROLE_PROMOTED) { then_tasks[0] = CRMD_ACTION_PROMOTE; } else { then_tasks[0] = CRMD_ACTION_START; if (then_role == RSC_ROLE_UNPROMOTED) { then_tasks[1] = CRMD_ACTION_DEMOTE; } } for (int first_lpc = 0; (first_lpc <= 1) && (first_tasks[first_lpc] != NULL); first_lpc++) { for (int then_lpc = 0; (then_lpc <= 1) && (then_tasks[then_lpc] != NULL); then_lpc++) { pcmk__order_resource_actions(first_rsc, first_tasks[first_lpc], then_rsc, then_tasks[then_lpc], pe_order_anti_colocation, data_set); } } } /*! * \internal * \brief Add a new colocation constraint to a cluster working set * * \param[in] id XML ID for this constraint * \param[in] node_attr Colocate by this attribute (or NULL for #uname) * \param[in] score Constraint score * \param[in] dependent Resource to be colocated * \param[in] primary Resource to colocate \p dependent with * \param[in] dependent_role Current role of \p dependent * \param[in] primary_role Current role of \p primary * \param[in] influence Whether colocation constraint has influence * \param[in] data_set Cluster working set to add constraint to */ void pcmk__new_colocation(const char *id, const char *node_attr, int score, pe_resource_t *dependent, pe_resource_t *primary, const char *dependent_role, const char *primary_role, bool influence, pe_working_set_t *data_set) { pcmk__colocation_t *new_con = NULL; if (score == 0) { crm_trace("Ignoring colocation '%s' because score is 0", id); return; } if ((dependent == NULL) || (primary == NULL)) { pcmk__config_err("Ignoring colocation '%s' because resource " "does not exist", id); return; } new_con = calloc(1, sizeof(pcmk__colocation_t)); if (new_con == NULL) { return; } if (pcmk__str_eq(dependent_role, RSC_ROLE_STARTED_S, pcmk__str_null_matches|pcmk__str_casei)) { dependent_role = RSC_ROLE_UNKNOWN_S; } if (pcmk__str_eq(primary_role, RSC_ROLE_STARTED_S, pcmk__str_null_matches|pcmk__str_casei)) { primary_role = RSC_ROLE_UNKNOWN_S; } new_con->id = id; new_con->dependent = dependent; new_con->primary = primary; new_con->score = score; new_con->dependent_role = text2role(dependent_role); new_con->primary_role = text2role(primary_role); new_con->node_attribute = node_attr; new_con->influence = influence; if (node_attr == NULL) { node_attr = CRM_ATTR_UNAME; } pe_rsc_trace(dependent, "%s ==> %s (%s %d)", dependent->id, primary->id, node_attr, score); dependent->rsc_cons = g_list_insert_sorted(dependent->rsc_cons, new_con, cmp_primary_priority); primary->rsc_cons_lhs = g_list_insert_sorted(primary->rsc_cons_lhs, new_con, cmp_dependent_priority); data_set->colocation_constraints = g_list_append(data_set->colocation_constraints, new_con); if (score <= -INFINITY) { anti_colocation_order(dependent, new_con->dependent_role, primary, new_con->primary_role, data_set); anti_colocation_order(primary, new_con->primary_role, dependent, new_con->dependent_role, data_set); } } /*! * \internal * \brief Return the boolean influence corresponding to configuration * * \param[in] coloc_id Colocation XML ID (for error logging) * \param[in] rsc Resource involved in constraint (for default) * \param[in] influence_s String value of influence option * * \return true if string evaluates true, false if string evaluates false, * or value of resource's critical option if string is NULL or invalid */ static bool unpack_influence(const char *coloc_id, const pe_resource_t *rsc, const char *influence_s) { if (influence_s != NULL) { int influence_i = 0; if (crm_str_to_boolean(influence_s, &influence_i) < 0) { pcmk__config_err("Constraint '%s' has invalid value for " XML_COLOC_ATTR_INFLUENCE " (using default)", coloc_id); } else { return (influence_i != 0); } } return pcmk_is_set(rsc->flags, pe_rsc_critical); } static void unpack_colocation_set(xmlNode *set, int score, const char *coloc_id, const char *influence_s, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; pe_resource_t *with = NULL; pe_resource_t *resource = NULL; const char *set_id = ID(set); const char *role = crm_element_value(set, "role"); - const char *sequential = crm_element_value(set, "sequential"); const char *ordering = crm_element_value(set, "ordering"); int local_score = score; + bool sequential = false; const char *score_s = crm_element_value(set, XML_RULE_ATTR_SCORE); if (score_s) { local_score = char2score(score_s); } if (local_score == 0) { crm_trace("Ignoring colocation '%s' for set '%s' because score is 0", coloc_id, set_id); return; } if (ordering == NULL) { ordering = "group"; } - if ((sequential != NULL) && !crm_is_true(sequential)) { + if (pcmk__xe_get_bool_attr(set, "sequential", &sequential) == pcmk_rc_ok && !sequential) { return; } else if ((local_score > 0) && pcmk__str_eq(ordering, "group", pcmk__str_casei)) { for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); if (with != NULL) { pe_rsc_trace(resource, "Colocating %s with %s", resource->id, with->id); pcmk__new_colocation(set_id, NULL, local_score, resource, with, role, role, unpack_influence(coloc_id, resource, influence_s), data_set); } with = resource; } } else if (local_score > 0) { pe_resource_t *last = NULL; for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); if (last != NULL) { pe_rsc_trace(resource, "Colocating %s with %s", last->id, resource->id); pcmk__new_colocation(set_id, NULL, local_score, last, resource, role, role, unpack_influence(coloc_id, last, influence_s), data_set); } last = resource; } } else { /* Anti-colocating with every prior resource is * the only way to ensure the intuitive result * (i.e. that no one in the set can run with anyone else in the set) */ for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xmlNode *xml_rsc_with = NULL; bool influence = true; EXPAND_CONSTRAINT_IDREF(set_id, resource, ID(xml_rsc)); influence = unpack_influence(coloc_id, resource, influence_s); for (xml_rsc_with = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc_with != NULL; xml_rsc_with = crm_next_same_xml(xml_rsc_with)) { if (pcmk__str_eq(resource->id, ID(xml_rsc_with), pcmk__str_casei)) { break; } EXPAND_CONSTRAINT_IDREF(set_id, with, ID(xml_rsc_with)); pe_rsc_trace(resource, "Anti-Colocating %s with %s", resource->id, with->id); pcmk__new_colocation(set_id, NULL, local_score, resource, with, role, role, influence, data_set); } } } } static void colocate_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, int score, const char *influence_s, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; pe_resource_t *rsc_1 = NULL; pe_resource_t *rsc_2 = NULL; const char *role_1 = crm_element_value(set1, "role"); const char *role_2 = crm_element_value(set2, "role"); - const char *sequential_1 = crm_element_value(set1, "sequential"); - const char *sequential_2 = crm_element_value(set2, "sequential"); + int rc = pcmk_rc_ok; + bool sequential = false; if (score == 0) { crm_trace("Ignoring colocation '%s' between sets because score is 0", id); return; } - if ((sequential_1 == NULL) || crm_is_true(sequential_1)) { + + rc = pcmk__xe_get_bool_attr(set1, "sequential", &sequential); + if (rc != pcmk_rc_ok || sequential) { // Get the first one xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); } } - if ((sequential_2 == NULL) || crm_is_true(sequential_2)) { + rc = pcmk__xe_get_bool_attr(set2, "sequential", &sequential); + if (rc != pcmk_rc_ok || sequential) { // Get the last one const char *rid = NULL; for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { rid = ID(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } if ((rsc_1 != NULL) && (rsc_2 != NULL)) { pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, unpack_influence(id, rsc_1, influence_s), data_set); } else if (rsc_1 != NULL) { bool influence = unpack_influence(id, rsc_1, influence_s); for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, influence, data_set); } } else if (rsc_2 != NULL) { for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, unpack_influence(id, rsc_1, influence_s), data_set); } } else { for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { xmlNode *xml_rsc_2 = NULL; bool influence = true; EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); influence = unpack_influence(id, rsc_1, influence_s); for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); pcmk__new_colocation(id, NULL, score, rsc_1, rsc_2, role_1, role_2, influence, data_set); } } } } static void unpack_simple_colocation(xmlNode *xml_obj, const char *id, const char *influence_s, pe_working_set_t *data_set) { int score_i = 0; const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); const char *primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); const char *dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); const char *primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); const char *attr = crm_element_value(xml_obj, XML_COLOC_ATTR_NODE_ATTR); - const char *symmetrical = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); // experimental syntax from pacemaker-next (unlikely to be adopted as-is) const char *dependent_instance = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_INSTANCE); const char *primary_instance = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_INSTANCE); pe_resource_t *dependent = pcmk__find_constraint_resource(data_set->resources, dependent_id); pe_resource_t *primary = pcmk__find_constraint_resource(data_set->resources, primary_id); if (dependent == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", id, dependent_id); return; } else if (primary == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", id, primary_id); return; } else if ((dependent_instance != NULL) && !pe_rsc_is_clone(dependent)) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "is not a clone but instance '%s' was requested", id, dependent_id, dependent_instance); return; } else if ((primary_instance != NULL) && !pe_rsc_is_clone(primary)) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "is not a clone but instance '%s' was requested", id, primary_id, primary_instance); return; } if (dependent_instance != NULL) { dependent = find_clone_instance(dependent, dependent_instance, data_set); if (dependent == NULL) { pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " "does not have an instance '%s'", id, dependent_id, dependent_instance); return; } } if (primary_instance != NULL) { primary = find_clone_instance(primary, primary_instance, data_set); if (primary == NULL) { pcmk__config_warn("Ignoring constraint '%s' because resource '%s' " "does not have an instance '%s'", "'%s'", id, primary_id, primary_instance); return; } } - if (crm_is_true(symmetrical)) { + if (pcmk__xe_attr_is_true(xml_obj, XML_CONS_ATTR_SYMMETRICAL)) { pcmk__config_warn("The colocation constraint '" XML_CONS_ATTR_SYMMETRICAL "' attribute has been removed"); } if (score) { score_i = char2score(score); } pcmk__new_colocation(id, attr, score_i, dependent, primary, dependent_role, primary_role, unpack_influence(id, dependent, influence_s), data_set); } // \return Standard Pacemaker return code static int unpack_colocation_tags(xmlNode *xml_obj, xmlNode **expanded_xml, pe_working_set_t *data_set) { const char *id = NULL; const char *dependent_id = NULL; const char *primary_id = NULL; const char *dependent_role = NULL; const char *primary_role = NULL; pe_resource_t *dependent = NULL; pe_resource_t *primary = NULL; pe_tag_t *dependent_tag = NULL; pe_tag_t *primary_tag = NULL; xmlNode *dependent_set = NULL; xmlNode *primary_set = NULL; bool any_sets = false; *expanded_xml = NULL; CRM_CHECK(xml_obj != NULL, return pcmk_rc_schema_validation); id = ID(xml_obj); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, crm_element_name(xml_obj)); return pcmk_rc_schema_validation; } // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); return pcmk_rc_ok; } dependent_id = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE); primary_id = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET); if ((dependent_id == NULL) || (primary_id == NULL)) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(data_set, dependent_id, &dependent, &dependent_tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", id, dependent_id); return pcmk_rc_schema_validation; } if (!pcmk__valid_resource_or_tag(data_set, primary_id, &primary, &primary_tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", id, primary_id); return pcmk_rc_schema_validation; } if ((dependent != NULL) && (primary != NULL)) { /* Neither side references any template/tag. */ return pcmk_rc_ok; } if ((dependent_tag != NULL) && (primary_tag != NULL)) { // A colocation constraint between two templates/tags makes no sense pcmk__config_err("Ignoring constraint '%s' because two templates or " "tags cannot be colocated", id); return pcmk_rc_schema_validation; } dependent_role = crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE); primary_role = crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE); *expanded_xml = copy_xml(xml_obj); // Convert template/tag reference in "rsc" into resource_set under constraint if (!pcmk__tag_to_set(*expanded_xml, &dependent_set, XML_COLOC_ATTR_SOURCE, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_schema_validation; } if (dependent_set != NULL) { if (dependent_role != NULL) { // Move "rsc-role" into converted resource_set as "role" crm_xml_add(dependent_set, "role", dependent_role); xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_SOURCE_ROLE); } any_sets = true; } // Convert template/tag reference in "with-rsc" into resource_set under constraint if (!pcmk__tag_to_set(*expanded_xml, &primary_set, XML_COLOC_ATTR_TARGET, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_schema_validation; } if (primary_set != NULL) { if (primary_role != NULL) { // Move "with-rsc-role" into converted resource_set as "role" crm_xml_add(primary_set, "role", primary_role); xml_remove_prop(*expanded_xml, XML_COLOC_ATTR_TARGET_ROLE); } any_sets = true; } if (any_sets) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_colocation"); } else { free_xml(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } /*! * \internal * \brief Parse a colocation constraint from XML into a cluster working set * * \param[in] xml_obj Colocation constraint XML to unpack * \param[in] data_set Cluster working set to add constraint to */ void pcmk__unpack_colocation(xmlNode *xml_obj, pe_working_set_t *data_set) { int score_i = 0; xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); const char *influence_s = crm_element_value(xml_obj, XML_COLOC_ATTR_INFLUENCE); if (score) { score_i = char2score(score); } if (unpack_colocation_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { return; } if (expanded_xml) { orig_xml = xml_obj; xml_obj = expanded_xml; } for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; set = crm_next_same_xml(set)) { set = expand_idref(set, data_set->input); if (set == NULL) { // Configuration error, message already logged if (expanded_xml != NULL) { free_xml(expanded_xml); } return; } unpack_colocation_set(set, score_i, id, influence_s, data_set); if (last != NULL) { colocate_rsc_sets(id, last, set, score_i, influence_s, data_set); } last = set; } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } if (last == NULL) { unpack_simple_colocation(xml_obj, id, influence_s, data_set); } } static void mark_start_blocked(pe_resource_t *rsc, pe_resource_t *reason, pe_working_set_t *data_set) { char *reason_text = crm_strdup_printf("colocation with %s", reason->id); for (GList *gIter = rsc->actions; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (pcmk_is_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, reason_text, false); pcmk__block_colocated_starts(action, data_set); update_action(action, data_set); } } free(reason_text); } /*! * \internal * \brief If a start action is unrunnable, block starts of colocated resources * * \param[in] action Action to check * \param[in] data_set Cluster working set */ void pcmk__block_colocated_starts(pe_action_t *action, pe_working_set_t *data_set) { GList *gIter = NULL; pe_resource_t *rsc = NULL; if (!pcmk_is_set(action->flags, pe_action_runnable) && pcmk__str_eq(action->task, RSC_START, pcmk__str_casei)) { rsc = uber_parent(action->rsc); if (rsc->parent) { /* For bundles, uber_parent() returns the clone, not the bundle, so * the existence of rsc->parent implies this is a bundle. * In this case, we need the bundle resource, so that we can check * if all containers are stopped/stopping. */ rsc = rsc->parent; } } if ((rsc == NULL) || (rsc->rsc_cons_lhs == NULL)) { return; } // Block colocated starts only if all children (if any) have unrunnable starts for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *)gIter->data; pe_action_t *start = find_first_action(child->actions, NULL, RSC_START, NULL); if ((start == NULL) || pcmk_is_set(start->flags, pe_action_runnable)) { return; } } for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) { pcmk__colocation_t *colocate_with = (pcmk__colocation_t *) gIter->data; if (colocate_with->score == INFINITY) { mark_start_blocked(colocate_with->dependent, action->rsc, data_set); } } } /*! * \internal * \brief Determine how a colocation constraint should affect a resource * * Colocation constraints have different effects at different points in the * scheduler sequence. Initially, they affect a resource's location; once that * is determined, then for promotable clones they can affect a resource * instance's role; after both are determined, the constraints no longer matter. * Given a specific colocation constraint, check what has been done so far to * determine what should be affected at the current point in the scheduler. * * \param[in] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] constraint Colocation constraint * \param[in] preview If true, pretend resources have already been allocated * * \return How colocation constraint should be applied at this point */ enum pcmk__coloc_affects pcmk__colocation_affects(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint, bool preview) { if (!preview && pcmk_is_set(primary->flags, pe_rsc_provisional)) { // Primary resource has not been allocated yet, so we can't do anything return pcmk__coloc_affects_nothing; } if ((constraint->dependent_role >= RSC_ROLE_UNPROMOTED) && (dependent->parent != NULL) && pcmk_is_set(dependent->parent->flags, pe_rsc_promotable) && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { /* This is a colocation by role, and the dependent is a promotable clone * that has already been allocated, so the colocation should now affect * the role. */ return pcmk__coloc_affects_role; } if (!preview && !pcmk_is_set(dependent->flags, pe_rsc_provisional)) { /* The dependent resource has already been through allocation, so the * constraint no longer has any effect. Log an error if a mandatory * colocation constraint has been violated. */ const pe_node_t *primary_node = primary->allocated_to; if (dependent->allocated_to == NULL) { crm_trace("Skipping colocation '%s': %s will not run anywhere", constraint->id, dependent->id); } else if (constraint->score >= INFINITY) { // Dependent resource must colocate with primary resource if ((primary_node == NULL) || (primary_node->details != dependent->allocated_to->details)) { crm_err("%s must be colocated with %s but is not (%s vs. %s)", dependent->id, primary->id, dependent->allocated_to->details->uname, (primary_node == NULL)? "unallocated" : primary_node->details->uname); } } else if (constraint->score <= -CRM_SCORE_INFINITY) { // Dependent resource must anti-colocate with primary resource if ((primary_node != NULL) && (dependent->allocated_to->details == primary_node->details)) { crm_err("%s and %s must be anti-colocated but are allocated " "to the same node (%s)", dependent->id, primary->id, primary_node->details->uname); } } return pcmk__coloc_affects_nothing; } if ((constraint->score > 0) && (constraint->dependent_role != RSC_ROLE_UNKNOWN) && (constraint->dependent_role != dependent->next_role)) { crm_trace("Skipping colocation '%s': dependent limited to %s role " "but %s next role is %s", constraint->id, role2text(constraint->dependent_role), dependent->id, role2text(dependent->next_role)); return pcmk__coloc_affects_nothing; } if ((constraint->score > 0) && (constraint->primary_role != RSC_ROLE_UNKNOWN) && (constraint->primary_role != primary->next_role)) { crm_trace("Skipping colocation '%s': primary limited to %s role " "but %s next role is %s", constraint->id, role2text(constraint->primary_role), primary->id, role2text(primary->next_role)); return pcmk__coloc_affects_nothing; } if ((constraint->score < 0) && (constraint->dependent_role != RSC_ROLE_UNKNOWN) && (constraint->dependent_role == dependent->next_role)) { crm_trace("Skipping anti-colocation '%s': dependent role %s matches", constraint->id, role2text(constraint->dependent_role)); return pcmk__coloc_affects_nothing; } if ((constraint->score < 0) && (constraint->primary_role != RSC_ROLE_UNKNOWN) && (constraint->primary_role == primary->next_role)) { crm_trace("Skipping anti-colocation '%s': primary role %s matches", constraint->id, role2text(constraint->primary_role)); return pcmk__coloc_affects_nothing; } return pcmk__coloc_affects_location; } /*! * \internal * \brief Apply colocation to dependent for allocation purposes * * Update the allocated node weights of the dependent resource in a colocation, * for the purposes of allocating it to a node * * \param[in] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] constraint Colocation constraint */ void pcmk__apply_coloc_to_weights(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint) { const char *attribute = CRM_ATTR_ID; const char *value = NULL; GHashTable *work = NULL; GHashTableIter iter; pe_node_t *node = NULL; if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } if (primary->allocated_to != NULL) { value = pe_node_attribute_raw(primary->allocated_to, attribute); } else if (constraint->score < 0) { // Nothing to do (anti-colocation with something that is not running) return; } work = pcmk__copy_node_table(dependent->allowed_nodes); g_hash_table_iter_init(&iter, work); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (primary->allocated_to == NULL) { pe_rsc_trace(dependent, "%s: %s@%s -= %d (%s inactive)", constraint->id, dependent->id, node->details->uname, constraint->score, primary->id); node->weight = pe__add_scores(-constraint->score, node->weight); } else if (pcmk__str_eq(pe_node_attribute_raw(node, attribute), value, pcmk__str_casei)) { if (constraint->score < CRM_SCORE_INFINITY) { pe_rsc_trace(dependent, "%s: %s@%s += %d", constraint->id, dependent->id, node->details->uname, constraint->score); node->weight = pe__add_scores(constraint->score, node->weight); } } else if (constraint->score >= CRM_SCORE_INFINITY) { pe_rsc_trace(dependent, "%s: %s@%s -= %d (%s mismatch)", constraint->id, dependent->id, node->details->uname, constraint->score, attribute); node->weight = pe__add_scores(-constraint->score, node->weight); } } if (can_run_any(work) || (constraint->score <= -INFINITY) || (constraint->score >= INFINITY)) { g_hash_table_destroy(dependent->allowed_nodes); dependent->allowed_nodes = work; work = NULL; } else { pe_rsc_info(dependent, "%s: Rolling back scores from %s (no available nodes)", dependent->id, primary->id); } if (work != NULL) { g_hash_table_destroy(work); } } /*! * \internal * \brief Apply colocation to dependent for role purposes * * Update the priority of the dependent resource in a colocation, for the * purposes of selecting its role * * \param[in] dependent Dependent resource in colocation * \param[in] primary Primary resource in colocation * \param[in] constraint Colocation constraint */ void pcmk__apply_coloc_to_priority(pe_resource_t *dependent, pe_resource_t *primary, pcmk__colocation_t *constraint) { const char *dependent_value = NULL; const char *primary_value = NULL; const char *attribute = CRM_ATTR_ID; int score_multiplier = 1; if ((primary->allocated_to == NULL) || (dependent->allocated_to == NULL)) { return; } if (constraint->node_attribute != NULL) { attribute = constraint->node_attribute; } dependent_value = pe_node_attribute_raw(dependent->allocated_to, attribute); primary_value = pe_node_attribute_raw(primary->allocated_to, attribute); if (!pcmk__str_eq(dependent_value, primary_value, pcmk__str_casei)) { if ((constraint->score == INFINITY) && (constraint->dependent_role == RSC_ROLE_PROMOTED)) { dependent->priority = -INFINITY; } return; } if ((constraint->primary_role != RSC_ROLE_UNKNOWN) && (constraint->primary_role != primary->next_role)) { return; } if (constraint->dependent_role == RSC_ROLE_UNPROMOTED) { score_multiplier = -1; } dependent->priority = pe__add_scores(score_multiplier * constraint->score, dependent->priority); } diff --git a/lib/pacemaker/pcmk_sched_constraints.c b/lib/pacemaker/pcmk_sched_constraints.c index f629ec11e7..2cde45c370 100644 --- a/lib/pacemaker/pcmk_sched_constraints.c +++ b/lib/pacemaker/pcmk_sched_constraints.c @@ -1,420 +1,420 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" static bool evaluate_lifetime(xmlNode *lifetime, pe_working_set_t *data_set) { bool result = FALSE; crm_time_t *next_change = crm_time_new_undefined(); result = pe_evaluate_rules(lifetime, NULL, data_set->now, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); return result; } /*! * \internal * \brief Unpack constraints from XML * * Given a cluster working set, unpack all constraints from its input XML into * data structures. * * \param[in,out] data_set Cluster working set */ void pcmk__unpack_constraints(pe_working_set_t *data_set) { xmlNode *xml_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); for (xmlNode *xml_obj = pcmk__xe_first_child(xml_constraints); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { xmlNode *lifetime = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *tag = crm_element_name(xml_obj); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, tag); continue; } crm_trace("Unpacking %s constraint '%s'", tag, id); lifetime = first_named_child(xml_obj, "lifetime"); if (lifetime != NULL) { pcmk__config_warn("Support for 'lifetime' attribute (in %s) is " "deprecated (the rules it contains should " "instead be direct descendents of the " "constraint object)", id); } if ((lifetime != NULL) && !evaluate_lifetime(lifetime, data_set)) { crm_info("Constraint %s %s is not active", tag, id); } else if (pcmk__str_eq(XML_CONS_TAG_RSC_ORDER, tag, pcmk__str_casei)) { pcmk__unpack_ordering(xml_obj, data_set); } else if (pcmk__str_eq(XML_CONS_TAG_RSC_DEPEND, tag, pcmk__str_casei)) { pcmk__unpack_colocation(xml_obj, data_set); } else if (pcmk__str_eq(XML_CONS_TAG_RSC_LOCATION, tag, pcmk__str_casei)) { pcmk__unpack_location(xml_obj, data_set); } else if (pcmk__str_eq(XML_CONS_TAG_RSC_TICKET, tag, pcmk__str_casei)) { pcmk__unpack_rsc_ticket(xml_obj, data_set); } else { pe_err("Unsupported constraint type: %s", tag); } } } pe_resource_t * pcmk__find_constraint_resource(GList *rsc_list, const char *id) { GList *rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { pe_resource_t *parent = rIter->data; pe_resource_t *match = parent->fns->find_rsc(parent, id, NULL, pe_find_renamed); if (match != NULL) { if(!pcmk__str_eq(match->id, id, pcmk__str_casei)) { /* We found an instance of a clone instead */ match = uber_parent(match); crm_debug("Found %s for %s", match->id, id); } return match; } } crm_trace("No match for %s", id); return NULL; } /*! * \internal * \brief Check whether an ID references a resource tag * * \param[in] data_set Cluster working set * \param[in] id Tag ID to search for * \param[out] tag Where to store tag, if found * * \return true if ID refers to a tagged resource or resource set template, * otherwise false */ static bool find_constraint_tag(pe_working_set_t *data_set, const char *id, pe_tag_t **tag) { *tag = NULL; // Check whether id refers to a resource set template if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id, NULL, (gpointer *) tag)) { if (*tag == NULL) { crm_warn("No resource is derived from template '%s'", id); return false; } return true; } // If not, check whether id refers to a tag if (g_hash_table_lookup_extended(data_set->tags, id, NULL, (gpointer *) tag)) { if (*tag == NULL) { crm_warn("No resource is tagged with '%s'", id); return false; } return true; } crm_warn("No template or tag named '%s'", id); return false; } /*! * \brief * \internal Check whether an ID refers to a valid resource or tag * * \param[in] data_set Cluster working set * \param[in] id ID to search for * \param[out] rsc Where to store resource, if found (or NULL to skip * searching resources) * \param[out] tag Where to store tag, if found (or NULL to skip searching * tags) * * \return true if id refers to a resource (possibly indirectly via a tag) */ bool pcmk__valid_resource_or_tag(pe_working_set_t *data_set, const char *id, pe_resource_t **rsc, pe_tag_t **tag) { if (rsc != NULL) { *rsc = pcmk__find_constraint_resource(data_set->resources, id); if (*rsc != NULL) { return true; } } if ((tag != NULL) && find_constraint_tag(data_set, id, tag)) { return true; } return false; } /*! * \internal * \brief Replace any resource tags with equivalent resource_ref entries * * If a given constraint has resource sets, check each set for resource_ref * entries that list tags rather than resource IDs, and replace any found with * resource_ref entries for the corresponding resource IDs. * * \param[in] xml_obj Constraint XML * \param[in] data_set Cluster working set * * \return Equivalent XML with resource tags replaced (or NULL if none) * \note It is the caller's responsibility to free the result with free_xml(). */ xmlNode * pcmk__expand_tags_in_sets(xmlNode *xml_obj, pe_working_set_t *data_set) { xmlNode *new_xml = NULL; bool any_refs = false; // Short-circuit if there are no sets if (first_named_child(xml_obj, XML_CONS_TAG_RSC_SET) == NULL) { return NULL; } new_xml = copy_xml(xml_obj); for (xmlNode *set = first_named_child(new_xml, XML_CONS_TAG_RSC_SET); set != NULL; set = crm_next_same_xml(set)) { GList *tag_refs = NULL; GList *gIter = NULL; for (xmlNode *xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { pe_resource_t *rsc = NULL; pe_tag_t *tag = NULL; if (!pcmk__valid_resource_or_tag(data_set, ID(xml_rsc), &rsc, &tag)) { pcmk__config_err("Ignoring resource sets for constraint '%s' " "because '%s' is not a valid resource or tag", ID(xml_obj), ID(xml_rsc)); free_xml(new_xml); return NULL; } else if (rsc) { continue; } else if (tag) { /* The resource_ref under the resource_set references a template/tag */ xmlNode *last_ref = xml_rsc; /* A sample: Original XML: Now we are appending rsc2 and rsc3 which are tagged with tag1 right after it: */ for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *obj_ref = (const char *) gIter->data; xmlNode *new_rsc_ref = NULL; new_rsc_ref = xmlNewDocRawNode(getDocPtr(set), NULL, (pcmkXmlStr) XML_TAG_RESOURCE_REF, NULL); crm_xml_add(new_rsc_ref, XML_ATTR_ID, obj_ref); xmlAddNextSibling(last_ref, new_rsc_ref); last_ref = new_rsc_ref; } any_refs = true; /* Freeing the resource_ref now would break the XML child * iteration, so just remember it for freeing later. */ tag_refs = g_list_append(tag_refs, xml_rsc); } } /* Now free '', and finally get: */ for (gIter = tag_refs; gIter != NULL; gIter = gIter->next) { xmlNode *tag_ref = gIter->data; free_xml(tag_ref); } g_list_free(tag_refs); } if (!any_refs) { free_xml(new_xml); new_xml = NULL; } return new_xml; } /*! * \internal * \brief Convert a tag into a resource set of tagged resources * * \param[in] xml_obj Constraint XML * \param[out] rsc_set Where to store resource set XML created based on tag * \param[in] attr Name of XML attribute containing resource or tag ID * \param[in] convert_rsc Convert to set even if \p attr references a resource * \param[in] data_set Cluster working set */ bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, pe_working_set_t *data_set) { const char *cons_id = NULL; const char *id = NULL; pe_resource_t *rsc = NULL; pe_tag_t *tag = NULL; *rsc_set = NULL; CRM_CHECK((xml_obj != NULL) && (attr != NULL), return false); cons_id = ID(xml_obj); if (cons_id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, crm_element_name(xml_obj)); return false; } id = crm_element_value(xml_obj, attr); if (id == NULL) { return true; } if (!pcmk__valid_resource_or_tag(data_set, id, &rsc, &tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", cons_id, id); return false; } else if (tag) { GList *gIter = NULL; /* A template/tag is referenced by the "attr" attribute (first, then, rsc or with-rsc). Add the template/tag's corresponding "resource_set" which contains the resources derived from it or tagged with it under the constraint. */ *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); crm_xml_add(*rsc_set, XML_ATTR_ID, id); for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *obj_ref = (const char *) gIter->data; xmlNode *rsc_ref = NULL; rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); crm_xml_add(rsc_ref, XML_ATTR_ID, obj_ref); } /* Set sequential="false" for the resource_set */ - crm_xml_add(*rsc_set, "sequential", XML_BOOLEAN_FALSE); + pcmk__xe_set_bool_attr(*rsc_set, "sequential", false); } else if ((rsc != NULL) && convert_rsc) { /* Even a regular resource is referenced by "attr", convert it into a resource_set. Because the other side of the constraint could be a template/tag reference. */ xmlNode *rsc_ref = NULL; *rsc_set = create_xml_node(xml_obj, XML_CONS_TAG_RSC_SET); crm_xml_add(*rsc_set, XML_ATTR_ID, id); rsc_ref = create_xml_node(*rsc_set, XML_TAG_RESOURCE_REF); crm_xml_add(rsc_ref, XML_ATTR_ID, id); } else { return true; } /* Remove the "attr" attribute referencing the template/tag */ if (*rsc_set != NULL) { xml_remove_prop(xml_obj, attr); } return true; } /*! * \internal * \brief Create constraints inherent to resource types * * \param[in,out] data_set Cluster working set */ void pcmk__create_internal_constraints(pe_working_set_t *data_set) { crm_trace("Create internal constraints"); for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) { pe_resource_t *rsc = (pe_resource_t *) iter->data; rsc->cmds->internal_constraints(rsc, data_set); } } diff --git a/lib/pacemaker/pcmk_sched_ordering.c b/lib/pacemaker/pcmk_sched_ordering.c index 647bbf3581..e2ba1afc7e 100644 --- a/lib/pacemaker/pcmk_sched_ordering.c +++ b/lib/pacemaker/pcmk_sched_ordering.c @@ -1,1535 +1,1537 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "libpacemaker_private.h" enum pe_order_kind { pe_order_kind_optional, pe_order_kind_mandatory, pe_order_kind_serialize, }; enum ordering_symmetry { ordering_asymmetric, // the only relation in an asymmetric ordering ordering_symmetric, // the normal relation in a symmetric ordering ordering_symmetric_inverse, // the inverse relation in a symmetric ordering }; #define EXPAND_CONSTRAINT_IDREF(__set, __rsc, __name) do { \ __rsc = pcmk__find_constraint_resource(data_set->resources, __name); \ if (__rsc == NULL) { \ pcmk__config_err("%s: No resource found for %s", __set, __name); \ return pcmk_rc_schema_validation; \ } \ } while (0) static const char * invert_action(const char *action) { if (pcmk__str_eq(action, RSC_START, pcmk__str_casei)) { return RSC_STOP; } else if (pcmk__str_eq(action, RSC_STOP, pcmk__str_casei)) { return RSC_START; } else if (pcmk__str_eq(action, RSC_PROMOTE, pcmk__str_casei)) { return RSC_DEMOTE; } else if (pcmk__str_eq(action, RSC_DEMOTE, pcmk__str_casei)) { return RSC_PROMOTE; } else if (pcmk__str_eq(action, RSC_PROMOTED, pcmk__str_casei)) { return RSC_DEMOTED; } else if (pcmk__str_eq(action, RSC_DEMOTED, pcmk__str_casei)) { return RSC_PROMOTED; } else if (pcmk__str_eq(action, RSC_STARTED, pcmk__str_casei)) { return RSC_STOPPED; } else if (pcmk__str_eq(action, RSC_STOPPED, pcmk__str_casei)) { return RSC_STARTED; } crm_warn("Unknown action '%s' specified in order constraint", action); return NULL; } static enum pe_order_kind get_ordering_type(xmlNode *xml_obj) { enum pe_order_kind kind_e = pe_order_kind_mandatory; const char *kind = crm_element_value(xml_obj, XML_ORDER_ATTR_KIND); if (kind == NULL) { const char *score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); kind_e = pe_order_kind_mandatory; if (score) { // @COMPAT deprecated informally since 1.0.7, formally since 2.0.1 int score_i = char2score(score); if (score_i == 0) { kind_e = pe_order_kind_optional; } pe_warn_once(pe_wo_order_score, "Support for 'score' in rsc_order is deprecated " "and will be removed in a future release " "(use 'kind' instead)"); } } else if (pcmk__str_eq(kind, "Mandatory", pcmk__str_casei)) { kind_e = pe_order_kind_mandatory; } else if (pcmk__str_eq(kind, "Optional", pcmk__str_casei)) { kind_e = pe_order_kind_optional; } else if (pcmk__str_eq(kind, "Serialize", pcmk__str_casei)) { kind_e = pe_order_kind_serialize; } else { pcmk__config_err("Resetting '" XML_ORDER_ATTR_KIND "' for constraint " "'%s' to Mandatory because '%s' is not valid", crm_str(ID(xml_obj)), kind); } return kind_e; } /*! * \internal * \brief Get ordering symmetry from XML * * \param[in] xml_obj Ordering XML * \param[in] parent_kind Default ordering kind * \param[in] parent_symmetrical_s Parent element's symmetrical setting, if any * * \retval ordering_symmetric Ordering is symmetric * \retval ordering_asymmetric Ordering is asymmetric */ static enum ordering_symmetry get_ordering_symmetry(xmlNode *xml_obj, enum pe_order_kind parent_kind, const char *parent_symmetrical_s) { - const char *symmetrical_s = NULL; + int rc = pcmk_rc_ok; + bool symmetric = false; enum pe_order_kind kind = parent_kind; // Default to parent's kind // Check ordering XML for explicit kind if ((crm_element_value(xml_obj, XML_ORDER_ATTR_KIND) != NULL) || (crm_element_value(xml_obj, XML_RULE_ATTR_SCORE) != NULL)) { kind = get_ordering_type(xml_obj); } // Check ordering XML (and parent) for explicit symmetrical setting - symmetrical_s = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); - if (symmetrical_s == NULL) { - symmetrical_s = parent_symmetrical_s; + rc = pcmk__xe_get_bool_attr(xml_obj, XML_CONS_ATTR_SYMMETRICAL, &symmetric); + + if (rc != pcmk_rc_ok && parent_symmetrical_s != NULL) { + symmetric = crm_is_true(parent_symmetrical_s); + rc = pcmk_rc_ok; } - if (symmetrical_s != NULL) { - if (crm_is_true(symmetrical_s)) { + + if (rc == pcmk_rc_ok) { + if (symmetric) { if (kind == pe_order_kind_serialize) { pcmk__config_warn("Ignoring " XML_CONS_ATTR_SYMMETRICAL " for '%s' because not valid with " XML_ORDER_ATTR_KIND " of 'Serialize'", ID(xml_obj)); } else { return ordering_symmetric; } } return ordering_asymmetric; } // Use default symmetry if (kind == pe_order_kind_serialize) { return ordering_asymmetric; } return ordering_symmetric; } /*! * \internal * \brief Get ordering flags appropriate to ordering kind * * \param[in] kind Ordering kind * \param[in] first Action name for 'first' action * \param[in] symmetry This ordering's symmetry role * * \return Minimal ordering flags appropriate to \p kind */ static enum pe_ordering ordering_flags_for_kind(enum pe_order_kind kind, const char *first, enum ordering_symmetry symmetry) { enum pe_ordering flags = pe_order_none; // so we trace-log all flags set pe__set_order_flags(flags, pe_order_optional); switch (kind) { case pe_order_kind_optional: break; case pe_order_kind_serialize: pe__set_order_flags(flags, pe_order_serialize_only); break; case pe_order_kind_mandatory: switch (symmetry) { case ordering_asymmetric: pe__set_order_flags(flags, pe_order_asymmetrical); break; case ordering_symmetric: pe__set_order_flags(flags, pe_order_implies_then); if (pcmk__strcase_any_of(first, RSC_START, RSC_PROMOTE, NULL)) { pe__set_order_flags(flags, pe_order_runnable_left); } break; case ordering_symmetric_inverse: pe__set_order_flags(flags, pe_order_implies_first); break; } break; } return flags; } /*! * \internal * \brief Find resource corresponding to ID specified in ordering * * \param[in] xml Ordering XML * \param[in] resource_attr XML attribute name for resource ID * \param[in] instance_attr XML attribute name for instance number * \param[in] data_set Cluster working set * * \return Resource corresponding to \p id, or NULL if none */ static pe_resource_t * get_ordering_resource(xmlNode *xml, const char *resource_attr, const char *instance_attr, pe_working_set_t *data_set) { pe_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(xml, resource_attr); const char *instance_id = crm_element_value(xml, instance_attr); if (rsc_id == NULL) { pcmk__config_err("Ignoring constraint '%s' without %s", ID(xml), resource_attr); return NULL; } rsc = pcmk__find_constraint_resource(data_set->resources, rsc_id); if (rsc == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not exist", ID(xml), rsc_id); return NULL; } if (instance_id != NULL) { if (!pe_rsc_is_clone(rsc)) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "is not a clone but instance '%s' was requested", ID(xml), rsc_id, instance_id); return NULL; } rsc = find_clone_instance(rsc, instance_id, data_set); if (rsc == NULL) { pcmk__config_err("Ignoring constraint '%s' because resource '%s' " "does not have an instance '%s'", "'%s'", ID(xml), rsc_id, instance_id); return NULL; } } return rsc; } /*! * \internal * \brief Determine minimum number of 'first' instances required in ordering * * \param[in] rsc 'First' resource in ordering * \param[in] xml Ordering XML * * \return Minimum 'first' instances required (or 0 if not applicable) */ static int get_minimum_first_instances(pe_resource_t *rsc, xmlNode *xml) { if (pe_rsc_is_clone(rsc)) { const char *clone_min = NULL; clone_min = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION_MIN); if (clone_min != NULL) { int clone_min_int = 0; pcmk__scan_min_int(clone_min, &clone_min_int, 0); return clone_min_int; } /* @COMPAT 1.1.13: * require-all=false is deprecated equivalent of clone-min=1 */ clone_min = crm_element_value(xml, "require-all"); if (clone_min != NULL) { pe_warn_once(pe_wo_require_all, "Support for require-all in ordering constraints " "is deprecated and will be removed in a future release" " (use clone-min clone meta-attribute instead)"); if (!crm_is_true(clone_min)) { return 1; } } } return 0; } /*! * \internal * \brief Create orderings for a constraint with clone-min > 0 * * \param[in] id Ordering ID * \param[in] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering * \param[in] flags Ordering flags * \param[in] clone_min Minimum required instances of 'first' * \param[in] data_set Cluster working set */ static void clone_min_ordering(const char *id, pe_resource_t *rsc_first, const char *action_first, pe_resource_t *rsc_then, const char *action_then, enum pe_ordering flags, int clone_min, pe_working_set_t *data_set) { // Create a pseudo-action for when the minimum instances are active char *task = crm_strdup_printf(CRM_OP_RELAXED_CLONE ":%s", id); pe_action_t *clone_min_met = get_pseudo_op(task, data_set); free(task); /* Require the pseudo-action to have the required number of actions to be * considered runnable before allowing the pseudo-action to be runnable. */ clone_min_met->required_runnable_before = clone_min; pe__set_action_flags(clone_min_met, pe_action_requires_any); // Order the actions for each clone instance before the pseudo-action for (GList *rIter = rsc_first->children; rIter != NULL; rIter = rIter->next) { pe_resource_t *child = rIter->data; pcmk__new_ordering(child, pcmk__op_key(child->id, action_first, 0), NULL, NULL, NULL, clone_min_met, pe_order_one_or_more|pe_order_implies_then_printed, data_set); } // Order "then" action after the pseudo-action (if runnable) pcmk__new_ordering(NULL, NULL, clone_min_met, rsc_then, pcmk__op_key(rsc_then->id, action_then, 0), NULL, flags|pe_order_runnable_left, data_set); } /*! * \internal * \brief Update ordering flags for restart-type=restart * * \param[in] rsc 'Then' resource in ordering * \param[in] kind Ordering kind * \param[in] flag Ordering flag to set (when applicable) * \param[out] flags Ordering flag set to update * * \compat The restart-type resource meta-attribute is deprecated. Eventually, * it will be removed, and pe_restart_ignore will be the only behavior, * at which time this can just be removed entirely. */ #define handle_restart_type(rsc, kind, flag, flags) do { \ if (((kind) == pe_order_kind_optional) \ && ((rsc)->restart_type == pe_restart_restart)) { \ pe__set_order_flags((flags), (flag)); \ } \ } while (0) /*! * \internal * \brief Create new ordering for inverse of symmetric constraint * * \param[in] id Ordering ID (for logging only) * \param[in] kind Ordering kind * \param[in] rsc_first 'First' resource in ordering (a clone) * \param[in] action_first 'First' action in ordering * \param[in] rsc_then 'Then' resource in ordering * \param[in] action_then 'Then' action in ordering * \param[in] data_set Cluster working set */ static void inverse_ordering(const char *id, enum pe_order_kind kind, pe_resource_t *rsc_first, const char *action_first, pe_resource_t *rsc_then, const char *action_then, pe_working_set_t *data_set) { action_then = invert_action(action_then); action_first = invert_action(action_first); if ((action_then == NULL) || (action_first == NULL)) { pcmk__config_warn("Cannot invert constraint '%s' " "(please specify inverse manually)", id); } else { enum pe_ordering flags = ordering_flags_for_kind(kind, action_first, ordering_symmetric_inverse); handle_restart_type(rsc_then, kind, pe_order_implies_first, flags); pcmk__order_resource_actions(rsc_then, action_then, rsc_first, action_first, flags, data_set); } } static void unpack_simple_rsc_order(xmlNode *xml_obj, pe_working_set_t *data_set) { pe_resource_t *rsc_then = NULL; pe_resource_t *rsc_first = NULL; int min_required_before = 0; enum pe_order_kind kind = pe_order_kind_mandatory; enum pe_ordering cons_weight = pe_order_none; enum ordering_symmetry symmetry; const char *action_then = NULL; const char *action_first = NULL; const char *id = NULL; CRM_CHECK(xml_obj != NULL, return); id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " XML_ATTR_ID, crm_element_name(xml_obj)); return; } rsc_first = get_ordering_resource(xml_obj, XML_ORDER_ATTR_FIRST, XML_ORDER_ATTR_FIRST_INSTANCE, data_set); if (rsc_first == NULL) { return; } rsc_then = get_ordering_resource(xml_obj, XML_ORDER_ATTR_THEN, XML_ORDER_ATTR_THEN_INSTANCE, data_set); if (rsc_then == NULL) { return; } action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); if (action_first == NULL) { action_first = RSC_START; } action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); if (action_then == NULL) { action_then = action_first; } kind = get_ordering_type(xml_obj); symmetry = get_ordering_symmetry(xml_obj, kind, NULL); cons_weight = ordering_flags_for_kind(kind, action_first, symmetry); handle_restart_type(rsc_then, kind, pe_order_implies_then, cons_weight); /* If there is a minimum number of instances that must be runnable before * the 'then' action is runnable, we use a pseudo-action for convenience: * minimum number of clone instances have runnable actions -> * pseudo-action is runnable -> dependency is runnable. */ min_required_before = get_minimum_first_instances(rsc_first, xml_obj); if (min_required_before > 0) { clone_min_ordering(id, rsc_first, action_first, rsc_then, action_then, cons_weight, min_required_before, data_set); } else { pcmk__order_resource_actions(rsc_first, action_first, rsc_then, action_then, cons_weight, data_set); } if (symmetry == ordering_symmetric) { inverse_ordering(id, kind, rsc_first, action_first, rsc_then, action_then, data_set); } } static char * task_from_action_or_key(pe_action_t *action, const char *key) { char *res = NULL; if (action != NULL) { res = strdup(action->task); } else if (key != NULL) { parse_op_key(key, NULL, &res, NULL); } return res; } /*! * \internal * \brief Apply start/stop orderings to migrations * * Orderings involving start, stop, demote, and promote actions must be honored * during a migration as well, so duplicate any such ordering for the * corresponding migration actions. * * \param[in] order Ordering constraint to check * \param[in] data_set Cluster working set */ static void handle_migration_ordering(pe__ordering_t *order, pe_working_set_t *data_set) { char *lh_task = NULL; char *rh_task = NULL; bool rh_migratable; bool lh_migratable; // Only orderings between two different resources are relevant if ((order->lh_rsc == NULL) || (order->rh_rsc == NULL) || (order->lh_rsc == order->rh_rsc)) { return; } // Constraints between a parent resource and its children are not relevant if (is_parent(order->lh_rsc, order->rh_rsc) || is_parent(order->rh_rsc, order->lh_rsc)) { return; } // Only orderings involving at least one migratable resource are relevant lh_migratable = pcmk_is_set(order->lh_rsc->flags, pe_rsc_allow_migrate); rh_migratable = pcmk_is_set(order->rh_rsc->flags, pe_rsc_allow_migrate); if (!lh_migratable && !rh_migratable) { return; } // Check which actions are involved lh_task = task_from_action_or_key(order->lh_action, order->lh_action_task); rh_task = task_from_action_or_key(order->rh_action, order->rh_action_task); if ((lh_task == NULL) || (rh_task == NULL)) { goto cleanup_order; } if (pcmk__str_eq(lh_task, RSC_START, pcmk__str_casei) && pcmk__str_eq(rh_task, RSC_START, pcmk__str_casei)) { int flags = pe_order_optional; if (lh_migratable && rh_migratable) { /* A start then B start * -> A migrate_from then B migrate_to */ pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_MIGRATED, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); } if (rh_migratable) { if (lh_migratable) { pe__set_order_flags(flags, pe_order_apply_first_non_migratable); } /* A start then B start * -> A start then B migrate_to (if start is not part of a * migration) */ pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_START, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); } } else if (rh_migratable && pcmk__str_eq(lh_task, RSC_STOP, pcmk__str_casei) && pcmk__str_eq(rh_task, RSC_STOP, pcmk__str_casei)) { int flags = pe_order_optional; if (lh_migratable) { pe__set_order_flags(flags, pe_order_apply_first_non_migratable); } /* For an ordering "stop A then stop B", if A is moving via restart, and * B is migrating, enforce that B's migrate_to occurs after A's stop. */ pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); // Also order B's migrate_from after A's stop during partial migrations if (order->rh_rsc->partial_migration_target) { pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_STOP, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0), NULL, flags, data_set); } } else if (pcmk__str_eq(lh_task, RSC_PROMOTE, pcmk__str_casei) && pcmk__str_eq(rh_task, RSC_START, pcmk__str_casei)) { int flags = pe_order_optional; if (rh_migratable) { /* A promote then B start * -> A promote then B migrate_to */ pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_PROMOTE, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); } } else if (pcmk__str_eq(lh_task, RSC_DEMOTE, pcmk__str_casei) && pcmk__str_eq(rh_task, RSC_STOP, pcmk__str_casei)) { int flags = pe_order_optional; if (rh_migratable) { /* A demote then B stop * -> A demote then B migrate_to */ pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATE, 0), NULL, flags, data_set); // Also order B migrate_from after A demote during partial migrations if (order->rh_rsc->partial_migration_target) { pcmk__new_ordering(order->lh_rsc, pcmk__op_key(order->lh_rsc->id, RSC_DEMOTE, 0), NULL, order->rh_rsc, pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0), NULL, flags, data_set); } } } cleanup_order: free(lh_task); free(rh_task); } /*! * \internal * \brief Create a new ordering between two actions * * \param[in] lh_rsc Resource for 'first' action (if NULL and * \p lh_action is a resource action, that * resource will be used) * \param[in] lh_action_task Action key for 'first' action (if NULL and * \p lh_action is not NULL, its UUID will be used) * \param[in] lh_action 'first' action (if NULL, \p lh_rsc and * \p lh_action_task must be set) * * \param[in] rh_rsc Resource for 'then' action (if NULL and * \p rh_action is a resource action, that * resource will be used) * \param[in] rh_action_task Action key for 'then' action (if NULL and * \p rh_action is not NULL, its UUID will be used) * \param[in] rh_action 'then' action (if NULL, \p rh_rsc and * \p rh_action_task must be set) * * \param[in] type Flag set of enum pe_ordering * \param[in] data_set Cluster working set to add ordering to * * \note This function takes ownership of lh_action_task and rh_action_task, * which do not need to be freed by the caller. */ void pcmk__new_ordering(pe_resource_t *lh_rsc, char *lh_action_task, pe_action_t *lh_action, pe_resource_t *rh_rsc, char *rh_action_task, pe_action_t *rh_action, enum pe_ordering type, pe_working_set_t *data_set) { pe__ordering_t *order = NULL; // One of action or resource must be specified for each side CRM_CHECK(((lh_action != NULL) || (lh_rsc != NULL)) && ((rh_action != NULL) || (rh_rsc != NULL)), free(lh_action_task); free(rh_action_task); return); if ((lh_rsc == NULL) && (lh_action != NULL)) { lh_rsc = lh_action->rsc; } if ((rh_rsc == NULL) && (rh_action != NULL)) { rh_rsc = rh_action->rsc; } order = calloc(1, sizeof(pe__ordering_t)); order->id = data_set->order_id++; order->type = type; order->lh_rsc = lh_rsc; order->rh_rsc = rh_rsc; order->lh_action = lh_action; order->rh_action = rh_action; order->lh_action_task = lh_action_task; order->rh_action_task = rh_action_task; if ((order->lh_action_task == NULL) && (lh_action != NULL)) { order->lh_action_task = strdup(lh_action->uuid); } if ((order->rh_action_task == NULL) && (rh_action != NULL)) { order->rh_action_task = strdup(rh_action->uuid); } if ((order->lh_rsc == NULL) && (lh_action != NULL)) { order->lh_rsc = lh_action->rsc; } if ((order->rh_rsc == NULL) && (rh_action != NULL)) { order->rh_rsc = rh_action->rsc; } pe_rsc_trace(lh_rsc, "Created ordering %d for %s then %s", (data_set->order_id - 1), ((lh_action_task == NULL)? "?" : lh_action_task), ((rh_action_task == NULL)? "?" : rh_action_task)); data_set->ordering_constraints = g_list_prepend(data_set->ordering_constraints, order); handle_migration_ordering(order, data_set); } /*! * \brief Unpack a set in an ordering constraint * * \param[in] set Set XML to unpack * \param[in] parent_kind rsc_order XML "kind" attribute * \param[in] parent_symmetrical_s rsc_order XML "symmetrical" attribute * \param[in] data_set Cluster working set * * \return Standard Pacemaker return code */ static int unpack_order_set(xmlNode *set, enum pe_order_kind parent_kind, const char *parent_symmetrical_s, pe_working_set_t *data_set) { xmlNode *xml_rsc = NULL; GList *set_iter = NULL; GList *resources = NULL; pe_resource_t *last = NULL; pe_resource_t *resource = NULL; int local_kind = parent_kind; bool sequential = false; enum pe_ordering flags = pe_order_optional; enum ordering_symmetry symmetry; char *key = NULL; const char *id = ID(set); const char *action = crm_element_value(set, "action"); const char *sequential_s = crm_element_value(set, "sequential"); const char *kind_s = crm_element_value(set, XML_ORDER_ATTR_KIND); if (action == NULL) { action = RSC_START; } if (kind_s) { local_kind = get_ordering_type(set); } if (sequential_s == NULL) { sequential_s = "1"; } sequential = crm_is_true(sequential_s); symmetry = get_ordering_symmetry(set, parent_kind, parent_symmetrical_s); flags = ordering_flags_for_kind(local_kind, action, symmetry); for (xml_rsc = first_named_child(set, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, resource, ID(xml_rsc)); resources = g_list_append(resources, resource); } if (pcmk__list_of_1(resources)) { crm_trace("Single set: %s", id); goto done; } set_iter = resources; while (set_iter != NULL) { resource = (pe_resource_t *) set_iter->data; set_iter = set_iter->next; key = pcmk__op_key(resource->id, action, 0); if (local_kind == pe_order_kind_serialize) { /* Serialize before everything that comes after */ for (GList *gIter = set_iter; gIter != NULL; gIter = gIter->next) { pe_resource_t *then_rsc = (pe_resource_t *) gIter->data; char *then_key = pcmk__op_key(then_rsc->id, action, 0); pcmk__new_ordering(resource, strdup(key), NULL, then_rsc, then_key, NULL, flags, data_set); } } else if (sequential) { if (last != NULL) { pcmk__order_resource_actions(last, action, resource, action, flags, data_set); } last = resource; } free(key); } if (symmetry == ordering_asymmetric) { goto done; } last = NULL; action = invert_action(action); flags = ordering_flags_for_kind(local_kind, action, ordering_symmetric_inverse); set_iter = resources; while (set_iter != NULL) { resource = (pe_resource_t *) set_iter->data; set_iter = set_iter->next; if (sequential) { if (last != NULL) { pcmk__order_resource_actions(resource, action, last, action, flags, data_set); } last = resource; } } done: g_list_free(resources); return pcmk_rc_ok; } /*! * \brief Order two resource sets relative to each other * * \param[in] id Ordering ID (for logging) * \param[in] set1 First listed set * \param[in] set2 Second listed set * \param[in] kind Ordering kind * \param[in] data_set Cluster working set * \param[in] symmetry Which ordering symmetry applies to this relation * * \return Standard Pacemaker return code */ static int order_rsc_sets(const char *id, xmlNode *set1, xmlNode *set2, enum pe_order_kind kind, pe_working_set_t *data_set, enum ordering_symmetry symmetry) { xmlNode *xml_rsc = NULL; xmlNode *xml_rsc_2 = NULL; pe_resource_t *rsc_1 = NULL; pe_resource_t *rsc_2 = NULL; const char *action_1 = crm_element_value(set1, "action"); const char *action_2 = crm_element_value(set2, "action"); - const char *sequential_1 = crm_element_value(set1, "sequential"); - const char *sequential_2 = crm_element_value(set2, "sequential"); + enum pe_ordering flags = pe_order_none; - const char *require_all_s = crm_element_value(set1, "require-all"); - bool require_all = require_all_s? crm_is_true(require_all_s) : true; + bool require_all = true; - enum pe_ordering flags = pe_order_none; + pcmk__xe_get_bool_attr(set1, "require-all", &require_all); if (action_1 == NULL) { action_1 = RSC_START; } if (action_2 == NULL) { action_2 = RSC_START; } if (symmetry == ordering_symmetric_inverse) { action_1 = invert_action(action_1); action_2 = invert_action(action_2); } if (pcmk__str_eq(RSC_STOP, action_1, pcmk__str_casei) || pcmk__str_eq(RSC_DEMOTE, action_1, pcmk__str_casei)) { /* Assuming: A -> ( B || C) -> D * The one-or-more logic only applies during the start/promote phase. * During shutdown neither B nor can shutdown until D is down, so simply * turn require_all back on. */ require_all = true; } // @TODO is action_2 correct here? flags = ordering_flags_for_kind(kind, action_2, symmetry); /* If we have an unordered set1, whether it is sequential or not is * irrelevant in regards to set2. */ if (!require_all) { char *task = crm_strdup_printf(CRM_OP_RELAXED_SET ":%s", ID(set1)); pe_action_t *unordered_action = get_pseudo_op(task, data_set); free(task); pe__set_action_flags(unordered_action, pe_action_requires_any); for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); /* Add an ordering constraint between every element in set1 and the * pseudo action. If any action in set1 is runnable the pseudo * action will be runnable. */ pcmk__new_ordering(rsc_1, pcmk__op_key(rsc_1->id, action_1, 0), NULL, NULL, NULL, unordered_action, pe_order_one_or_more|pe_order_implies_then_printed, data_set); } for (xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); /* Add an ordering constraint between the pseudo-action and every * element in set2. If the pseudo-action is runnable, every action * in set2 will be runnable. */ pcmk__new_ordering(NULL, NULL, unordered_action, rsc_2, pcmk__op_key(rsc_2->id, action_2, 0), NULL, flags|pe_order_runnable_left, data_set); } return pcmk_rc_ok; } - if (crm_is_true(sequential_1)) { + if (pcmk__xe_attr_is_true(set1, "sequential")) { if (symmetry == ordering_symmetric_inverse) { // Get the first one xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); } } else { // Get the last one const char *rid = NULL; for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { rid = ID(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_1, rid); } } - if (crm_is_true(sequential_2)) { + if (pcmk__xe_attr_is_true(set2, "sequential")) { if (symmetry == ordering_symmetric_inverse) { // Get the last one const char *rid = NULL; for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { rid = ID(xml_rsc); } EXPAND_CONSTRAINT_IDREF(id, rsc_2, rid); } else { // Get the first one xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); if (xml_rsc != NULL) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); } } } if ((rsc_1 != NULL) && (rsc_2 != NULL)) { pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags, data_set); } else if (rsc_1 != NULL) { for (xml_rsc = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } else if (rsc_2 != NULL) { for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } else { for (xml_rsc = first_named_child(set1, XML_TAG_RESOURCE_REF); xml_rsc != NULL; xml_rsc = crm_next_same_xml(xml_rsc)) { EXPAND_CONSTRAINT_IDREF(id, rsc_1, ID(xml_rsc)); for (xmlNode *xml_rsc_2 = first_named_child(set2, XML_TAG_RESOURCE_REF); xml_rsc_2 != NULL; xml_rsc_2 = crm_next_same_xml(xml_rsc_2)) { EXPAND_CONSTRAINT_IDREF(id, rsc_2, ID(xml_rsc_2)); pcmk__order_resource_actions(rsc_1, action_1, rsc_2, action_2, flags, data_set); } } } return pcmk_rc_ok; } /*! * \internal * \brief If an ordering constraint uses resource tags, expand them * * \param[in] xml_obj Ordering constraint XML * \param[out] expanded_xml Equivalent XML with tags expanded * \param[in] data_set Cluster working set * * \return Standard Pacemaker return code (specifically, pcmk_rc_ok on success, * and pcmk_rc_schema_validation on invalid configuration) */ static int unpack_order_tags(xmlNode *xml_obj, xmlNode **expanded_xml, pe_working_set_t *data_set) { const char *id_first = NULL; const char *id_then = NULL; const char *action_first = NULL; const char *action_then = NULL; pe_resource_t *rsc_first = NULL; pe_resource_t *rsc_then = NULL; pe_tag_t *tag_first = NULL; pe_tag_t *tag_then = NULL; xmlNode *rsc_set_first = NULL; xmlNode *rsc_set_then = NULL; bool any_sets = false; // Check whether there are any resource sets with template or tag references *expanded_xml = pcmk__expand_tags_in_sets(xml_obj, data_set); if (*expanded_xml != NULL) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_order"); return pcmk_rc_ok; } id_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST); id_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN); if ((id_first == NULL) || (id_then == NULL)) { return pcmk_rc_ok; } if (!pcmk__valid_resource_or_tag(data_set, id_first, &rsc_first, &tag_first)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", ID(xml_obj), id_first); return pcmk_rc_schema_validation; } if (!pcmk__valid_resource_or_tag(data_set, id_then, &rsc_then, &tag_then)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", ID(xml_obj), id_then); return pcmk_rc_schema_validation; } if ((rsc_first != NULL) && (rsc_then != NULL)) { // Neither side references a template or tag return pcmk_rc_ok; } action_first = crm_element_value(xml_obj, XML_ORDER_ATTR_FIRST_ACTION); action_then = crm_element_value(xml_obj, XML_ORDER_ATTR_THEN_ACTION); *expanded_xml = copy_xml(xml_obj); // Convert template/tag reference in "first" into resource_set under constraint if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_first, XML_ORDER_ATTR_FIRST, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_schema_validation; } if (rsc_set_first != NULL) { if (action_first != NULL) { // Move "first-action" into converted resource_set as "action" crm_xml_add(rsc_set_first, "action", action_first); xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_FIRST_ACTION); } any_sets = true; } // Convert template/tag reference in "then" into resource_set under constraint if (!pcmk__tag_to_set(*expanded_xml, &rsc_set_then, XML_ORDER_ATTR_THEN, true, data_set)) { free_xml(*expanded_xml); *expanded_xml = NULL; return pcmk_rc_schema_validation; } if (rsc_set_then != NULL) { if (action_then != NULL) { // Move "then-action" into converted resource_set as "action" crm_xml_add(rsc_set_then, "action", action_then); xml_remove_prop(*expanded_xml, XML_ORDER_ATTR_THEN_ACTION); } any_sets = true; } if (any_sets) { crm_log_xml_trace(*expanded_xml, "Expanded rsc_order"); } else { free_xml(*expanded_xml); *expanded_xml = NULL; } return pcmk_rc_ok; } /*! * \internal * \brief Unpack ordering constraint XML * * \param[in] xml_obj Ordering constraint XML to unpack * \param[in,out] data_set Cluster working set */ void pcmk__unpack_ordering(xmlNode *xml_obj, pe_working_set_t *data_set) { xmlNode *set = NULL; xmlNode *last = NULL; xmlNode *orig_xml = NULL; xmlNode *expanded_xml = NULL; const char *id = crm_element_value(xml_obj, XML_ATTR_ID); const char *invert = crm_element_value(xml_obj, XML_CONS_ATTR_SYMMETRICAL); enum pe_order_kind kind = get_ordering_type(xml_obj); enum ordering_symmetry symmetry = get_ordering_symmetry(xml_obj, kind, NULL); // Expand any resource tags in the constraint XML if (unpack_order_tags(xml_obj, &expanded_xml, data_set) != pcmk_rc_ok) { return; } if (expanded_xml != NULL) { orig_xml = xml_obj; xml_obj = expanded_xml; } // If the constraint has resource sets, unpack them for (set = first_named_child(xml_obj, XML_CONS_TAG_RSC_SET); set != NULL; set = crm_next_same_xml(set)) { set = expand_idref(set, data_set->input); if ((set == NULL) // Configuration error, message already logged || (unpack_order_set(set, kind, invert, data_set) != pcmk_rc_ok)) { if (expanded_xml != NULL) { free_xml(expanded_xml); } return; } if (last != NULL) { if (order_rsc_sets(id, last, set, kind, data_set, symmetry) != pcmk_rc_ok) { if (expanded_xml != NULL) { free_xml(expanded_xml); } return; } if ((symmetry == ordering_symmetric) && (order_rsc_sets(id, set, last, kind, data_set, ordering_symmetric_inverse) != pcmk_rc_ok)) { if (expanded_xml != NULL) { free_xml(expanded_xml); } return; } } last = set; } if (expanded_xml) { free_xml(expanded_xml); xml_obj = orig_xml; } // If the constraint has no resource sets, unpack it as a simple ordering if (last == NULL) { return unpack_simple_rsc_order(xml_obj, data_set); } } static bool ordering_is_invalid(pe_action_t *action, pe_action_wrapper_t *input) { /* Prevent user-defined ordering constraints between resources * running in a guest node and the resource that defines that node. */ if (!pcmk_is_set(input->type, pe_order_preserve) && (input->action->rsc != NULL) && pcmk__rsc_corresponds_to_guest(action->rsc, input->action->node)) { crm_warn("Invalid ordering constraint between %s and %s", input->action->rsc->id, action->rsc->id); return true; } /* If there's an order like * "rscB_stop node2"-> "load_stopped_node2" -> "rscA_migrate_to node1" * * then rscA is being migrated from node1 to node2, while rscB is being * migrated from node2 to node1. If there would be a graph loop, * break the order "load_stopped_node2" -> "rscA_migrate_to node1". */ if ((input->type == pe_order_load) && action->rsc && pcmk__str_eq(action->task, RSC_MIGRATE, pcmk__str_casei) && pcmk__graph_has_loop(action, action, input)) { return true; } return false; } void pcmk__disable_invalid_orderings(pe_working_set_t *data_set) { for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; pe_action_wrapper_t *input = NULL; for (GList *input_iter = action->actions_before; input_iter != NULL; input_iter = input_iter->next) { input = (pe_action_wrapper_t *) input_iter->data; if (ordering_is_invalid(action, input)) { input->type = pe_order_none; } } } } /*! * \internal * \brief Order stops on a node before the node's shutdown * * \param[in] node Node being shut down * \param[in] shutdown_op Shutdown action for node * \param[in] data_set Cluster working set */ void pcmk__order_stops_before_shutdown(pe_node_t *node, pe_action_t *shutdown_op, pe_working_set_t *data_set) { for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) { pe_action_t *action = (pe_action_t *) iter->data; // Only stops on the node shutting down are relevant if ((action->rsc == NULL) || (action->node == NULL) || (action->node->details != node->details) || !pcmk__str_eq(action->task, RSC_STOP, pcmk__str_casei)) { continue; } // Resources and nodes in maintenance mode won't be touched if (pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)) { pe_rsc_trace(action->rsc, "Not ordering %s before %s shutdown because " "resource in maintenance mode", action->uuid, node->details->uname); continue; } else if (node->details->maintenance) { pe_rsc_trace(action->rsc, "Not ordering %s before %s shutdown because " "node in maintenance mode", action->uuid, node->details->uname); continue; } /* Don't touch a resource that is unmanaged or blocked, to avoid * blocking the shutdown (though if another action depends on this one, * we may still end up blocking) */ if (!pcmk_any_flags_set(action->rsc->flags, pe_rsc_managed|pe_rsc_block)) { pe_rsc_trace(action->rsc, "Not ordering %s before %s shutdown because " "resource is unmanaged or blocked", action->uuid, node->details->uname); continue; } pe_rsc_trace(action->rsc, "Ordering %s before %s shutdown", action->uuid, node->details->uname); pe__clear_action_flags(action, pe_action_optional); pcmk__new_ordering(action->rsc, NULL, action, NULL, strdup(CRM_OP_SHUTDOWN), shutdown_op, pe_order_optional|pe_order_runnable_left, data_set); } } /*! * \brief Find resource actions matching directly or as child * * \param[in] rsc Resource to check * \param[in] original_key Action key to search for (possibly referencing * parent of \rsc) * * \return Newly allocated list of matching actions * \note It is the caller's responsibility to free the result with g_list_free() */ static GList * find_actions_by_task(pe_resource_t *rsc, const char *original_key) { // Search under given task key directly GList *list = find_actions(rsc->actions, original_key, NULL); if (list == NULL) { // Search again using this resource's ID char *key = NULL; char *task = NULL; guint interval_ms = 0; if (parse_op_key(original_key, NULL, &task, &interval_ms)) { key = pcmk__op_key(rsc->id, task, interval_ms); list = find_actions(rsc->actions, key, NULL); free(key); free(task); } else { crm_err("Invalid operation key (bug?): %s", original_key); } } return list; } static void rsc_order_then(pe_action_t *lh_action, pe_resource_t *rsc, pe__ordering_t *order) { GList *rh_actions = NULL; pe_action_t *rh_action = NULL; enum pe_ordering type; CRM_CHECK(rsc != NULL, return); CRM_CHECK(order != NULL, return); type = order->type; rh_action = order->rh_action; crm_trace("Applying ordering constraint %d (then: %s)", order->id, rsc->id); if (rh_action != NULL) { rh_actions = g_list_prepend(NULL, rh_action); } else if (rsc != NULL) { rh_actions = find_actions_by_task(rsc, order->rh_action_task); } if (rh_actions == NULL) { pe_rsc_trace(rsc, "Ignoring constraint %d: then (%s for %s) not found", order->id, order->rh_action_task, rsc->id); return; } if ((lh_action != NULL) && (lh_action->rsc == rsc) && pcmk_is_set(lh_action->flags, pe_action_dangle)) { pe_rsc_trace(rsc, "Detected dangling operation %s -> %s", lh_action->uuid, order->rh_action_task); pe__clear_order_flags(type, pe_order_implies_then); } for (GList *gIter = rh_actions; gIter != NULL; gIter = gIter->next) { pe_action_t *rh_action_iter = (pe_action_t *) gIter->data; if (lh_action) { order_actions(lh_action, rh_action_iter, type); } else if (type & pe_order_implies_then) { pe__clear_action_flags(rh_action_iter, pe_action_runnable); crm_warn("Unrunnable %s 0x%.6x", rh_action_iter->uuid, type); } else { crm_warn("neither %s 0x%.6x", rh_action_iter->uuid, type); } } g_list_free(rh_actions); } static void rsc_order_first(pe_resource_t *lh_rsc, pe__ordering_t *order, pe_working_set_t *data_set) { GList *lh_actions = NULL; pe_action_t *lh_action = order->lh_action; pe_resource_t *rh_rsc = order->rh_rsc; CRM_ASSERT(lh_rsc != NULL); pe_rsc_trace(lh_rsc, "Applying ordering constraint %d (first: %s)", order->id, lh_rsc->id); if (lh_action != NULL) { lh_actions = g_list_prepend(NULL, lh_action); } else { lh_actions = find_actions_by_task(lh_rsc, order->lh_action_task); } if ((lh_actions == NULL) && (lh_rsc == rh_rsc)) { pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else if (lh_actions == NULL) { char *key = NULL; char *op_type = NULL; guint interval_ms = 0; parse_op_key(order->lh_action_task, NULL, &op_type, &interval_ms); key = pcmk__op_key(lh_rsc->id, op_type, interval_ms); if ((lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_STOPPED) && pcmk__str_eq(op_type, RSC_STOP, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else if ((lh_rsc->fns->state(lh_rsc, TRUE) == RSC_ROLE_UNPROMOTED) && pcmk__str_eq(op_type, RSC_DEMOTE, pcmk__str_casei)) { free(key); pe_rsc_trace(lh_rsc, "Ignoring constraint %d: first (%s for %s) not found", order->id, order->lh_action_task, lh_rsc->id); } else { pe_rsc_trace(lh_rsc, "Creating first (%s for %s) for constraint %d ", order->lh_action_task, lh_rsc->id, order->id); lh_action = custom_action(lh_rsc, key, op_type, NULL, TRUE, TRUE, data_set); lh_actions = g_list_prepend(NULL, lh_action); } free(op_type); } if (rh_rsc == NULL) { if (order->rh_action == NULL) { pe_rsc_trace(lh_rsc, "Ignoring constraint %d: then not found", order->id); return; } rh_rsc = order->rh_action->rsc; } for (GList *gIter = lh_actions; gIter != NULL; gIter = gIter->next) { lh_action = (pe_action_t *) gIter->data; if (rh_rsc == NULL) { order_actions(lh_action, order->rh_action, order->type); } else { rsc_order_then(lh_action, rh_rsc, order); } } g_list_free(lh_actions); } void pcmk__apply_orderings(pe_working_set_t *data_set) { crm_trace("Applying ordering constraints"); /* Don't ask me why, but apparently they need to be processed in * the order they were created in... go figure * * Also g_list_append() has horrendous performance characteristics * So we need to use g_list_prepend() and then reverse the list here */ data_set->ordering_constraints = g_list_reverse(data_set->ordering_constraints); for (GList *gIter = data_set->ordering_constraints; gIter != NULL; gIter = gIter->next) { pe__ordering_t *order = gIter->data; pe_resource_t *rsc = order->lh_rsc; if (rsc != NULL) { rsc_order_first(rsc, order, data_set); continue; } rsc = order->rh_rsc; if (rsc != NULL) { rsc_order_then(order->lh_action, rsc, order); } else { crm_trace("Applying ordering constraint %d (non-resource actions)", order->id); order_actions(order->lh_action, order->rh_action, order->type); } } g_list_foreach(data_set->actions, (GFunc) pcmk__block_colocated_starts, data_set); crm_trace("Ordering probes"); pcmk__order_probes(data_set); crm_trace("Updating %d actions", g_list_length(data_set->actions)); g_list_foreach(data_set->actions, (GFunc) update_action, data_set); pcmk__disable_invalid_orderings(data_set); } diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index e817c7bbe8..f3e6004a57 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -1,2889 +1,2889 @@ /* * Copyright 2019-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ "shutdown", "terminate", "standby", "probe_complete", \ "#", NULL } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] node Node that ran this resource. * \param[in] rsc_list The list of resources for this node. * \param[in] attrname The attribute to find. * \param[out] expected_score The expected value for this attribute. * * \return TRUE if extended information should be printed, FALSE otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static gboolean add_extra_info(pe_node_t *node, GList *rsc_list, pe_working_set_t *data_set, const char *attrname, int *expected_score) { GList *gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->meta, "type"); const char *name = NULL; GHashTable *params = NULL; if (rsc->children != NULL) { if (add_extra_info(node, rsc->children, data_set, attrname, expected_score)) { return TRUE; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, data_set); name = g_hash_table_lookup(params, "name"); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); int multiplier_i; if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } if ((multiplier == NULL) || (pcmk__scan_min_int(multiplier, &multiplier_i, INT_MIN) != pcmk_rc_ok)) { /* The ocf:pacemaker:ping resource agent defaults multiplier to * 1. The agent currently does not handle invalid text, but it * should, and this would be a reasonable choice ... */ multiplier_i = 1; } *expected_score = host_list_num * multiplier_i; return TRUE; } } return FALSE; } static GList * filter_attr_list(GList *attr_list, char *name) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return attr_list); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return attr_list; } } return g_list_insert_sorted(attr_list, name, compare_attribute); } static GList * get_operation_list(xmlNode *rsc_entry) { GList *op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, CRMD_ACTION_NOTIFY, pcmk__str_casei) || (pcmk__str_eq(task, "probe", pcmk__str_casei) && (op_rc_i == 7))) { continue; } if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } static void add_dump_node(gpointer key, gpointer value, gpointer user_data) { xmlNodePtr node = user_data; pcmk_create_xml_text_node(node, (const char *) key, (const char *) value); } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } static const char * get_cluster_stack(pe_working_set_t *data_set) { xmlNode *stack = get_xpath_object("//nvpair[@name='cluster-infrastructure']", data_set->input, LOG_DEBUG); return stack? crm_element_value(stack, XML_NVPAIR_ATTR_VALUE) : "unknown"; } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, gboolean print_timing) { const char *call = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *pair = pcmk__format_nvpair("interval", interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (print_timing) { char *last_change_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *time = pcmk__format_named_time(XML_RSC_OP_LAST_CHANGE, epoch); last_change_str = crm_strdup_printf(" %s", time); free(time); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_EXEC, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *pair = pcmk__format_nvpair(XML_RSC_OP_T_QUEUE, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? last_change_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s%s%s", call, task, interval_str ? ":" : "", interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(pe_resource_t *rsc, const char *rsc_id, gboolean all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = NULL; char *lastfail_s = NULL; if (failcount > 0) { failcount_s = crm_strdup_printf(" %s=%d", PCMK__FAIL_COUNT_PREFIX, failcount); } else { failcount_s = strdup(""); } if (last_failure > 0) { lastfail_s = crm_strdup_printf(" %s='%s'", PCMK__LAST_FAILURE_PREFIX, pcmk__epoch2str(&last_failure)); } buf = crm_strdup_printf("%s: migration-threshold=%d%s%s", rsc_id, rsc->migration_threshold, failcount_s, lastfail_s? lastfail_s : ""); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "unsigned int", "unsigned int") static int cluster_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s); } if (pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-times", last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } PCMK__OUTPUT_ARGS("cluster-summary", "pe_working_set_t *", "unsigned int", "unsigned int") static int cluster_summary_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(data_set); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s); } /* Always print DC if none, even if not requested */ if (data_set->dc_node == NULL || pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object("//nvpair[@name='dc-version']", data_set->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, XML_NVPAIR_ATTR_VALUE) : NULL; const char *quorum = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); char *dc_name = data_set->dc_node ? pe__node_display_name(data_set->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-dc", data_set->dc_node, quorum, dc_version_s, dc_name); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(data_set->input, XML_CIB_ATTR_WRITTEN); const char *user = crm_element_value(data_set->input, XML_ATTR_UPDATE_USER); const char *client = crm_element_value(data_set->input, XML_ATTR_UPDATE_CLIENT); const char *origin = crm_element_value(data_set->input, XML_ATTR_UPDATE_ORIG); PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-times", last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(data_set->nodes), data_set->ninstances, data_set->disabled_resources, data_set->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { /* Kind of a hack - close the list we may have opened earlier in this * function so we can put all the options into their own list. We * only want to do this on HTML output, though. */ PCMK__OUTPUT_LIST_FOOTER(out, rc); out->begin_list(out, NULL, NULL, "Config Options"); out->message(out, "cluster-options", data_set); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", data_set->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } char * pe__node_display_name(pe_node_t *node, bool print_detail) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->details != NULL) && (node->details->uname != NULL)); /* Host is displayed only if this is a guest node */ if (pe__is_guest_node(node)) { pe_node_t *host_node = pe__current_node(node->details->remote_rsc); if (host_node && host_node->details) { node_host = host_node->details->uname; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_detail && !pcmk__str_eq(node->details->uname, node->details->id, pcmk__str_casei)) { node_id = node->details->id; } /* Determine name length */ name_len = strlen(node->details->uname) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = malloc(name_len); CRM_ASSERT(node_name != NULL); strcpy(node_name, node->details->uname); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name , size_t pairs_count, ...) { xmlNodePtr xml_node = NULL; va_list args; CRM_ASSERT(tag_name != NULL); xml_node = pcmk__output_xml_peek_parent(out); CRM_ASSERT(xml_node != NULL); xml_node = is_list ? create_xml_node(xml_node, tag_name) : xmlNewChild(xml_node, NULL, (pcmkXmlStr) tag_name, NULL); va_start(args, pairs_count); while(pairs_count--) { const char *param_name = va_arg(args, const char *); const char *param_value = va_arg(args, const char *); if (param_name && param_value) { crm_xml_add(xml_node, param_name, param_value); } }; va_end(args); if (is_list) { pcmk__output_xml_push_parent(out, xml_node); } return pcmk_rc_ok; } static const char * role_desc(enum rsc_role_e role) { if (role == RSC_ROLE_PROMOTED) { #ifdef PCMK__COMPAT_2_0 return "as " RSC_ROLE_PROMOTED_LEGACY_S " "; #else return "in " RSC_ROLE_PROMOTED_S " role "; #endif } return ""; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_html(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts = va_arg(args, unsigned int); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_text(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts = va_arg(args, unsigned int); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc_lh->id, role_desc(location->role_filter), node_name); free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pe_node_t *", "pe__location_t *", "unsigned int") static int ban_xml(pcmk__output_t *out, va_list args) { pe_node_t *pe_node = va_arg(args, pe_node_t *); pe__location_t *location = va_arg(args, pe__location_t *); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); const char *promoted_only = pcmk__btoa(location->role_filter == RSC_ROLE_PROMOTED); char *weight_s = pcmk__itoa(pe_node->weight); pcmk__output_create_xml_node(out, "ban", "id", location->id, "resource", location->rsc_lh->id, "node", pe_node->details->uname, "weight", weight_s, "promoted-only", promoted_only, /* This is a deprecated alias for * promoted_only. Removing it will break * backward compatibility of the API schema, * which will require an API schema major * version bump. */ "master_only", promoted_only, NULL); free(weight_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban-list", "pe_working_set_t *", "const char *", "GList *", "unsigned int", "gboolean") static int ban_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *prefix = va_arg(args, const char *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); GList *gIter, *gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = data_set->placement_constraints; gIter != NULL; gIter = gIter->next) { pe__location_t *location = gIter->data; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) { continue; } if (!pcmk__str_in_list(rsc_printable_id(location->rsc_lh), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(uber_parent(location->rsc_lh)), only_rsc, pcmk__str_star_matches)) { continue; } for (gIter2 = location->node_list_rh; gIter2 != NULL; gIter2 = gIter2->next) { pe_node_t *node = (pe_node_t *) gIter2->data; if (node->weight < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, show_opts); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_html(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL); char *nnodes_str = crm_strdup_printf("%d node%s configured", nnodes, pcmk__plural_s(nnodes)); pcmk_create_html_node(nodes_node, "span", NULL, NULL, nnodes_str); free(nnodes_str); if (ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); s = crm_strdup_printf(", %d ", nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else if (ndisabled && !nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, ")"); } else if (!ndisabled && nblocked) { char *s = crm_strdup_printf("%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), nblocked); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); pcmk_create_html_node(resources_node, "span", NULL, "bold", "BLOCKED"); pcmk_create_html_node(resources_node, "span", NULL, NULL, " from further action due to failure)"); } else { char *s = crm_strdup_printf("%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); pcmk_create_html_node(resources_node, "span", NULL, NULL, s); free(s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); out->list_item(out, NULL, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, pcmk__plural_s(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, pcmk__plural_s(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, pcmk__plural_s(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_xml(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "nodes_configured", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "resources_configured", NULL); char *s = pcmk__itoa(nnodes); crm_xml_add(nodes_node, "number", s); free(s); s = pcmk__itoa(nresources); crm_xml_add(resources_node, "number", s); free(s); s = pcmk__itoa(ndisabled); crm_xml_add(resources_node, "disabled", s); free(s); s = pcmk__itoa(nblocked); crm_xml_add(resources_node, "blocked", s); free(s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_html(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Current DC: "); if (dc) { if (crm_is_true(quorum)) { char *buf = crm_strdup_printf("%s (version %s) - partition with quorum", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); } else { char *buf = crm_strdup_printf("%s (version %s) - partition", dc_name, dc_version_s ? dc_version_s : "unknown"); pcmk_create_html_node(node, "span", NULL, NULL, buf); free(buf); pcmk_create_html_node(node, "span", NULL, "warning", "WITHOUT"); pcmk_create_html_node(node, "span", NULL, NULL, "quorum"); } } else { pcmk_create_html_node(node ,"span", NULL, "warning", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_text(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - partition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", crm_is_true(quorum) ? "with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pe_node_t *", "const char *", "const char *", "char *") static int cluster_dc_xml(pcmk__output_t *out, va_list args) { pe_node_t *dc = va_arg(args, pe_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name G_GNUC_UNUSED = va_arg(args, char *); if (dc) { pcmk__output_create_xml_node(out, "current_dc", "present", "true", "version", dc_version_s ? dc_version_s : "", "name", dc->details->uname, "id", dc->details->id, "with_quorum", pcmk__btoa(crm_is_true(quorum)), NULL); } else { pcmk__output_create_xml_node(out, "current_dc", "present", "false", NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("maint-mode", "unsigned long long int") static int cluster_maint_mode_text(pcmk__output_t *out, va_list args) { unsigned long long flags = va_arg(args, unsigned long long); if (pcmk_is_set(flags, pe_flag_maintenance_mode)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n"); return pcmk_rc_ok; } else if (pcmk_is_set(flags, pe_flag_stop_everything)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n"); return pcmk_rc_ok; } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_html(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "DISABLED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will not attempt to start, stop, or recover services)"); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, NULL, "Resource management: "); pcmk_create_html_node(node, "span", NULL, "bold", "STOPPED"); pcmk_create_html_node(node, "span", NULL, NULL, " (the cluster will keep all resources stopped)"); } else { out->list_item(out, NULL, "Resource management: enabled"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_log(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); if (pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)) { return out->info(out, "Resource management is DISABLED. The cluster will not attempt to start, stop or recover services."); } else if (pcmk_is_set(data_set->flags, pe_flag_stop_everything)) { return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources."); } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_text(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); out->list_item(out, NULL, "STONITH of failed nodes %s", pcmk_is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled"); out->list_item(out, NULL, "Cluster is %s", pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster) ? "symmetric" : "asymmetric"); switch (data_set->no_quorum_policy) { case no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case no_quorum_suicide: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pe_working_set_t *") static int cluster_options_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); const char *no_quorum_policy = NULL; char *stonith_timeout_str = pcmk__itoa(data_set->stonith_timeout); char *priority_fencing_delay_str = pcmk__itoa(data_set->priority_fencing_delay * 1000); switch (data_set->no_quorum_policy) { case no_quorum_freeze: no_quorum_policy = "freeze"; break; case no_quorum_stop: no_quorum_policy = "stop"; break; case no_quorum_demote: no_quorum_policy = "demote"; break; case no_quorum_ignore: no_quorum_policy = "ignore"; break; case no_quorum_suicide: no_quorum_policy = "suicide"; break; } pcmk__output_create_xml_node(out, "cluster_options", "stonith-enabled", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)), "symmetric-cluster", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)), "no-quorum-policy", no_quorum_policy, "maintenance-mode", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)), "stop-all-resources", pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything)), "stonith-timeout-ms", stonith_timeout_str, "priority-fencing-delay-ms", priority_fencing_delay_str, NULL); free(stonith_timeout_str); free(priority_fencing_delay_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_html(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(node, "span", NULL, "bold", "Stack: "); pcmk_create_html_node(node, "span", NULL, NULL, stack_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); out->list_item(out, "Stack", "%s", stack_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *") static int cluster_stack_xml(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); pcmk__output_create_xml_node(out, "stack", "type", stack_s, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_html(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL); char *buf = last_changed_string(last_written, user, client, origin); pcmk_create_html_node(updated_node, "span", NULL, "bold", "Last updated: "); pcmk_create_html_node(updated_node, "span", NULL, NULL, pcmk__epoch2str(NULL)); pcmk_create_html_node(changed_node, "span", NULL, "bold", "Last change: "); pcmk_create_html_node(changed_node, "span", NULL, NULL, buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_xml(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); pcmk__output_create_xml_node(out, "last_update", "time", pcmk__epoch2str(NULL), NULL); pcmk__output_create_xml_node(out, "last_change", "time", last_written ? last_written : "", "user", user ? user : "", "client", client ? client : "", "origin", origin ? origin : "", NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *") static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *buf = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last updated", "%s", pcmk__epoch2str(NULL)); out->list_item(out, "Last change", " %s", buf); free(buf); return pcmk_rc_ok; } /*! * \internal * \brief Display a failed action in less-technical natural language */ static void failed_action_friendly(pcmk__output_t *out, xmlNodePtr xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { char *rsc_id = NULL; char *task = NULL; guint interval_ms = 0; const char *last_change_str = NULL; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key) || !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) { rsc_id = strdup("unknown resource"); task = strdup("unknown action"); interval_ms = 0; } str = g_string_sized_new(strlen(rsc_id) + strlen(task) + strlen(node_name) + 100); // reasonable starting size g_string_printf(str, "%s ", rsc_id); if (interval_ms != 0) { g_string_append_printf(str, "%s-interval ", pcmk__readable_interval(interval_ms)); } g_string_append_printf(str, "%s on %s", crm_action_str(task, interval_ms), node_name); if (status == PCMK_EXEC_DONE) { g_string_append_printf(str, " returned '%s'", services_ocf_exitcode_str(rc)); } else { g_string_append_printf(str, " could not be executed (%s)", pcmk_exec_status_str(status)); } if (!pcmk__str_empty(exit_reason)) { g_string_append_printf(str, " because '%s'", exit_reason); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { last_change_str = pcmk__epoch2str(&last_change_epoch); if (last_change_str != NULL) { g_string_append_printf(str, " at %s", last_change_str); } } if (!pcmk__str_empty(exec_time)) { int exec_time_ms = 0; if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok) && (exec_time_ms > 0)) { g_string_append_printf(str, " after %s", pcmk__readable_interval(exec_time_ms)); } } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); free(rsc_id); free(task); } /*! * \internal * \brief Display a failed action with technical details */ static void failed_action_technical(pcmk__output_t *out, xmlNodePtr xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { const char *call_id = crm_element_value(xml_op, XML_LRM_ATTR_CALLID); const char *queue_time = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); const char *exit_status = services_ocf_exitcode_str(rc); const char *lrm_status = pcmk_exec_status_str(status); const char *last_change_str = NULL; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key)) { op_key = "unknown operation"; } if (pcmk__str_empty(exit_status)) { exit_status = "unknown exit status"; } if (pcmk__str_empty(call_id)) { call_id = "unknown"; } str = g_string_sized_new(strlen(op_key) + strlen(node_name) + strlen(exit_status) + strlen(call_id) + strlen(lrm_status) + 50); // rough estimate g_string_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'", op_key, node_name, exit_status, rc, call_id, lrm_status); if (!pcmk__str_empty(exit_reason)) { g_string_append_printf(str, ", exitreason='%s'", exit_reason); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change_epoch) == pcmk_ok) { last_change_str = pcmk__epoch2str(&last_change_epoch); if (last_change_str != NULL) { g_string_append_printf(str, ", " XML_RSC_OP_LAST_CHANGE "='%s'", last_change_str); } } if (!pcmk__str_empty(queue_time)) { g_string_append_printf(str, ", queued=%sms", queue_time); } if (!pcmk__str_empty(exec_time)) { g_string_append_printf(str, ", exec=%sms", exec_time); } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "unsigned int") static int failed_action_default(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); unsigned int show_opts = va_arg(args, unsigned int); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); const char *node_name = crm_element_value(xml_op, XML_ATTR_UNAME); const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); const char *exec_time = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); int rc; int status; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); if (pcmk__str_empty(op_key)) { op_key = ID(xml_op); } if (pcmk__str_empty(node_name)) { node_name = "unknown node"; } if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) { failed_action_technical(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } else { failed_action_friendly(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action", "xmlNodePtr", "unsigned int") static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int rc; int status; const char *exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON); time_t epoch = 0; char *rc_s = NULL; char *reason_s = crm_xml_escape(exit_reason ? exit_reason : "none"); xmlNodePtr node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_RC), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); rc_s = pcmk__itoa(rc); node = pcmk__output_create_xml_node(out, "failure", (op_key == NULL)? "id" : "op_key", (op_key == NULL)? ID(xml_op) : op_key, "node", crm_element_value(xml_op, XML_ATTR_UNAME), "exitstatus", services_ocf_exitcode_str(rc), "exitreason", crm_str(reason_s), "exitcode", rc_s, "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "status", pcmk_exec_status_str(status), NULL); free(rc_s); if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { guint interval_ms = 0; char *s = NULL; crm_time_t *crm_when = crm_time_new_undefined(); char *rc_change = NULL; crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); s = pcmk__itoa(interval_ms); crm_time_set_timet(crm_when, &epoch); rc_change = crm_time_as_string(crm_when, crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone); pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, rc_change, "queued", crm_element_value(xml_op, XML_RSC_OP_T_QUEUE), "exec", crm_element_value(xml_op, XML_RSC_OP_T_EXEC), "interval", s, "task", crm_element_value(xml_op, XML_LRM_ATTR_TASK), NULL); free(s); free(rc_change); crm_time_free(crm_when); } free(reason_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action-list", "pe_working_set_t *", "GList *", "GList *", "unsigned int", "gboolean") static int failed_action_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, gboolean); gboolean print_spacer = va_arg(args, gboolean); xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; const char *id = NULL; if (xmlChildElementCount(data_set->failed) == 0) { return rc; } for (xml_op = pcmk__xml_first_child(data_set->failed); xml_op != NULL; xml_op = pcmk__xml_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(crm_element_value(xml_op, XML_ATTR_UNAME), only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } id = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); if (parse_op_key(id ? id : ID(xml_op), &rsc, NULL, NULL) == FALSE) { continue; } if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void status_node(pe_node_t *node, xmlNodePtr parent) { if (node->details->standby_onfail && node->details->online) { pcmk_create_html_node(parent, "span", NULL, "standby", " standby (on-fail)"); } else if (node->details->standby && node->details->online) { char *s = crm_strdup_printf(" standby%s", node->details->running_rsc ? " (with active resources)" : ""); pcmk_create_html_node(parent, "span", NULL, " standby", s); free(s); } else if (node->details->standby) { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE (standby)"); } else if (node->details->maintenance && node->details->online) { pcmk_create_html_node(parent, "span", NULL, "maint", " maintenance"); } else if (node->details->maintenance) { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE (maintenance)"); } else if (node->details->online) { pcmk_create_html_node(parent, "span", NULL, "online", " online"); } else { pcmk_create_html_node(parent, "span", NULL, "offline", " OFFLINE"); } } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_html(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode G_GNUC_UNUSED = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (full) { xmlNodePtr item_node; if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); out->begin_list(out, NULL, NULL, "Node: %s", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node); if (rscs != NULL) { unsigned int new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); } pcmk__output_xml_pop_parent(out); out->end_list(out); } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc2 = NULL; int rc = pcmk_rc_no_output; out->begin_list(out, NULL, NULL, "Node: %s", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, "Status:"); status_node(node, item_node); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { pe_resource_t *rsc = (pe_resource_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Resources"); out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); pcmk__output_xml_pop_parent(out); out->end_list(out); } else { char *buf = crm_strdup_printf("Node: %s", node_name); item_node = pcmk__output_create_xml_node(out, "li", NULL); pcmk_create_html_node(item_node, "span", NULL, NULL, buf); status_node(node, item_node); free(buf); } } else { out->begin_list(out, NULL, NULL, "Node: %s", node_name); } free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_text(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = NULL; /* Print the node name and status */ if (pe__is_guest_node(node)) { buf = crm_strdup_printf("GuestNode %s: %s", node_name, node_mode); } else if (pe__is_remote_node(node)) { buf = crm_strdup_printf("RemoteNode %s: %s", node_name, node_mode); } else { buf = crm_strdup_printf("Node %s: %s", node_name, node_mode); } /* If we're grouping by node, print its resources */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pcmk_is_set(show_opts, pcmk_show_brief)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); if (rscs != NULL) { unsigned int new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "%s", buf); out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); out->end_list(out); g_list_free(rscs); } } else { GList *gIter2 = NULL; out->begin_list(out, NULL, NULL, "%s", buf); out->begin_list(out, NULL, NULL, "Resources"); for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { pe_resource_t *rsc = (pe_resource_t *) gIter2->data; out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } out->end_list(out); out->end_list(out); } } else { out->list_item(out, NULL, "%s", buf); } free(buf); free(node_name); } else { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->begin_list(out, NULL, NULL, "Node: %s", node_name); free(node_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node", "pe_node_t *", "unsigned int", "gboolean", "const char *", "GList *", "GList *") static int node_xml(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); unsigned int show_opts G_GNUC_UNUSED = va_arg(args, unsigned int); gboolean full = va_arg(args, gboolean); const char *node_mode G_GNUC_UNUSED = va_arg(args, const char *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { const char *node_type = "unknown"; char *length_s = pcmk__itoa(g_list_length(node->details->running_rsc)); switch (node->details->type) { case node_member: node_type = "member"; break; case node_remote: node_type = "remote"; break; case node_ping: node_type = "ping"; break; } pe__name_and_nvpairs_xml(out, true, "node", 13, "name", node->details->uname, "id", node->details->id, "online", pcmk__btoa(node->details->online), "standby", pcmk__btoa(node->details->standby), "standby_onfail", pcmk__btoa(node->details->standby_onfail), "maintenance", pcmk__btoa(node->details->maintenance), "pending", pcmk__btoa(node->details->pending), "unclean", pcmk__btoa(node->details->unclean), "shutdown", pcmk__btoa(node->details->shutdown), "expected_up", pcmk__btoa(node->details->expected_up), "is_dc", pcmk__btoa(node->details->is_dc), "resources_running", length_s, "type", node_type); if (pe__is_guest_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); crm_xml_add(xml_node, "id_as_resource", node->details->remote_rsc->container->id); } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; out->message(out, crm_map_element_name(rsc->xml), show_opts | pcmk_show_rsc_only, rsc, only_node, only_rsc); } } free(length_s); out->end_list(out); } else { pcmk__output_xml_create_parent(out, "node", "name", node->details->uname, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v; if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); if (add_extra) { int v; char *s = crm_strdup_printf("%s: %s", name, value); xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL); if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } pcmk_create_html_node(item_node, "span", NULL, NULL, s); free(s); if (v <= 0) { pcmk_create_html_node(item_node, "span", NULL, "bold", "(connectivity is lost)"); } else if (v < expected_score) { char *buf = crm_strdup_printf("(connectivity is degraded -- expected %d", expected_score); pcmk_create_html_node(item_node, "span", NULL, "bold", buf); free(buf); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; gchar *node_str = NULL; char *last_change_str = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { pe_node_t *node = pe__current_node(rsc); const char *target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); unsigned int show_opts = pcmk_show_rsc_only | pcmk_show_pending; if (node == NULL) { node = rsc->pending_node; } node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node, show_opts, target_role, false); } else { node_str = crm_strdup_printf("Unknown resource %s", op_rsc); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { last_change_str = crm_strdup_printf(", %s=%s, exec=%sms", XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), crm_element_value(xml_op, XML_RSC_OP_T_EXEC)); } out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s", node_str, op_key ? op_key : ID(xml_op), crm_element_value(xml_op, XML_ATTR_UNAME), crm_element_value(xml_op, XML_LRM_ATTR_CALLID), crm_element_value(xml_op, XML_LRM_ATTR_RC), last_change_str ? last_change_str : "", pcmk_exec_status_str(status)); g_free(node_str); free(last_change_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pe_working_set_t *", "xmlNodePtr") static int node_and_op_xml(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pe_resource_t *rsc = NULL; const char *op_rsc = crm_element_value(xml_op, "resource"); const char *op_key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY); int status; time_t last_change = 0; xmlNode *node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, XML_LRM_ATTR_OPSTATUS), &status, 0); node = pcmk__output_create_xml_node(out, "operation", "op", op_key ? op_key : ID(xml_op), "node", crm_element_value(xml_op, XML_ATTR_UNAME), "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "rc", crm_element_value(xml_op, XML_LRM_ATTR_RC), "status", pcmk_exec_status_str(status), NULL); rsc = pe_find_resource(data_set->resources, op_rsc); if (rsc) { const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE); char *agent_tuple = NULL; agent_tuple = crm_strdup_printf("%s:%s:%s", class, pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) ? crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER) : "", kind); pcmk__xe_set_props(node, "rsc", rsc_printable_id(rsc), "agent", agent_tuple, NULL); free(agent_tuple); } if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &last_change) == pcmk_ok) { pcmk__xe_set_props(node, XML_RSC_OP_LAST_CHANGE, pcmk__trim(ctime(&last_change)), XML_RSC_OP_T_EXEC, crm_element_value(xml_op, XML_RSC_OP_T_EXEC), NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "gboolean", "int") static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); gboolean add_extra = va_arg(args, gboolean); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "attribute", "name", name, "value", value, NULL); if (add_extra) { char *buf = pcmk__itoa(expected_score); crm_xml_add(node, "expected", buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute-list", "pe_working_set_t *", "unsigned int", "gboolean", "GList *", "GList *") static int node_attribute_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = gIter->data; GList *attr_list = NULL; GHashTableIter iter; gpointer key; if (!node || !node->details || !node->details->online) { continue; } g_hash_table_iter_init(&iter, node->details->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { g_list_free(attr_list); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) { const char *name = aIter->data; const char *value = NULL; int expected_score = 0; gboolean add_extra = FALSE; value = pe_node_attribute_raw(node, name); add_extra = add_extra_info(node, node->details->running_rsc, data_set, name, &expected_score); /* Print attribute name and value */ out->message(out, "node-attribute", name, value, add_extra, expected_score); } g_list_free(attr_list); out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-capacity", "pe_node_t *", "const char *") static int node_capacity(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, node->details->uname); g_hash_table_foreach(node->details->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-capacity", "pe_node_t *", "const char *") static int node_capacity_xml(pcmk__output_t *out, va_list args) { pe_node_t *node = va_arg(args, pe_node_t *); const char *comment = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "capacity", "node", node->details->uname, "comment", comment, NULL); g_hash_table_foreach(node->details->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-history-list", "pe_working_set_t *", "pe_node_t *", "xmlNodePtr", "GList *", "GList *", "unsigned int", "unsigned int") static int node_history_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); pe_node_t *node = va_arg(args, pe_node_t *); xmlNode *node_state = va_arg(args, xmlNode *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE); /* Print history of each of the node's resources */ for (rsc_entry = first_named_child(lrm_rsc, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); pe_resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (uber_parent(rsc)->variant == pe_group) { if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(uber_parent(rsc)), only_rsc, pcmk__str_star_matches)) { continue; } } else { if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } } if (!pcmk_is_set(section_opts, pcmk_section_operations)) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL, data_set); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, FALSE, failcount, last_failure, FALSE); } else { GList *op_list = get_operation_list(rsc_entry); pe_resource_t *rsc = pe_find_resource(data_set->resources, crm_element_value(rsc_entry, XML_ATTR_ID)); if (op_list == NULL) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, FALSE, NULL, only_node, only_rsc); } out->message(out, "resource-operation-list", data_set, rsc, node, op_list, show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_html(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer G_GNUC_UNUSED = va_arg(args, gboolean); int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Node List"); out->message(out, "node", node, show_opts, TRUE, NULL, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_text(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); /* space-separated lists of node names */ char *online_nodes = NULL; char *online_remote_nodes = NULL; char *online_guest_nodes = NULL; char *offline_nodes = NULL; char *offline_remote_nodes = NULL; size_t online_nodes_len = 0; size_t online_remote_nodes_len = 0; size_t online_guest_nodes_len = 0; size_t offline_nodes_len = 0; size_t offline_remote_nodes_len = 0; int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; const char *node_mode = NULL; char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { free(node_name); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer == TRUE, rc, "Node List"); /* Get node mode */ if (node->details->unclean) { if (node->details->online) { node_mode = "UNCLEAN (online)"; } else if (node->details->pending) { node_mode = "UNCLEAN (pending)"; } else { node_mode = "UNCLEAN (offline)"; } } else if (node->details->pending) { node_mode = "pending"; } else if (node->details->standby_onfail && node->details->online) { node_mode = "standby (on-fail)"; } else if (node->details->standby) { if (node->details->online) { if (node->details->running_rsc) { node_mode = "standby (with active resources)"; } else { node_mode = "standby"; } } else { node_mode = "OFFLINE (standby)"; } } else if (node->details->maintenance) { if (node->details->online) { node_mode = "maintenance"; } else { node_mode = "OFFLINE (maintenance)"; } } else if (node->details->online) { node_mode = "online"; if (!pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pe__is_guest_node(node)) { pcmk__add_word(&online_guest_nodes, &online_guest_nodes_len, node_name); } else if (pe__is_remote_node(node)) { pcmk__add_word(&online_remote_nodes, &online_remote_nodes_len, node_name); } else { pcmk__add_word(&online_nodes, &online_nodes_len, node_name); } free(node_name); continue; } } else { node_mode = "OFFLINE"; if (!pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pe__is_remote_node(node)) { pcmk__add_word(&offline_remote_nodes, &offline_remote_nodes_len, node_name); } else if (pe__is_guest_node(node)) { /* ignore offline guest nodes */ } else { pcmk__add_word(&offline_nodes, &offline_nodes_len, node_name); } free(node_name); continue; } } /* If we get here, node is in bad state, or we're grouping by node */ out->message(out, "node", node, show_opts, TRUE, node_mode, only_node, only_rsc); free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes) { out->list_item(out, "Online", "[ %s ]", online_nodes); free(online_nodes); } if (offline_nodes) { out->list_item(out, "OFFLINE", "[ %s ]", offline_nodes); free(offline_nodes); } if (online_remote_nodes) { out->list_item(out, "RemoteOnline", "[ %s ]", online_remote_nodes); free(online_remote_nodes); } if (offline_remote_nodes) { out->list_item(out, "RemoteOFFLINE", "[ %s ]", offline_remote_nodes); free(offline_remote_nodes); } if (online_guest_nodes) { out->list_item(out, "GuestOnline", "[ %s ]", online_guest_nodes); free(online_guest_nodes); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "unsigned int", "gboolean") static int node_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer G_GNUC_UNUSED = va_arg(args, gboolean); out->begin_list(out, NULL, NULL, "nodes"); for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } out->message(out, "node", node, show_opts, TRUE, NULL, only_node, only_rsc); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-summary", "pe_working_set_t *", "GList *", "GList *", "unsigned int", "unsigned int", "gboolean") static int node_summary(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); unsigned int section_opts = va_arg(args, unsigned int); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_spacer = va_arg(args, gboolean); xmlNode *node_state = NULL; xmlNode *cib_status = get_object_root(XML_CIB_TAG_STATUS, data_set->input); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } for (node_state = first_named_child(cib_status, XML_CIB_TAG_STATE); node_state != NULL; node_state = crm_next_same_xml(node_state)) { pe_node_t *node = pe_find_node_id(data_set->nodes, ID(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(node->details->uname, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary"); out->message(out, "node-history-list", data_set, node, node_state, only_node, only_rsc, section_opts, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-weight", "pe_resource_t *", "const char *", "const char *", "char *") static int node_weight(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); char *score = va_arg(args, char *); if (rsc) { out->list_item(out, NULL, "%s: %s allocation score on %s: %s", prefix, rsc->id, uname, score); } else { out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-weight", "pe_resource_t *", "const char *", "const char *", "char *") static int node_weight_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); char *score = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "node_weight", "function", prefix, "node", uname, "score", score, NULL); if (rsc) { crm_xml_add(node, "id", rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "unsigned int") static int op_history_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); unsigned int show_opts = va_arg(args, unsigned int); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, pcmk_is_set(show_opts, pcmk_show_timing)); out->list_item(out, NULL, "%s", buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNodePtr", "const char *", "const char *", "int", "unsigned int") static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); unsigned int show_opts = va_arg(args, unsigned int); char *rc_s = pcmk__itoa(rc); xmlNodePtr node = pcmk__output_create_xml_node(out, "operation_history", "call", crm_element_value(xml_op, XML_LRM_ATTR_CALLID), "task", task, "rc", rc_s, "rc_text", services_ocf_exitcode_str(rc), NULL); free(rc_s); if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *s = crm_strdup_printf("%sms", interval_ms_s); crm_xml_add(node, "interval", s); free(s); } if (pcmk_is_set(show_opts, pcmk_show_timing)) { const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { crm_xml_add(node, XML_RSC_OP_LAST_CHANGE, pcmk__epoch2str(&epoch)); } value = crm_element_value(xml_op, XML_RSC_OP_T_EXEC); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, XML_RSC_OP_T_EXEC, s); free(s); } value = crm_element_value(xml_op, XML_RSC_OP_T_QUEUE); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, XML_RSC_OP_T_QUEUE, s); free(s); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "char *") static int promotion_score(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); char *score = va_arg(args, char *); out->list_item(out, NULL, "%s promotion score on %s: %s", child_rsc->id, chosen? chosen->details->uname : "none", score); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pe_resource_t *", "pe_node_t *", "char *") static int promotion_score_xml(pcmk__output_t *out, va_list args) { pe_resource_t *child_rsc = va_arg(args, pe_resource_t *); pe_node_t *chosen = va_arg(args, pe_node_t *); char *score = va_arg(args, char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "promotion_score", "id", child_rsc->id, "score", score, NULL); if (chosen) { crm_xml_add(node, "node", chosen->details->uname); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "pe_resource_t *", "gboolean") static int resource_config(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); gboolean raw = va_arg(args, gboolean); char *rsc_xml = NULL; if (raw) { rsc_xml = dump_xml_formatted(rsc->orig_xml ? rsc->orig_xml : rsc->xml); } else { rsc_xml = dump_xml_formatted(rsc->xml); } pcmk__formatted_printf(out, "Resource XML:\n"); out->output_xml(out, "xml", rsc_xml); free(rsc_xml); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "gboolean", "int", "time_t", "gboolean") static int resource_history_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); gboolean as_header = va_arg(args, gboolean); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); if (as_header) { out->begin_list(out, NULL, NULL, "%s", buf); } else { out->list_item(out, NULL, "%s", buf); } free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pe_resource_t *", "const char *", "gboolean", "int", "time_t", "gboolean") static int resource_history_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); const char *rsc_id = va_arg(args, const char *); gboolean all = va_arg(args, gboolean); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, int); gboolean as_header = va_arg(args, gboolean); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource_history", "id", rsc_id, NULL); if (rsc == NULL) { - crm_xml_add(node, "orphan", "true"); + pcmk__xe_set_bool_attr(node, "orphan", true); } else if (all || failcount || last_failure > 0) { char *migration_s = pcmk__itoa(rsc->migration_threshold); pcmk__xe_set_props(node, "orphan", "false", "migration-threshold", migration_s, NULL); free(migration_s); if (failcount > 0) { char *s = pcmk__itoa(failcount); crm_xml_add(node, PCMK__FAIL_COUNT_PREFIX, s); free(s); } if (last_failure > 0) { crm_xml_add(node, PCMK__LAST_FAILURE_PREFIX, pcmk__epoch2str(&last_failure)); } } if (as_header == FALSE) { pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } static void print_resource_header(pcmk__output_t *out, unsigned int show_opts) { if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { /* Active resources have already been printed by node */ out->begin_list(out, NULL, NULL, "Inactive Resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->begin_list(out, NULL, NULL, "Full List of Resources"); } else { out->begin_list(out, NULL, NULL, "Active Resources"); } } PCMK__OUTPUT_ARGS("resource-list", "pe_working_set_t *", "unsigned int", "gboolean", "GList *", "GList *", "gboolean") static int resource_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); unsigned int show_opts = va_arg(args, unsigned int); gboolean print_summary = va_arg(args, gboolean); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); gboolean print_spacer = va_arg(args, gboolean); GList *rsc_iter; int rc = pcmk_rc_no_output; bool printed_header = false; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { return rc; } /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (pcmk_is_set(show_opts, pcmk_show_brief) && !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(data_set->resources, only_rsc); PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; rc = pe__rscs_brief_output(out, rscs, show_opts); g_list_free(rscs); } /* For each resource, display it if appropriate */ for (rsc_iter = data_set->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { pe_resource_t *rsc = (pe_resource_t *) rsc_iter->data; int x; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->fns->active(rsc, TRUE); gboolean partially_active = rsc->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (pcmk_is_set(rsc->flags, pe_rsc_orphan) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (pcmk_is_set(show_opts, pcmk_show_brief) && (rsc->variant == pe_native)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { continue; } else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) { continue; } if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } /* Print this resource */ x = out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, only_node, only_rsc); if (x == pcmk_rc_ok) { rc = pcmk_rc_ok; } } if (print_summary && rc != pcmk_rc_ok) { if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { out->list_item(out, NULL, "No inactive resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->list_item(out, NULL, "No resources"); } else { out->list_item(out, NULL, "No active resources"); } } if (printed_header) { out->end_list(out); } return rc; } PCMK__OUTPUT_ARGS("resource-operation-list", "pe_working_set_t *", "pe_resource_t *", "pe_node_t *", "GList *", "unsigned int") static int resource_operation_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); GList *op_list = va_arg(args, GList *); unsigned int show_opts = va_arg(args, unsigned int); GList *gIter = NULL; int rc = pcmk_rc_no_output; /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); const char *op_rc = crm_element_value(xml_op, XML_LRM_ATTR_RC); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, CRMD_ACTION_STATUS, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pe_fc_default, NULL, data_set); out->message(out, "resource-history", rsc, rsc_printable_id(rsc), TRUE, failcount, last_failure, TRUE); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, show_opts); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", fn, rsc->id, node->details->uname); g_hash_table_foreach(rsc->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-util", "pe_resource_t *", "pe_node_t *", "const char *") static int resource_util_xml(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *fn = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, "utilization", "resource", rsc->id, "node", node->details->uname, "function", fn, NULL); g_hash_table_foreach(rsc->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_html(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *time = pcmk__format_named_time("last-granted", ticket->last_granted); out->list_item(out, NULL, "%s:\t%s%s %s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", time); free(time); } else { out->list_item(out, NULL, "%s:\t%s%s", ticket->id, ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_text(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); if (ticket->last_granted > -1) { char *time = pcmk__format_named_time("last-granted", ticket->last_granted); out->list_item(out, ticket->id, "%s%s %s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : "", time); free(time); } else { out->list_item(out, ticket->id, "%s%s", ticket->granted ? "granted" : "revoked", ticket->standby ? " [standby]" : ""); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pe_ticket_t *") static int ticket_xml(pcmk__output_t *out, va_list args) { pe_ticket_t *ticket = va_arg(args, pe_ticket_t *); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, "ticket", "id", ticket->id, "status", ticket->granted ? "granted" : "revoked", "standby", pcmk__btoa(ticket->standby), NULL); if (ticket->last_granted > -1) { crm_xml_add(node, "last-granted", pcmk__epoch2str(&ticket->last_granted)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-list", "pe_working_set_t *", "gboolean") static int ticket_list(pcmk__output_t *out, va_list args) { pe_working_set_t *data_set = va_arg(args, pe_working_set_t *); gboolean print_spacer = va_arg(args, gboolean); GHashTableIter iter; gpointer key, value; if (g_hash_table_size(data_set->tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, data_set->tickets); while (g_hash_table_iter_next(&iter, &key, &value)) { pe_ticket_t *ticket = (pe_ticket_t *) value; out->message(out, "ticket", ticket); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "ban", "default", ban_text }, { "ban", "html", ban_html }, { "ban", "xml", ban_xml }, { "ban-list", "default", ban_list }, { "bundle", "default", pe__bundle_text }, { "bundle", "xml", pe__bundle_xml }, { "bundle", "html", pe__bundle_html }, { "clone", "default", pe__clone_default }, { "clone", "xml", pe__clone_xml }, { "cluster-counts", "default", cluster_counts_text }, { "cluster-counts", "html", cluster_counts_html }, { "cluster-counts", "xml", cluster_counts_xml }, { "cluster-dc", "default", cluster_dc_text }, { "cluster-dc", "html", cluster_dc_html }, { "cluster-dc", "xml", cluster_dc_xml }, { "cluster-options", "default", cluster_options_text }, { "cluster-options", "html", cluster_options_html }, { "cluster-options", "log", cluster_options_log }, { "cluster-options", "xml", cluster_options_xml }, { "cluster-summary", "default", cluster_summary }, { "cluster-summary", "html", cluster_summary_html }, { "cluster-stack", "default", cluster_stack_text }, { "cluster-stack", "html", cluster_stack_html }, { "cluster-stack", "xml", cluster_stack_xml }, { "cluster-times", "default", cluster_times_text }, { "cluster-times", "html", cluster_times_html }, { "cluster-times", "xml", cluster_times_xml }, { "failed-action", "default", failed_action_default }, { "failed-action", "xml", failed_action_xml }, { "failed-action-list", "default", failed_action_list }, { "group", "default", pe__group_default}, { "group", "xml", pe__group_xml }, { "maint-mode", "text", cluster_maint_mode_text }, { "node", "default", node_text }, { "node", "html", node_html }, { "node", "xml", node_xml }, { "node-and-op", "default", node_and_op }, { "node-and-op", "xml", node_and_op_xml }, { "node-capacity", "default", node_capacity }, { "node-capacity", "xml", node_capacity_xml }, { "node-history-list", "default", node_history_list }, { "node-list", "default", node_list_text }, { "node-list", "html", node_list_html }, { "node-list", "xml", node_list_xml }, { "node-weight", "default", node_weight }, { "node-weight", "xml", node_weight_xml }, { "node-attribute", "default", node_attribute_text }, { "node-attribute", "html", node_attribute_html }, { "node-attribute", "xml", node_attribute_xml }, { "node-attribute-list", "default", node_attribute_list }, { "node-summary", "default", node_summary }, { "op-history", "default", op_history_text }, { "op-history", "xml", op_history_xml }, { "primitive", "default", pe__resource_text }, { "primitive", "xml", pe__resource_xml }, { "primitive", "html", pe__resource_html }, { "promotion-score", "default", promotion_score }, { "promotion-score", "xml", promotion_score_xml }, { "resource-config", "default", resource_config }, { "resource-history", "default", resource_history_text }, { "resource-history", "xml", resource_history_xml }, { "resource-list", "default", resource_list }, { "resource-operation-list", "default", resource_operation_list }, { "resource-util", "default", resource_util }, { "resource-util", "xml", resource_util_xml }, { "ticket", "default", ticket_text }, { "ticket", "html", ticket_html }, { "ticket", "xml", ticket_xml }, { "ticket-list", "default", ticket_list }, { NULL, NULL, NULL } }; void pe__register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } void pe__output_node(pe_node_t *node, gboolean details, pcmk__output_t *out) { if (node == NULL) { crm_trace(""); return; } CRM_ASSERT(node->details); crm_trace("%sNode %s: (weight=%d, fixed=%s)", node->details->online ? "" : "Unavailable/Unclean ", node->details->uname, node->weight, node->fixed ? "True" : "False"); if (details) { char *pe_mutable = strdup("\t\t"); GList *gIter = node->details->running_rsc; GList *all = NULL; all = g_list_prepend(all, (gpointer) "*"); crm_trace("\t\t===Node Attributes"); g_hash_table_foreach(node->details->attrs, print_str_str, pe_mutable); free(pe_mutable); crm_trace("\t\t=== Resources"); for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; out->message(out, crm_map_element_name(rsc->xml), pe_print_pending, rsc, all, all); } g_list_free(all); } } diff --git a/lib/pengine/status.c b/lib/pengine/status.c index 096afebfdf..6ef121292f 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,447 +1,446 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /*! * \brief Create a new working set * * \return New, initialized working set on success, else NULL (and set errno) * \note Only pe_working_set_t objects created with this function (as opposed * to statically declared or directly allocated) should be used with the * functions in this library, to allow for future extensions to the * data type. The caller is responsible for freeing the memory with * pe_free_working_set() when the instance is no longer needed. */ pe_working_set_t * pe_new_working_set() { pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t)); if (data_set != NULL) { set_working_set_defaults(data_set); } return data_set; } /*! * \brief Free a working set * * \param[in] data_set Working set to free */ void pe_free_working_set(pe_working_set_t *data_set) { if (data_set != NULL) { pe_reset_working_set(data_set); data_set->priv = NULL; free(data_set); } } /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pe_working_set_t * data_set) { xmlNode *config = get_xpath_object("//"XML_CIB_TAG_CRMCONFIG, data_set->input, LOG_TRACE); xmlNode *cib_nodes = get_xpath_object("//"XML_CIB_TAG_NODES, data_set->input, LOG_TRACE); xmlNode *cib_resources = get_xpath_object("//"XML_CIB_TAG_RESOURCES, data_set->input, LOG_TRACE); xmlNode *cib_status = get_xpath_object("//"XML_CIB_TAG_STATUS, data_set->input, LOG_TRACE); xmlNode *cib_tags = get_xpath_object("//" XML_CIB_TAG_TAGS, data_set->input, LOG_NEVER); - const char *value = crm_element_value(data_set->input, XML_ATTR_HAVE_QUORUM); crm_trace("Beginning unpack"); /* reset remaining global variables */ data_set->failed = create_xml_node(NULL, "failed-ops"); if (data_set->input == NULL) { return FALSE; } if (data_set->now == NULL) { data_set->now = crm_time_new(NULL); } if (data_set->dc_uuid == NULL) { data_set->dc_uuid = crm_element_value_copy(data_set->input, XML_ATTR_DC_UUID); } - if (crm_is_true(value)) { + if (pcmk__xe_attr_is_true(data_set->input, XML_ATTR_HAVE_QUORUM)) { pe__set_working_set_flags(data_set, pe_flag_have_quorum); } else { pe__clear_working_set_flags(data_set, pe_flag_have_quorum); } data_set->op_defaults = get_xpath_object("//" XML_CIB_TAG_OPCONFIG, data_set->input, LOG_NEVER); data_set->rsc_defaults = get_xpath_object("//" XML_CIB_TAG_RSCCONFIG, data_set->input, LOG_NEVER); unpack_config(config, data_set); if (!pcmk_any_flags_set(data_set->flags, pe_flag_quick_location|pe_flag_have_quorum) && (data_set->no_quorum_policy != no_quorum_ignore)) { crm_warn("Fencing and resource management disabled due to lack of quorum"); } unpack_nodes(cib_nodes, data_set); if (!pcmk_is_set(data_set->flags, pe_flag_quick_location)) { unpack_remote_nodes(cib_resources, data_set); } unpack_resources(cib_resources, data_set); unpack_tags(cib_tags, data_set); if (!pcmk_is_set(data_set->flags, pe_flag_quick_location)) { unpack_status(cib_status, data_set); } if (!pcmk_is_set(data_set->flags, pe_flag_no_counts)) { for (GList *item = data_set->resources; item != NULL; item = item->next) { ((pe_resource_t *) (item->data))->fns->count(item->data); } } pe__set_working_set_flags(data_set, pe_flag_have_status); return TRUE; } /*! * \internal * \brief Free a list of pe_resource_t * * \param[in] resources List to free * * \note When a working set's resource list is freed, that includes the original * storage for the uname and id of any Pacemaker Remote nodes in the * working set's node list, so take care not to use those afterward. * \todo Refactor pe_node_t to strdup() the node name. */ static void pe_free_resources(GList *resources) { pe_resource_t *rsc = NULL; GList *iterator = resources; while (iterator != NULL) { rsc = (pe_resource_t *) iterator->data; iterator = iterator->next; rsc->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GList *actions) { GList *iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GList *nodes) { for (GList *iterator = nodes; iterator != NULL; iterator = iterator->next) { pe_node_t *node = (pe_node_t *) iterator->data; // Shouldn't be possible, but to be safe ... if (node == NULL) { continue; } if (node->details == NULL) { free(node); continue; } /* This is called after pe_free_resources(), which means that we can't * use node->details->uname for Pacemaker Remote nodes. */ crm_trace("Freeing node %s", (pe__is_guest_or_remote_node(node)? "(guest or remote)" : node->details->uname)); if (node->details->attrs != NULL) { g_hash_table_destroy(node->details->attrs); } if (node->details->utilization != NULL) { g_hash_table_destroy(node->details->utilization); } if (node->details->digest_cache != NULL) { g_hash_table_destroy(node->details->digest_cache); } g_list_free(node->details->running_rsc); g_list_free(node->details->allocated_rsc); free(node->details); free(node); } if (nodes != NULL) { g_list_free(nodes); } } static void pe__free_ordering(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pe__ordering_t *order = iterator->data; iterator = iterator->next; free(order->lh_action_task); free(order->rh_action_task); free(order); } if (constraints != NULL) { g_list_free(constraints); } } static void pe__free_location(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pe__location_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->node_list_rh, free); free(cons->id); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } /*! * \brief Reset working set to default state without freeing it or constraints * * \param[in,out] data_set Working set to reset * * \deprecated This function is deprecated as part of the API; * pe_reset_working_set() should be used instead. */ void cleanup_calculations(pe_working_set_t * data_set) { if (data_set == NULL) { return; } pe__clear_working_set_flags(data_set, pe_flag_have_status); if (data_set->config_hash != NULL) { g_hash_table_destroy(data_set->config_hash); } if (data_set->singletons != NULL) { g_hash_table_destroy(data_set->singletons); } if (data_set->tickets) { g_hash_table_destroy(data_set->tickets); } if (data_set->template_rsc_sets) { g_hash_table_destroy(data_set->template_rsc_sets); } if (data_set->tags) { g_hash_table_destroy(data_set->tags); } free(data_set->dc_uuid); crm_trace("deleting resources"); pe_free_resources(data_set->resources); crm_trace("deleting actions"); pe_free_actions(data_set->actions); crm_trace("deleting nodes"); pe_free_nodes(data_set->nodes); pe__free_param_checks(data_set); g_list_free(data_set->stop_needed); free_xml(data_set->graph); crm_time_free(data_set->now); free_xml(data_set->input); free_xml(data_set->failed); set_working_set_defaults(data_set); CRM_CHECK(data_set->ordering_constraints == NULL,; ); CRM_CHECK(data_set->placement_constraints == NULL,; ); } /*! * \brief Reset a working set to default state without freeing it * * \param[in,out] data_set Working set to reset */ void pe_reset_working_set(pe_working_set_t *data_set) { if (data_set == NULL) { return; } crm_trace("Deleting %d ordering constraints", g_list_length(data_set->ordering_constraints)); pe__free_ordering(data_set->ordering_constraints); data_set->ordering_constraints = NULL; crm_trace("Deleting %d location constraints", g_list_length(data_set->placement_constraints)); pe__free_location(data_set->placement_constraints); data_set->placement_constraints = NULL; crm_trace("Deleting %d colocation constraints", g_list_length(data_set->colocation_constraints)); g_list_free_full(data_set->colocation_constraints, free); data_set->colocation_constraints = NULL; crm_trace("Deleting %d ticket constraints", g_list_length(data_set->ticket_constraints)); g_list_free_full(data_set->ticket_constraints, free); data_set->ticket_constraints = NULL; cleanup_calculations(data_set); } void set_working_set_defaults(pe_working_set_t * data_set) { void *priv = data_set->priv; memset(data_set, 0, sizeof(pe_working_set_t)); data_set->priv = priv; data_set->order_id = 1; data_set->action_id = 1; data_set->no_quorum_policy = no_quorum_stop; data_set->flags = 0x0ULL; pe__set_working_set_flags(data_set, pe_flag_stop_rsc_orphans |pe_flag_symmetric_cluster |pe_flag_stop_action_orphans); if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, "true")) { pe__set_working_set_flags(data_set, pe_flag_concurrent_fencing); } } pe_resource_t * pe_find_resource(GList *rsc_list, const char *id) { return pe_find_resource_with_flags(rsc_list, id, pe_find_renamed); } pe_resource_t * pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags) { GList *rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { pe_resource_t *parent = rIter->data; pe_resource_t *match = parent->fns->find_rsc(parent, id, NULL, flags); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } pe_node_t * pe_find_node_any(GList *nodes, const char *id, const char *uname) { pe_node_t *match = pe_find_node_id(nodes, id); if (match) { return match; } crm_trace("Looking up %s via its uname instead", uname); return pe_find_node(nodes, uname); } pe_node_t * pe_find_node_id(GList *nodes, const char *id) { GList *gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && pcmk__str_eq(node->details->id, id, pcmk__str_casei)) { return node; } } /* error */ return NULL; } pe_node_t * pe_find_node(GList *nodes, const char *uname) { GList *gIter = nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node && pcmk__str_eq(node->details->uname, uname, pcmk__str_casei)) { return node; } } /* error */ return NULL; } diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c index 8d8da752b4..da76260504 100644 --- a/lib/pengine/utils.c +++ b/lib/pengine/utils.c @@ -1,2570 +1,2571 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "pe_status_private.h" extern bool pcmk__is_daemon; extern xmlNode *get_object_root(const char *object_type, xmlNode * the_root); void print_str_str(gpointer key, gpointer value, gpointer user_data); gboolean ghash_free_str_str(gpointer key, gpointer value, gpointer user_data); static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms); static xmlNode *find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t * pe_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; CRM_CHECK(action != NULL, return NULL); if (action->action_details == NULL) { action->action_details = calloc(1, sizeof(pe_rsc_action_details_t)); CRM_CHECK(action->action_details != NULL, return NULL); } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters == NULL) { details->versioned_parameters = create_xml_node(NULL, XML_TAG_OP_VER_ATTRS); } if (details->versioned_meta == NULL) { details->versioned_meta = create_xml_node(NULL, XML_TAG_OP_VER_META); } return details; } static void pe_free_rsc_action_details(pe_action_t *action) { pe_rsc_action_details_t *details; if ((action == NULL) || (action->action_details == NULL)) { return; } details = (pe_rsc_action_details_t *) action->action_details; if (details->versioned_parameters) { free_xml(details->versioned_parameters); } if (details->versioned_meta) { free_xml(details->versioned_meta); } action->action_details = NULL; } #endif /*! * \internal * \brief Check whether we can fence a particular node * * \param[in] data_set Working set for cluster * \param[in] node Name of node to check * * \return true if node can be fenced, false otherwise */ bool pe_can_fence(pe_working_set_t *data_set, pe_node_t *node) { if (pe__is_guest_node(node)) { /* Guest nodes are fenced by stopping their container resource. We can * do that if the container's host is either online or fenceable. */ pe_resource_t *rsc = node->details->remote_rsc->container; for (GList *n = rsc->running_on; n != NULL; n = n->next) { pe_node_t *container_node = n->data; if (!container_node->details->online && !pe_can_fence(data_set, container_node)) { return false; } } return true; } else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { return false; /* Turned off */ } else if (!pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) { return false; /* No devices */ } else if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { return true; } else if (data_set->no_quorum_policy == no_quorum_ignore) { return true; } else if(node == NULL) { return false; } else if(node->details->online) { crm_notice("We can fence %s without quorum because they're in our membership", node->details->uname); return true; } crm_trace("Cannot fence %s", node->details->uname); return false; } /*! * \internal * \brief Copy a node object * * \param[in] this_node Node object to copy * * \return Newly allocated shallow copy of this_node * \note This function asserts on errors and is guaranteed to return non-NULL. */ pe_node_t * pe__copy_node(const pe_node_t *this_node) { pe_node_t *new_node = NULL; CRM_ASSERT(this_node != NULL); new_node = calloc(1, sizeof(pe_node_t)); CRM_ASSERT(new_node != NULL); new_node->rsc_discover_mode = this_node->rsc_discover_mode; new_node->weight = this_node->weight; new_node->fixed = this_node->fixed; new_node->details = this_node->details; return new_node; } /* any node in list1 or list2 and not in the other gets a score of -INFINITY */ void node_list_exclude(GHashTable * hash, GList *list, gboolean merge_scores) { GHashTable *result = hash; pe_node_t *other_node = NULL; GList *gIter = list; GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { other_node = pe_find_node_id(list, node->details->id); if (other_node == NULL) { node->weight = -INFINITY; } else if (merge_scores) { node->weight = pe__add_scores(node->weight, other_node->weight); } } for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; other_node = pe_hash_table_lookup(result, node->details->id); if (other_node == NULL) { pe_node_t *new_node = pe__copy_node(node); new_node->weight = -INFINITY; g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } } } /*! * \internal * \brief Create a node hash table from a node list * * \param[in] list Node list * * \return Hash table equivalent of node list */ GHashTable * pe__node_list2table(GList *list) { GHashTable *result = NULL; result = pcmk__strkey_table(NULL, free); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *new_node = pe__copy_node((pe_node_t *) gIter->data); g_hash_table_insert(result, (gpointer) new_node->details->id, new_node); } return result; } gint sort_node_uname(gconstpointer a, gconstpointer b) { return pcmk__numeric_strcasecmp(((const pe_node_t *) a)->details->uname, ((const pe_node_t *) b)->details->uname); } /*! * \internal * \brief Output node weights to stdout * * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__output_node_weights(pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { pcmk__output_t *out = data_set->priv; char score[128]; // Stack-allocated since this is called frequently // Sort the nodes so the output is consistent for regression tests GList *list = g_list_sort(g_hash_table_get_values(nodes), sort_node_uname); for (GList *gIter = list; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; score2char_stack(node->weight, score, sizeof(score)); out->message(out, "node-weight", rsc, comment, node->details->uname, score); } g_list_free(list); } /*! * \internal * \brief Log node weights at trace level * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes If rsc is not specified, use these nodes */ static void pe__log_node_weights(const char *file, const char *function, int line, pe_resource_t *rsc, const char *comment, GHashTable *nodes) { GHashTableIter iter; pe_node_t *node = NULL; char score[128]; // Stack-allocated since this is called frequently // Don't waste time if we're not tracing at this point pcmk__log_else(LOG_TRACE, return); g_hash_table_iter_init(&iter, nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { score2char_stack(node->weight, score, sizeof(score)); if (rsc) { qb_log_from_external_source(function, file, "%s: %s allocation score on %s: %s", LOG_TRACE, line, 0, comment, rsc->id, node->details->uname, score); } else { qb_log_from_external_source(function, file, "%s: %s = %s", LOG_TRACE, line, 0, comment, node->details->uname, score); } } } /*! * \internal * \brief Log or output node weights * * \param[in] file Caller's filename * \param[in] function Caller's function name * \param[in] line Caller's line number * \param[in] to_log Log if true, otherwise output * \param[in] rsc Use allowed nodes for this resource * \param[in] comment Text description to prefix lines with * \param[in] nodes Use these nodes */ void pe__show_node_weights_as(const char *file, const char *function, int line, bool to_log, pe_resource_t *rsc, const char *comment, GHashTable *nodes, pe_working_set_t *data_set) { if (rsc != NULL && pcmk_is_set(rsc->flags, pe_rsc_orphan)) { // Don't show allocation scores for orphans return; } if (nodes == NULL) { // Nothing to show return; } if (to_log) { pe__log_node_weights(file, function, line, rsc, comment, nodes); } else { pe__output_node_weights(rsc, comment, nodes, data_set); } // If this resource has children, repeat recursively for each if (rsc && rsc->children) { for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe_resource_t *child = (pe_resource_t *) gIter->data; pe__show_node_weights_as(file, function, line, to_log, child, comment, child->allowed_nodes, data_set); } } } gint sort_rsc_index(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->sort_index > resource2->sort_index) { return -1; } if (resource1->sort_index < resource2->sort_index) { return 1; } return 0; } gint sort_rsc_priority(gconstpointer a, gconstpointer b) { const pe_resource_t *resource1 = (const pe_resource_t *)a; const pe_resource_t *resource2 = (const pe_resource_t *)b; if (a == NULL && b == NULL) { return 0; } if (a == NULL) { return 1; } if (b == NULL) { return -1; } if (resource1->priority > resource2->priority) { return -1; } if (resource1->priority < resource2->priority) { return 1; } return 0; } static enum pe_quorum_policy effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set) { enum pe_quorum_policy policy = data_set->no_quorum_policy; if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) { policy = no_quorum_ignore; } else if (data_set->no_quorum_policy == no_quorum_demote) { switch (rsc->role) { case RSC_ROLE_PROMOTED: case RSC_ROLE_UNPROMOTED: if (rsc->next_role > RSC_ROLE_UNPROMOTED) { pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED, "no-quorum-policy=demote"); } policy = no_quorum_ignore; break; default: policy = no_quorum_stop; break; } } return policy; } static void add_singleton(pe_working_set_t *data_set, pe_action_t *action) { if (data_set->singletons == NULL) { data_set->singletons = pcmk__strkey_table(NULL, NULL); } g_hash_table_insert(data_set->singletons, action->uuid, action); } static pe_action_t * lookup_singleton(pe_working_set_t *data_set, const char *action_uuid) { if (data_set->singletons == NULL) { return NULL; } return g_hash_table_lookup(data_set->singletons, action_uuid); } /*! * \internal * \brief Find an existing action that matches arguments * * \param[in] key Action key to match * \param[in] rsc Resource to match (if any) * \param[in] node Node to match (if any) * \param[in] data_set Cluster working set * * \return Existing action that matches arguments (or NULL if none) */ static pe_action_t * find_existing_action(const char *key, pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { GList *matches = NULL; pe_action_t *action = NULL; /* When rsc is NULL, it would be quicker to check data_set->singletons, * but checking all data_set->actions takes the node into account. */ matches = find_actions(((rsc == NULL)? data_set->actions : rsc->actions), key, node); if (matches == NULL) { return NULL; } CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches)); action = matches->data; g_list_free(matches); return action; } /*! * \internal * \brief Create a new action object * * \param[in] key Action key * \param[in] task Action name * \param[in] rsc Resource that action is for (if any) * \param[in] node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in] for_graph Whether action should be recorded in transition graph * \param[in] data_set Cluster working set * * \return Newly allocated action * \note This function takes ownership of \p key. It is the caller's * responsibility to free the return value with pe_free_action(). */ static pe_action_t * new_action(char *key, const char *task, pe_resource_t *rsc, pe_node_t *node, bool optional, bool for_graph, pe_working_set_t *data_set) { pe_action_t *action = calloc(1, sizeof(pe_action_t)); CRM_ASSERT(action != NULL); action->rsc = rsc; action->task = strdup(task); CRM_ASSERT(action->task != NULL); action->uuid = key; action->extra = pcmk__strkey_table(free, free); action->meta = pcmk__strkey_table(free, free); if (node) { action->node = pe__copy_node(node); } if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { // Resource history deletion for a node can be done on the DC pe__set_action_flags(action, pe_action_dc); } pe__set_action_flags(action, pe_action_runnable); if (optional) { pe__set_action_flags(action, pe_action_optional); } else { pe__clear_action_flags(action, pe_action_optional); } if (rsc != NULL) { guint interval_ms = 0; action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE); parse_op_key(key, NULL, NULL, &interval_ms); unpack_operation(action, action->op_entry, rsc->container, data_set, interval_ms); } if (for_graph) { pe_rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s", (optional? "optional" : "required"), data_set->action_id, key, task, ((rsc == NULL)? "no resource" : rsc->id), ((node == NULL)? "no node" : node->details->uname)); action->id = data_set->action_id++; data_set->actions = g_list_prepend(data_set->actions, action); if (rsc == NULL) { add_singleton(data_set, action); } else { rsc->actions = g_list_prepend(rsc->actions, action); } } return action; } /*! * \internal * \brief Evaluate node attribute values for an action * * \param[in] action Action to unpack attributes for * \param[in] data_set Cluster working set */ static void unpack_action_node_attributes(pe_action_t *action, pe_working_set_t *data_set) { if (!pcmk_is_set(action->flags, pe_action_have_node_attrs) && (action->op_entry != NULL)) { pe_rule_eval_data_t rule_data = { .node_hash = action->node->details->attrs, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pe__set_action_flags(action, pe_action_have_node_attrs); pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS, &rule_data, action->extra, NULL, FALSE, data_set); } } /*! * \internal * \brief Update an action's optional flag * * \param[in] action Action to update * \param[in] optional Requested optional status */ static void update_action_optional(pe_action_t *action, gboolean optional) { // Force a non-recurring action to be optional if its resource is unmanaged if ((action->rsc != NULL) && (action->node != NULL) && !pcmk_is_set(action->flags, pe_action_pseudo) && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) && (g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS) == NULL)) { pe_rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)", action->uuid, action->node->details->uname, action->rsc->id); pe__set_action_flags(action, pe_action_optional); // We shouldn't clear runnable here because ... something // Otherwise require the action if requested } else if (!optional) { pe__clear_action_flags(action, pe_action_optional); } } /*! * \internal * \brief Update a resource action's runnable flag * * \param[in] action Action to update * \param[in] for_graph Whether action should be recorded in transition graph * \param[in] data_set Cluster working set * * \note This may also schedule fencing if a stop is unrunnable. */ static void update_resource_action_runnable(pe_action_t *action, bool for_graph, pe_working_set_t *data_set) { if (pcmk_is_set(action->flags, pe_action_pseudo)) { return; } if (action->node == NULL) { pe_rsc_trace(action->rsc, "%s is unrunnable (unallocated)", action->uuid); pe__clear_action_flags(action, pe_action_runnable); } else if (!pcmk_is_set(action->flags, pe_action_dc) && !(action->node->details->online) && (!pe__is_guest_node(action->node) || action->node->details->remote_requires_reset)) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log((for_graph? LOG_WARNING: LOG_TRACE), "%s on %s is unrunnable (node is offline)", action->uuid, action->node->details->uname); if (pcmk_is_set(action->rsc->flags, pe_rsc_managed) && for_graph && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei) && !(action->node->details->unclean)) { pe_fence_node(data_set, action->node, "stop is unrunnable", false); } } else if (!pcmk_is_set(action->flags, pe_action_dc) && action->node->details->pending) { pe__clear_action_flags(action, pe_action_runnable); do_crm_log((for_graph? LOG_WARNING: LOG_TRACE), "Action %s on %s is unrunnable (node is pending)", action->uuid, action->node->details->uname); } else if (action->needs == rsc_req_nothing) { pe_action_set_reason(action, NULL, TRUE); if (pe__is_guest_node(action->node) && !pe_can_fence(data_set, action->node)) { /* An action that requires nothing usually does not require any * fencing in order to be runnable. However, there is an exception: * such an action cannot be completed if it is on a guest node whose * host is unclean and cannot be fenced. */ pe_rsc_debug(action->rsc, "%s on %s is unrunnable " "(node's host cannot be fenced)", action->uuid, action->node->details->uname); pe__clear_action_flags(action, pe_action_runnable); } else { pe_rsc_trace(action->rsc, "%s on %s does not require fencing or quorum", action->uuid, action->node->details->uname); pe__set_action_flags(action, pe_action_runnable); } } else { switch (effective_quorum_policy(action->rsc, data_set)) { case no_quorum_stop: pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)", action->uuid, action->node->details->uname); pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, "no quorum", true); break; case no_quorum_freeze: if (!action->rsc->fns->active(action->rsc, TRUE) || (action->rsc->next_role > action->rsc->role)) { pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)", action->uuid, action->node->details->uname); pe__clear_action_flags(action, pe_action_runnable); pe_action_set_reason(action, "quorum freeze", true); } break; default: //pe_action_set_reason(action, NULL, TRUE); pe__set_action_flags(action, pe_action_runnable); break; } } } /*! * \internal * \brief Update a resource object's flags for a new action on it * * \param[in] rsc Resource that action is for (if any) * \param[in] action New action */ static void update_resource_flags_for_action(pe_resource_t *rsc, pe_action_t *action) { /* @COMPAT pe_rsc_starting and pe_rsc_stopping are not actually used * within Pacemaker, and should be deprecated and eventually removed */ if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { pe__set_resource_flags(rsc, pe_rsc_stopping); } else if (pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) { if (pcmk_is_set(action->flags, pe_action_runnable)) { pe__set_resource_flags(rsc, pe_rsc_starting); } else { pe__clear_resource_flags(rsc, pe_rsc_starting); } } } /*! * \brief Create or update an action object * * \param[in] rsc Resource that action is for (if any) * \param[in] key Action key (must be non-NULL) * \param[in] task Action name (must be non-NULL) * \param[in] on_node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in] save_action Whether action should be recorded in transition graph * \param[in] data_set Cluster working set * * \return Action object corresponding to arguments * \note This function takes ownership of (and might free) \p key. If * \p save_action is true, \p data_set will own the returned action, * otherwise it is the caller's responsibility to free the return value * with pe_free_action(). */ pe_action_t * custom_action(pe_resource_t *rsc, char *key, const char *task, pe_node_t *on_node, gboolean optional, gboolean save_action, pe_working_set_t *data_set) { pe_action_t *action = NULL; CRM_ASSERT((key != NULL) && (task != NULL) && (data_set != NULL)); if (save_action) { action = find_existing_action(key, rsc, on_node, data_set); } if (action == NULL) { action = new_action(key, task, rsc, on_node, optional, save_action, data_set); } else { free(key); } update_action_optional(action, optional); if (rsc != NULL) { if (action->node != NULL) { unpack_action_node_attributes(action, data_set); } update_resource_action_runnable(action, save_action, data_set); if (save_action) { update_resource_flags_for_action(rsc, action); } } return action; } static bool valid_stop_on_fail(const char *value) { return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL); } static const char * unpack_operation_on_fail(pe_action_t * action) { const char *name = NULL; const char *role = NULL; const char *on_fail = NULL; const char *interval_spec = NULL; - const char *enabled = NULL; const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL); if (pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei) && !valid_stop_on_fail(value)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop " "action to default value because '%s' is not " "allowed for stop", action->rsc->id, value); return NULL; } else if (pcmk__str_eq(action->task, CRMD_ACTION_DEMOTE, pcmk__str_casei) && !value) { // demote on_fail defaults to monitor value for promoted role if present xmlNode *operation = NULL; CRM_CHECK(action->rsc != NULL, return NULL); for (operation = pcmk__xe_first_child(action->rsc->ops_xml); (operation != NULL) && (value == NULL); operation = pcmk__xe_next(operation)) { + bool enabled = false; if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { continue; } name = crm_element_value(operation, "name"); role = crm_element_value(operation, "role"); on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL); - enabled = crm_element_value(operation, "enabled"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); if (!on_fail) { continue; - } else if (enabled && !crm_is_true(enabled)) { + } else if (pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) { continue; } else if (!pcmk__str_eq(name, "monitor", pcmk__str_casei) || !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S, RSC_ROLE_PROMOTED_LEGACY_S, NULL)) { continue; } else if (crm_parse_interval_spec(interval_spec) == 0) { continue; } else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) { continue; } value = on_fail; } } else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) { value = "ignore"; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { name = crm_element_value(action->op_entry, "name"); role = crm_element_value(action->op_entry, "role"); interval_spec = crm_element_value(action->op_entry, XML_LRM_ATTR_INTERVAL); if (!pcmk__str_eq(name, CRMD_ACTION_PROMOTE, pcmk__str_casei) && (!pcmk__str_eq(name, CRMD_ACTION_STATUS, pcmk__str_casei) || !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S, RSC_ROLE_PROMOTED_LEGACY_S, NULL) || (crm_parse_interval_spec(interval_spec) == 0))) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s " "action to default value because 'demote' is not " "allowed for it", action->rsc->id, name); return NULL; } } return value; } static xmlNode * find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled) { guint interval_ms = 0; guint min_interval_ms = G_MAXUINT; const char *name = NULL; - const char *value = NULL; const char *interval_spec = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { + bool enabled = false; + name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); - value = crm_element_value(operation, "enabled"); - if (!include_disabled && value && crm_is_true(value) == FALSE) { + if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && + !enabled) { continue; } if (!pcmk__str_eq(name, RSC_STATUS, pcmk__str_casei)) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); if (interval_ms && (interval_ms < min_interval_ms)) { min_interval_ms = interval_ms; op = operation; } } } return op; } static int unpack_start_delay(const char *value, GHashTable *meta) { int start_delay = 0; if (value != NULL) { start_delay = crm_get_msec(value); if (start_delay < 0) { start_delay = 0; } if (meta) { g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY), pcmk__itoa(start_delay)); } } return start_delay; } // true if value contains valid, non-NULL interval origin for recurring op static bool unpack_interval_origin(const char *value, xmlNode *xml_obj, guint interval_ms, crm_time_t *now, long long *start_delay) { long long result = 0; guint interval_sec = interval_ms / 1000; crm_time_t *origin = NULL; // Ignore unspecified values and non-recurring operations if ((value == NULL) || (interval_ms == 0) || (now == NULL)) { return false; } // Parse interval origin from text origin = crm_time_new(value); if (origin == NULL) { pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation " "'%s' because '%s' is not valid", (ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value); return false; } // Get seconds since origin (negative if origin is in the future) result = crm_time_get_seconds(now) - crm_time_get_seconds(origin); crm_time_free(origin); // Calculate seconds from closest interval to now result = result % interval_sec; // Calculate seconds remaining until next interval result = ((result <= 0)? 0 : interval_sec) - result; crm_info("Calculated a start delay of %llds for operation '%s'", result, (ID(xml_obj)? ID(xml_obj) : "(unspecified)")); if (start_delay != NULL) { *start_delay = result * 1000; // milliseconds } return true; } static int unpack_timeout(const char *value) { int timeout_ms = crm_get_msec(value); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } return timeout_ms; } int pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set) { xmlNode *child = NULL; GHashTable *action_meta = NULL; const char *timeout_spec = NULL; int timeout_ms = 0; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP); child != NULL; child = crm_next_same_xml(child)) { if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME), pcmk__str_casei)) { timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT); break; } } if (timeout_spec == NULL && data_set->op_defaults) { action_meta = pcmk__strkey_table(free, free); pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action_meta, NULL, FALSE, data_set); timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT); } // @TODO check meta-attributes (including versioned meta-attributes) // @TODO maybe use min-interval monitor timeout as default for monitors timeout_ms = crm_get_msec(timeout_spec); if (timeout_ms < 0) { timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); } if (action_meta != NULL) { g_hash_table_destroy(action_meta); } return timeout_ms; } #if ENABLE_VERSIONED_ATTRS static void unpack_versioned_meta(xmlNode *versioned_meta, xmlNode *xml_obj, guint interval_ms, crm_time_t *now) { xmlNode *attrs = NULL; xmlNode *attr = NULL; for (attrs = pcmk__xe_first_child(versioned_meta); attrs != NULL; attrs = pcmk__xe_next(attrs)) { for (attr = pcmk__xe_first_child(attrs); attr != NULL; attr = pcmk__xe_next(attr)) { const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); if (pcmk__str_eq(name, XML_OP_ATTR_START_DELAY, pcmk__str_casei)) { int start_delay = unpack_start_delay(value, NULL); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } else if (pcmk__str_eq(name, XML_OP_ATTR_ORIGIN, pcmk__str_casei)) { long long start_delay = 0; if (unpack_interval_origin(value, xml_obj, interval_ms, now, &start_delay)) { crm_xml_add(attr, XML_NVPAIR_ATTR_NAME, XML_OP_ATTR_START_DELAY); crm_xml_add_ll(attr, XML_NVPAIR_ATTR_VALUE, start_delay); } } else if (pcmk__str_eq(name, XML_ATTR_TIMEOUT, pcmk__str_casei)) { int timeout_ms = unpack_timeout(value); crm_xml_add_int(attr, XML_NVPAIR_ATTR_VALUE, timeout_ms); } } } } #endif /*! * \brief Unpack operation XML into an action structure * * Unpack an operation's meta-attributes (normalizing the interval, timeout, * and start delay values as integer milliseconds), requirements, and * failure policy. * * \param[in,out] action Action to unpack into * \param[in] xml_obj Operation XML (or NULL if all defaults) * \param[in] container Resource that contains affected resource, if any * \param[in] data_set Cluster state * \param[in] interval_ms How frequently to perform the operation */ static void unpack_operation(pe_action_t * action, xmlNode * xml_obj, pe_resource_t * container, pe_working_set_t * data_set, guint interval_ms) { int timeout_ms = 0; const char *value = NULL; bool is_probe = pcmk__str_eq(action->task, RSC_STATUS, pcmk__str_casei) && (interval_ms == 0); #if ENABLE_VERSIONED_ATTRS pe_rsc_action_details_t *rsc_details = NULL; #endif pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS), .provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER), .agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE) }; pe_op_eval_data_t op_rule_data = { .op_name = action->task, .interval = interval_ms }; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = RSC_ROLE_UNKNOWN, .now = data_set->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = &op_rule_data }; CRM_CHECK(action && action->rsc, return); // Cluster-wide pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data, action->meta, NULL, FALSE, data_set); // Determine probe default timeout differently if (is_probe) { xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE); if (min_interval_mon) { value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT); if (value) { crm_trace("\t%s: Setting default timeout to minimum-interval " "monitor's timeout '%s'", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } } if (xml_obj) { xmlAttrPtr xIter = NULL; // take precedence over defaults pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data, action->meta, NULL, TRUE, data_set); #if ENABLE_VERSIONED_ATTRS rsc_details = pe_rsc_action_details(action); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, &rule_data, rsc_details->versioned_parameters, NULL); pe_eval_versioned_attributes(data_set->input, xml_obj, XML_TAG_META_SETS, &rule_data, rsc_details->versioned_meta, NULL); #endif /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. */ for (xIter = xml_obj->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = crm_element_value(xml_obj, prop_name); g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value)); } } g_hash_table_remove(action->meta, "id"); // Normalize interval to milliseconds if (interval_ms > 0) { g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL), crm_strdup_printf("%u", interval_ms)); } else { g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL); } /* * Timeout order of precedence: * 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params * and task is start or a probe; pcmk_monitor_timeout works * by default for a recurring monitor) * 2. explicit op timeout on the primitive * 3. default op timeout * a. if probe, then min-interval monitor's timeout * b. else, in XML_CIB_TAG_OPCONFIG * 4. CRM_DEFAULT_OP_TIMEOUT_S * * #1 overrides general rule of XML property having highest * precedence. */ if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard), pcmk_ra_cap_fence_params) && (pcmk__str_eq(action->task, RSC_START, pcmk__str_casei) || is_probe)) { GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set); value = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (value) { crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', " "overriding default", action->uuid, value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), strdup(value)); } } // Normalize timeout to positive milliseconds value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT); timeout_ms = unpack_timeout(value); g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT), pcmk__itoa(timeout_ms)); if (!pcmk__strcase_any_of(action->task, RSC_START, RSC_PROMOTE, NULL)) { action->needs = rsc_req_nothing; value = "nothing (not start or promote)"; } else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_fencing)) { action->needs = rsc_req_stonith; value = "fencing"; } else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_quorum)) { action->needs = rsc_req_quorum; value = "quorum"; } else { action->needs = rsc_req_nothing; value = "nothing"; } pe_rsc_trace(action->rsc, "%s requires %s", action->uuid, value); value = unpack_operation_on_fail(action); if (value == NULL) { } else if (pcmk__str_eq(value, "block", pcmk__str_casei)) { action->on_fail = action_fail_block; g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block")); value = "block"; // The above could destroy the original string } else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) { action->on_fail = action_fail_fence; value = "node fencing"; if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for " "operation '%s' to 'stop' because 'fence' is not " "valid when fencing is disabled", action->uuid); action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } } else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) { action->on_fail = action_fail_standby; value = "node standby"; } else if (pcmk__strcase_any_of(value, "ignore", "nothing", NULL)) { action->on_fail = action_fail_ignore; value = "ignore"; } else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) { action->on_fail = action_fail_migrate; value = "force migration"; } else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop resource"; } else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate)"; } else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) { if (container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate)"; } else { value = NULL; } } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { action->on_fail = action_fail_demote; value = "demote instance"; } else { pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value); value = NULL; } /* defaults */ if (value == NULL && container) { action->on_fail = action_fail_restart_container; value = "restart container (and possibly migrate) (default)"; /* For remote nodes, ensure that any failure that results in dropping an * active connection to the node results in fencing of the node. * * There are only two action failures that don't result in fencing. * 1. probes - probe failures are expected. * 2. start - a start failure indicates that an active connection does not already * exist. The user can set op on-fail=fence if they really want to fence start * failures. */ } else if (((value == NULL) || !pcmk_is_set(action->rsc->flags, pe_rsc_managed)) && pe__resource_is_remote_conn(action->rsc, data_set) && !(pcmk__str_eq(action->task, CRMD_ACTION_STATUS, pcmk__str_casei) && (interval_ms == 0)) && !pcmk__str_eq(action->task, CRMD_ACTION_START, pcmk__str_casei)) { if (!pcmk_is_set(action->rsc->flags, pe_rsc_managed)) { action->on_fail = action_fail_stop; action->fail_role = RSC_ROLE_STOPPED; value = "stop unmanaged remote node (enforcing default)"; } else { if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { value = "fence remote node (default)"; } else { value = "recover remote node connection (default)"; } if (action->rsc->remote_reconnect_ms) { action->fail_role = RSC_ROLE_STOPPED; } action->on_fail = action_fail_reset_remote; } } else if (value == NULL && pcmk__str_eq(action->task, CRMD_ACTION_STOP, pcmk__str_casei)) { if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) { action->on_fail = action_fail_fence; value = "resource fence (default)"; } else { action->on_fail = action_fail_block; value = "resource block (default)"; } } else if (value == NULL) { action->on_fail = action_fail_recover; value = "restart (and possibly migrate) (default)"; } pe_rsc_trace(action->rsc, "%s failure handling: %s", action->uuid, value); value = NULL; if (xml_obj != NULL) { value = g_hash_table_lookup(action->meta, "role_after_failure"); if (value) { pe_warn_once(pe_wo_role_after, "Support for role_after_failure is deprecated and will be removed in a future release"); } } if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) { action->fail_role = text2role(value); } /* defaults */ if (action->fail_role == RSC_ROLE_UNKNOWN) { if (pcmk__str_eq(action->task, CRMD_ACTION_PROMOTE, pcmk__str_casei)) { action->fail_role = RSC_ROLE_UNPROMOTED; } else { action->fail_role = RSC_ROLE_STARTED; } } pe_rsc_trace(action->rsc, "%s failure results in: %s", action->uuid, role2text(action->fail_role)); value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY); if (value) { unpack_start_delay(value, action->meta); } else { long long start_delay = 0; value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN); if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now, &start_delay)) { g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY), crm_strdup_printf("%lld", start_delay)); } } #if ENABLE_VERSIONED_ATTRS unpack_versioned_meta(rsc_details->versioned_meta, xml_obj, interval_ms, data_set->now); #endif } static xmlNode * find_rsc_op_entry_helper(pe_resource_t * rsc, const char *key, gboolean include_disabled) { guint interval_ms = 0; gboolean do_retry = TRUE; char *local_key = NULL; const char *name = NULL; - const char *value = NULL; const char *interval_spec = NULL; char *match_key = NULL; xmlNode *op = NULL; xmlNode *operation = NULL; retry: for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL; operation = pcmk__xe_next(operation)) { if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) { + bool enabled = false; + name = crm_element_value(operation, "name"); interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL); - value = crm_element_value(operation, "enabled"); - if (!include_disabled && value && crm_is_true(value) == FALSE) { + if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && + !enabled) { continue; } interval_ms = crm_parse_interval_spec(interval_spec); match_key = pcmk__op_key(rsc->id, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); if (rsc->clone_name) { match_key = pcmk__op_key(rsc->clone_name, name, interval_ms); if (pcmk__str_eq(key, match_key, pcmk__str_casei)) { op = operation; } free(match_key); } if (op != NULL) { free(local_key); return op; } } } free(local_key); if (do_retry == FALSE) { return NULL; } do_retry = FALSE; if (strstr(key, CRMD_ACTION_MIGRATE) || strstr(key, CRMD_ACTION_MIGRATED)) { local_key = pcmk__op_key(rsc->id, "migrate", 0); key = local_key; goto retry; } else if (strstr(key, "_notify_")) { local_key = pcmk__op_key(rsc->id, "notify", 0); key = local_key; goto retry; } return NULL; } xmlNode * find_rsc_op_entry(pe_resource_t * rsc, const char *key) { return find_rsc_op_entry_helper(rsc, key, FALSE); } /* * Used by the HashTable for-loop */ void print_str_str(gpointer key, gpointer value, gpointer user_data) { crm_trace("%s%s %s ==> %s", user_data == NULL ? "" : (char *)user_data, user_data == NULL ? "" : ": ", (char *)key, (char *)value); } void pe_free_action(pe_action_t * action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */ g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */ if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } #if ENABLE_VERSIONED_ATTRS if (action->rsc) { pe_free_rsc_action_details(action); } #endif free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } GList * find_recurring_actions(GList *input, pe_node_t * not_on_node) { const char *value = NULL; GList *result = NULL; GList *gIter = input; CRM_CHECK(input != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; value = g_hash_table_lookup(action->meta, XML_LRM_ATTR_INTERVAL_MS); if (value == NULL) { /* skip */ } else if (pcmk__str_eq(value, "0", pcmk__str_casei)) { /* skip */ } else if (pcmk__str_eq(CRMD_ACTION_CANCEL, action->task, pcmk__str_casei)) { /* skip */ } else if (not_on_node == NULL) { crm_trace("(null) Found: %s", action->uuid); result = g_list_prepend(result, action); } else if (action->node == NULL) { /* skip */ } else if (action->node->details != not_on_node->details) { crm_trace("Found: %s", action->uuid); result = g_list_prepend(result, action); } } return result; } enum action_tasks get_complex_task(pe_resource_t * rsc, const char *name, gboolean allow_non_atomic) { enum action_tasks task = text2task(name); if (rsc == NULL) { return task; } else if (allow_non_atomic == FALSE || rsc->variant == pe_native) { switch (task) { case stopped_rsc: case started_rsc: case action_demoted: case action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); return task - 1; default: break; } } return task; } pe_action_t * find_first_action(GList *input, const char *uuid, const char *task, pe_node_t * on_node) { GList *gIter = NULL; CRM_CHECK(uuid || task, return NULL); for (gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) { continue; } else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (on_node->details == action->node->details) { return action; } } return NULL; } GList * find_actions(GList *input, const char *key, const pe_node_t *on_node) { GList *gIter = input; GList *result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, on_node->details->uname); action->node = pe__copy_node(on_node); result = g_list_prepend(result, action); } else if (on_node->details == action->node->details) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } } return result; } GList * find_actions_exact(GList *input, const char *key, const pe_node_t *on_node) { GList *result = NULL; CRM_CHECK(key != NULL, return NULL); if (on_node == NULL) { return NULL; } for (GList *gIter = input; gIter != NULL; gIter = gIter->next) { pe_action_t *action = (pe_action_t *) gIter->data; if ((action->node != NULL) && pcmk__str_eq(key, action->uuid, pcmk__str_casei) && pcmk__str_eq(on_node->details->id, action->node->details->id, pcmk__str_casei)) { crm_trace("Action %s on %s matches", key, on_node->details->uname); result = g_list_prepend(result, action); } } return result; } /*! * \brief Find all actions of given type for a resource * * \param[in] rsc Resource to search * \param[in] node Find only actions scheduled on this node * \param[in] task Action name to search for * \param[in] require_node If TRUE, NULL node or action node will not match * * \return List of actions found (or NULL if none) * \note If node is not NULL and require_node is FALSE, matching actions * without a node will be assigned to node. */ GList * pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node, const char *task, bool require_node) { GList *result = NULL; char *key = pcmk__op_key(rsc->id, task, 0); if (require_node) { result = find_actions_exact(rsc->actions, key, node); } else { result = find_actions(rsc->actions, key, node); } free(key); return result; } static void resource_node_score(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag) { pe_node_t *match = NULL; if ((rsc->exclusive_discover || (node->rsc_discover_mode == pe_discover_never)) && pcmk__str_eq(tag, "symmetric_default", pcmk__str_casei)) { /* This string comparision may be fragile, but exclusive resources and * exclusive nodes should not have the symmetric_default constraint * applied to them. */ return; } else if (rsc->children) { GList *gIter = rsc->children; for (; gIter != NULL; gIter = gIter->next) { pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; resource_node_score(child_rsc, node, score, tag); } } pe_rsc_trace(rsc, "Setting %s for %s on %s: %d", tag, rsc->id, node->details->uname, score); match = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id); if (match == NULL) { match = pe__copy_node(node); g_hash_table_insert(rsc->allowed_nodes, (gpointer) match->details->id, match); } match->weight = pe__add_scores(match->weight, score); } void resource_location(pe_resource_t * rsc, pe_node_t * node, int score, const char *tag, pe_working_set_t * data_set) { if (node != NULL) { resource_node_score(rsc, node, score, tag); } else if (data_set != NULL) { GList *gIter = data_set->nodes; for (; gIter != NULL; gIter = gIter->next) { pe_node_t *node_iter = (pe_node_t *) gIter->data; resource_node_score(rsc, node_iter, score, tag); } } else { GHashTableIter iter; pe_node_t *node_iter = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node_iter)) { resource_node_score(rsc, node_iter, score, tag); } } if (node == NULL && score == -INFINITY) { if (rsc->allocated_to) { crm_info("Deallocating %s from %s", rsc->id, rsc->allocated_to->details->uname); free(rsc->allocated_to); rsc->allocated_to = NULL; } } } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) gint sort_op_by_callid(gconstpointer a, gconstpointer b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const xmlNode *xml_a = a; const xmlNode *xml_b = b; const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID); const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID); if (pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_casei)) { /* We have duplicate lrm_rsc_op entries in the status * section which is unlikely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id); crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (b_call_id >= 0 && a_call_id == b_call_id) { /* * The op and last_failed_op are the same * Order on last-rc-change */ time_t last_a = -1; time_t last_b = -1; crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a); crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b); crm_trace("rc-change: %lld vs %lld", (long long) last_a, (long long) last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation * Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other */ int a_id = -1; int b_id = -1; const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic a"); } if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } time_t get_effective_time(pe_working_set_t * data_set) { if(data_set) { if (data_set->now == NULL) { crm_trace("Recording a new 'now'"); data_set->now = crm_time_new(NULL); } return crm_time_get_seconds_since_epoch(data_set->now); } crm_trace("Defaulting to 'now'"); return time(NULL); } gboolean get_target_role(pe_resource_t * rsc, enum rsc_role_e * role) { enum rsc_role_e local_role = RSC_ROLE_UNKNOWN; const char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); CRM_CHECK(role != NULL, return FALSE); if (pcmk__str_eq(value, "started", pcmk__str_null_matches | pcmk__str_casei) || pcmk__str_eq("default", value, pcmk__str_casei)) { return FALSE; } local_role = text2role(value); if (local_role == RSC_ROLE_UNKNOWN) { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' is not valid", rsc->id, value); return FALSE; } else if (local_role > RSC_ROLE_STARTED) { if (pcmk_is_set(uber_parent(rsc)->flags, pe_rsc_promotable)) { if (local_role > RSC_ROLE_UNPROMOTED) { /* This is what we'd do anyway, just leave the default to avoid messing up the placement algorithm */ return FALSE; } } else { pcmk__config_err("Ignoring '" XML_RSC_ATTR_TARGET_ROLE "' for %s " "because '%s' only makes sense for promotable " "clones", rsc->id, value); return FALSE; } } *role = local_role; return TRUE; } gboolean order_actions(pe_action_t * lh_action, pe_action_t * rh_action, enum pe_ordering order) { GList *gIter = NULL; pe_action_wrapper_t *wrapper = NULL; GList *list = NULL; if (order == pe_order_none) { return FALSE; } if (lh_action == NULL || rh_action == NULL) { return FALSE; } crm_trace("Creating action wrappers for ordering: %s then %s", lh_action->uuid, rh_action->uuid); /* Ensure we never create a dependency on ourselves... it's happened */ CRM_ASSERT(lh_action != rh_action); /* Filter dups, otherwise update_action_states() has too much work to do */ gIter = lh_action->actions_after; for (; gIter != NULL; gIter = gIter->next) { pe_action_wrapper_t *after = (pe_action_wrapper_t *) gIter->data; if (after->action == rh_action && (after->type & order)) { return FALSE; } } wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = rh_action; wrapper->type = order; list = lh_action->actions_after; list = g_list_prepend(list, wrapper); lh_action->actions_after = list; wrapper = calloc(1, sizeof(pe_action_wrapper_t)); wrapper->action = lh_action; wrapper->type = order; list = rh_action->actions_before; list = g_list_prepend(list, wrapper); rh_action->actions_before = list; return TRUE; } pe_action_t * get_pseudo_op(const char *name, pe_working_set_t * data_set) { pe_action_t *op = lookup_singleton(data_set, name); if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set); pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable); } return op; } void destroy_ticket(gpointer data) { pe_ticket_t *ticket = data; if (ticket->state) { g_hash_table_destroy(ticket->state); } free(ticket->id); free(ticket); } pe_ticket_t * ticket_new(const char *ticket_id, pe_working_set_t * data_set) { pe_ticket_t *ticket = NULL; if (pcmk__str_empty(ticket_id)) { return NULL; } if (data_set->tickets == NULL) { data_set->tickets = pcmk__strkey_table(free, destroy_ticket); } ticket = g_hash_table_lookup(data_set->tickets, ticket_id); if (ticket == NULL) { ticket = calloc(1, sizeof(pe_ticket_t)); if (ticket == NULL) { crm_err("Cannot allocate ticket '%s'", ticket_id); return NULL; } crm_trace("Creaing ticket entry for %s", ticket_id); ticket->id = strdup(ticket_id); ticket->granted = FALSE; ticket->last_granted = -1; ticket->standby = FALSE; ticket->state = pcmk__strkey_table(free, free); g_hash_table_insert(data_set->tickets, strdup(ticket->id), ticket); } return ticket; } const char *rsc_printable_id(pe_resource_t *rsc) { if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) { return ID(rsc->xml); } return rsc->id; } void pe__clear_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__clear_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__clear_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } void pe__clear_resource_flags_on_all(pe_working_set_t *data_set, uint64_t flag) { for (GList *lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *r = (pe_resource_t *) lpc->data; pe__clear_resource_flags_recursive(r, flag); } } void pe__set_resource_flags_recursive(pe_resource_t *rsc, uint64_t flags) { pe__set_resource_flags(rsc, flags); for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { pe__set_resource_flags_recursive((pe_resource_t *) gIter->data, flags); } } static GList * find_unfencing_devices(GList *candidates, GList *matches) { for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) { pe_resource_t *candidate = gIter->data; const char *provides = g_hash_table_lookup(candidate->meta, PCMK_STONITH_PROVIDES); const char *requires = g_hash_table_lookup(candidate->meta, XML_RSC_ATTR_REQUIRES); if(candidate->children) { matches = find_unfencing_devices(candidate->children, matches); } else if (!pcmk_is_set(candidate->flags, pe_rsc_fence_device)) { continue; } else if (pcmk__str_eq(provides, "unfencing", pcmk__str_casei) || pcmk__str_eq(requires, "unfencing", pcmk__str_casei)) { matches = g_list_prepend(matches, candidate); } } return matches; } static int node_priority_fencing_delay(pe_node_t * node, pe_working_set_t * data_set) { int member_count = 0; int online_count = 0; int top_priority = 0; int lowest_priority = 0; GList *gIter = NULL; // `priority-fencing-delay` is disabled if (data_set->priority_fencing_delay <= 0) { return 0; } /* No need to request a delay if the fencing target is not a normal cluster * member, for example if it's a remote node or a guest node. */ if (node->details->type != node_member) { return 0; } // No need to request a delay if the fencing target is in our partition if (node->details->online) { return 0; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *n = gIter->data; if (n->details->type != node_member) { continue; } member_count ++; if (n->details->online) { online_count++; } if (member_count == 1 || n->details->priority > top_priority) { top_priority = n->details->priority; } if (member_count == 1 || n->details->priority < lowest_priority) { lowest_priority = n->details->priority; } } // No need to delay if we have more than half of the cluster members if (online_count > member_count / 2) { return 0; } /* All the nodes have equal priority. * Any configured corresponding `pcmk_delay_base/max` will be applied. */ if (lowest_priority == top_priority) { return 0; } if (node->details->priority < top_priority) { return 0; } return data_set->priority_fencing_delay; } pe_action_t * pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set) { char *op_key = NULL; pe_action_t *stonith_op = NULL; if(op == NULL) { op = data_set->stonith_action; } op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op); stonith_op = lookup_singleton(data_set, op_key); if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname); add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id); add_hash_param(stonith_op->meta, "stonith_action", op); if (pe__is_guest_or_remote_node(node) && pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { /* Extra work to detect device changes on remotes * * We may do this for all nodes in the future, but for now * the check_action_definition() based stuff works fine. */ long max = 1024; long digests_all_offset = 0; long digests_secure_offset = 0; char *digests_all = calloc(max, sizeof(char)); char *digests_secure = calloc(max, sizeof(char)); GList *matches = find_unfencing_devices(data_set->resources, NULL); for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) { pe_resource_t *match = gIter->data; const char *agent = g_hash_table_lookup(match->meta, XML_ATTR_TYPE); op_digest_cache_t *data = NULL; data = pe__compare_fencing_digest(match, agent, node, data_set); if(data->rc == RSC_DIGEST_ALL) { optional = FALSE; crm_notice("Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); if (!pcmk__is_daemon && data_set->priv != NULL) { pcmk__output_t *out = data_set->priv; out->info(out, "notice: Unfencing %s (remote): because the definition of %s changed", node->details->uname, match->id); } } digests_all_offset += snprintf( digests_all+digests_all_offset, max-digests_all_offset, "%s:%s:%s,", match->id, agent, data->digest_all_calc); digests_secure_offset += snprintf( digests_secure+digests_secure_offset, max-digests_secure_offset, "%s:%s:%s,", match->id, agent, data->digest_secure_calc); } g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_ALL), digests_all); g_hash_table_insert(stonith_op->meta, strdup(XML_OP_ATTR_DIGESTS_SECURE), digests_secure); } } else { free(op_key); } if (data_set->priority_fencing_delay > 0 /* It's a suitable case where `priority-fencing-delay` applies. * At least add `priority-fencing-delay` field as an indicator. */ && (priority_delay /* Re-calculate priority delay for the suitable case when * pe_fence_op() is called again by stage6() after node priority has * been actually calculated with native_add_running() */ || g_hash_table_lookup(stonith_op->meta, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) { /* Add `priority-fencing-delay` to the fencing op even if it's 0 for * the targeting node. So that it takes precedence over any possible * `pcmk_delay_base/max`. */ char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, data_set)); g_hash_table_insert(stonith_op->meta, strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY), delay_s); } if(optional == FALSE && pe_can_fence(data_set, node)) { pe__clear_action_flags(stonith_op, pe_action_optional); pe_action_set_reason(stonith_op, reason, false); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void trigger_unfencing( pe_resource_t * rsc, pe_node_t *node, const char *reason, pe_action_t *dependency, pe_working_set_t * data_set) { if (!pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) { /* No resources require it */ return; } else if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_fence_device)) { /* Wasn't a stonith device */ return; } else if(node && node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { pe_action_t *unfence = pe_fence_op(node, "on", FALSE, reason, FALSE, data_set); if(dependency) { order_actions(unfence, dependency, pe_order_optional); } } else if(rsc) { GHashTableIter iter; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if(node->details->online && node->details->unclean == FALSE && node->details->shutdown == FALSE) { trigger_unfencing(rsc, node, reason, dependency, data_set); } } } } gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref) { pe_tag_t *tag = NULL; GList *gIter = NULL; gboolean is_existing = FALSE; CRM_CHECK(tags && tag_name && obj_ref, return FALSE); tag = g_hash_table_lookup(tags, tag_name); if (tag == NULL) { tag = calloc(1, sizeof(pe_tag_t)); if (tag == NULL) { return FALSE; } tag->id = strdup(tag_name); tag->refs = NULL; g_hash_table_insert(tags, strdup(tag_name), tag); } for (gIter = tag->refs; gIter != NULL; gIter = gIter->next) { const char *existing_ref = (const char *) gIter->data; if (pcmk__str_eq(existing_ref, obj_ref, pcmk__str_none)){ is_existing = TRUE; break; } } if (is_existing == FALSE) { tag->refs = g_list_append(tag->refs, strdup(obj_ref)); crm_trace("Added: tag=%s ref=%s", tag->id, obj_ref); } return TRUE; } /*! * \internal * \brief Create an action reason string based on the action itself * * \param[in] action Action to create reason string for * \param[in] flag Action flag that was cleared * * \return Newly allocated string suitable for use as action reason * \note It is the caller's responsibility to free() the result. */ char * pe__action2reason(pe_action_t *action, enum pe_action_flags flag) { const char *change = NULL; switch (flag) { case pe_action_runnable: case pe_action_migrate_runnable: change = "unrunnable"; break; case pe_action_optional: change = "required"; break; default: // Bug: caller passed unsupported flag CRM_CHECK(change != NULL, change = ""); break; } return crm_strdup_printf("%s%s%s %s", change, (action->rsc == NULL)? "" : " ", (action->rsc == NULL)? "" : action->rsc->id, action->task); } void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite) { if (action->reason != NULL && overwrite) { pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, crm_str(reason)); free(action->reason); } else if (action->reason == NULL) { pe_rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, crm_str(reason)); } else { // crm_assert(action->reason != NULL && !overwrite); return; } if (reason != NULL) { action->reason = strdup(reason); } else { action->reason = NULL; } } /*! * \internal * \brief Check whether shutdown has been requested for a node * * \param[in] node Node to check * * \return TRUE if node has shutdown attribute set and nonzero, FALSE otherwise * \note This differs from simply using node->details->shutdown in that it can * be used before that has been determined (and in fact to determine it), * and it can also be used to distinguish requested shutdown from implicit * shutdown of remote nodes by virtue of their connection stopping. */ bool pe__shutdown_requested(pe_node_t *node) { const char *shutdown = pe_node_attribute_raw(node, XML_CIB_ATTR_SHUTDOWN); return !pcmk__str_eq(shutdown, "0", pcmk__str_null_matches); } /*! * \internal * \brief Update a data set's "recheck by" time * * \param[in] recheck Epoch time when recheck should happen * \param[in,out] data_set Current working set */ void pe__update_recheck_time(time_t recheck, pe_working_set_t *data_set) { if ((recheck > get_effective_time(data_set)) && ((data_set->recheck_by == 0) || (data_set->recheck_by > recheck))) { data_set->recheck_by = recheck; } } /*! * \internal * \brief Wrapper for pe_unpack_nvpairs() using a cluster working set */ void pe__unpack_dataset_nvpairs(xmlNode *xml_obj, const char *set_name, pe_rule_eval_data_t *rule_data, GHashTable *hash, const char *always_first, gboolean overwrite, pe_working_set_t *data_set) { crm_time_t *next_change = crm_time_new_undefined(); pe_eval_nvpairs(data_set->input, xml_obj, set_name, rule_data, hash, always_first, overwrite, next_change); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, data_set); } crm_time_free(next_change); } bool pe__resource_is_disabled(pe_resource_t *rsc) { const char *target_role = NULL; CRM_CHECK(rsc != NULL, return false); target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (target_role) { enum rsc_role_e target_role_e = text2role(target_role); if ((target_role_e == RSC_ROLE_STOPPED) || ((target_role_e == RSC_ROLE_UNPROMOTED) && pcmk_is_set(uber_parent(rsc)->flags, pe_rsc_promotable))) { return true; } } return false; } /*! * \internal * \brief Create an action to clear a resource's history from CIB * * \param[in] rsc Resource to clear * \param[in] node Node to clear history on * * \return New action to clear resource history */ pe_action_t * pe__clear_resource_history(pe_resource_t *rsc, pe_node_t *node, pe_working_set_t *data_set) { char *key = NULL; CRM_ASSERT(rsc && node); key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0); return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE, data_set); } bool pe__rsc_running_on_any(pe_resource_t *rsc, GList *node_list) { for (GList *ele = rsc->running_on; ele; ele = ele->next) { pe_node_t *node = (pe_node_t *) ele->data; if (pcmk__str_in_list(node->details->uname, node_list, pcmk__str_star_matches|pcmk__str_casei)) { return true; } } return false; } bool pcmk__rsc_filtered_by_node(pe_resource_t *rsc, GList *only_node) { return (rsc->fns->active(rsc, FALSE) && !pe__rsc_running_on_any(rsc, only_node)); } GList * pe__filter_rsc_list(GList *rscs, GList *filter) { GList *retval = NULL; for (GList *gIter = rscs; gIter; gIter = gIter->next) { pe_resource_t *rsc = (pe_resource_t *) gIter->data; /* I think the second condition is safe here for all callers of this * function. If not, it needs to move into pe__node_text. */ if (pcmk__str_in_list(rsc_printable_id(rsc), filter, pcmk__str_star_matches) || (rsc->parent && pcmk__str_in_list(rsc_printable_id(rsc->parent), filter, pcmk__str_star_matches))) { retval = g_list_prepend(retval, rsc); } } return retval; } GList * pe__build_node_name_list(pe_working_set_t *data_set, const char *s) { GList *nodes = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { /* Nothing was given so return a list of all node names. Or, '*' was * given. This would normally fall into the pe__unames_with_tag branch * where it will return an empty list. Catch it here instead. */ nodes = g_list_prepend(nodes, strdup("*")); } else { pe_node_t *node = pe_find_node(data_set->nodes, s); if (node) { /* The given string was a valid uname for a node. Return a * singleton list containing just that uname. */ nodes = g_list_prepend(nodes, strdup(s)); } else { /* The given string was not a valid uname. It's either a tag or * it's a typo or something. In the first case, we'll return a * list of all the unames of the nodes with the given tag. In the * second case, we'll return a NULL pointer and nothing will * get displayed. */ nodes = pe__unames_with_tag(data_set, s); } } return nodes; } GList * pe__build_rsc_list(pe_working_set_t *data_set, const char *s) { GList *resources = NULL; if (pcmk__str_eq(s, "*", pcmk__str_null_matches)) { resources = g_list_prepend(resources, strdup("*")); } else { pe_resource_t *rsc = pe_find_resource_with_flags(data_set->resources, s, pe_find_renamed|pe_find_any); if (rsc) { /* A colon in the name we were given means we're being asked to filter * on a specific instance of a cloned resource. Put that exact string * into the filter list. Otherwise, use the printable ID of whatever * resource was found that matches what was asked for. */ if (strstr(s, ":") != NULL) { resources = g_list_prepend(resources, strdup(rsc->id)); } else { resources = g_list_prepend(resources, strdup(rsc_printable_id(rsc))); } } else { /* The given string was not a valid resource name. It's either * a tag or it's a typo or something. See build_uname_list for * more detail. */ resources = pe__rscs_with_tag(data_set, s); } } return resources; } diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c index 69503a4c0e..4eb237be49 100644 --- a/tools/crm_resource_print.c +++ b/tools/crm_resource_print.c @@ -1,816 +1,816 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #define cons_string(x) x?x:"NA" void cli_resource_print_cts_constraints(pe_working_set_t * data_set) { pcmk__output_t *out = data_set->priv; xmlNode *xml_obj = NULL; xmlNode *lifetime = NULL; xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); for (xml_obj = pcmk__xe_first_child(cib_constraints); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *id = crm_element_value(xml_obj, XML_ATTR_ID); if (id == NULL) { continue; } // @COMPAT lifetime is deprecated lifetime = first_named_child(xml_obj, "lifetime"); if (pe_evaluate_rules(lifetime, NULL, data_set->now, NULL) == FALSE) { continue; } if (!pcmk__str_eq(XML_CONS_TAG_RSC_DEPEND, crm_element_name(xml_obj), pcmk__str_casei)) { continue; } out->info(out, "Constraint %s %s %s %s %s %s %s", crm_element_name(xml_obj), cons_string(crm_element_value(xml_obj, XML_ATTR_ID)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET)), cons_string(crm_element_value(xml_obj, XML_RULE_ATTR_SCORE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_SOURCE_ROLE)), cons_string(crm_element_value(xml_obj, XML_COLOC_ATTR_TARGET_ROLE))); } } void cli_resource_print_cts(pe_resource_t * rsc, pcmk__output_t *out) { const char *host = NULL; bool needs_quorum = TRUE; const char *rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); const char *rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); const char *rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); pe_node_t *node = pe__current_node(rsc); if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { needs_quorum = FALSE; } else { // @TODO check requires in resource meta-data and rsc_defaults } if (node != NULL) { host = node->details->uname; } out->info(out, "Resource: %s %s %s %s %s %s %s %s %d %lld 0x%.16llx", crm_element_name(rsc->xml), rsc->id, rsc->clone_name ? rsc->clone_name : rsc->id, rsc->parent ? rsc->parent->id : "NA", rprov ? rprov : "NA", rclass, rtype, host ? host : "NA", needs_quorum, rsc->flags, rsc->flags); g_list_foreach(rsc->children, (GFunc) cli_resource_print_cts, out); } // \return Standard Pacemaker return code int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bool active, pe_working_set_t * data_set) { pcmk__output_t *out = data_set->priv; int rc = pcmk_rc_no_output; GList *ops = find_operations(rsc_id, host_uname, active, data_set); if (!ops) { return rc; } out->begin_list(out, NULL, NULL, "Resource Operations"); rc = pcmk_rc_ok; for (GList *lpc = ops; lpc != NULL; lpc = lpc->next) { xmlNode *xml_op = (xmlNode *) lpc->data; out->message(out, "node-and-op", data_set, xml_op); } out->end_list(out); return rc; } // \return Standard Pacemaker return code int cli_resource_print(pe_resource_t *rsc, pe_working_set_t *data_set, bool expanded) { pcmk__output_t *out = data_set->priv; unsigned int show_opts = pcmk_show_pending; GList *all = NULL; all = g_list_prepend(all, (gpointer) "*"); out->begin_list(out, NULL, NULL, "Resource Config"); out->message(out, crm_map_element_name(rsc->xml), show_opts, rsc, all, all); out->message(out, "resource-config", rsc, !expanded); out->end_list(out); g_list_free(all); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("attribute-list", "pe_resource_t *", "char *", "GHashTable *") static int attribute_list_default(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); char *attr = va_arg(args, char *); GHashTable *params = va_arg(args, GHashTable *); const char *value = NULL; if (params != NULL) { value = g_hash_table_lookup(params, attr); } if (value != NULL) { out->begin_list(out, NULL, NULL, "Attributes"); out->list_item(out, attr, "%s", value); out->end_list(out); } else { out->err(out, "Attribute '%s' not found for '%s'", attr, rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *", "const char *", "const char *", "int", "const char *") static int agent_status_default(pcmk__output_t *out, va_list args) { int status = va_arg(args, int); const char *action = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *class = va_arg(args, const char *); const char *provider = va_arg(args, const char *); const char *type = va_arg(args, const char *); int rc = va_arg(args, int); const char *exit_reason = va_arg(args, const char *); if (status == PCMK_EXEC_DONE) { /* Operation [for ] ([:]:) * returned ([: ]) */ out->info(out, "Operation %s%s%s (%s%s%s:%s) returned %d (%s%s%s)", action, ((name == NULL)? "" : " for "), ((name == NULL)? "" : name), class, ((provider == NULL)? "" : ":"), ((provider == NULL)? "" : provider), type, rc, services_ocf_exitcode_str(rc), ((exit_reason == NULL)? "" : ": "), ((exit_reason == NULL)? "" : exit_reason)); } else { /* Operation [for ] ([:]:) * could not be executed ([: ]) */ out->err(out, "Operation %s%s%s (%s%s%s:%s) could not be executed (%s%s%s)", action, ((name == NULL)? "" : " for "), ((name == NULL)? "" : name), class, ((provider == NULL)? "" : ":"), ((provider == NULL)? "" : provider), type, pcmk_exec_status_str(status), ((exit_reason == NULL)? "" : ": "), ((exit_reason == NULL)? "" : exit_reason)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *", "const char *", "const char *", "int", "const char *") static int agent_status_xml(pcmk__output_t *out, va_list args) { int status = va_arg(args, int); const char *action G_GNUC_UNUSED = va_arg(args, const char *); const char *name G_GNUC_UNUSED = va_arg(args, const char *); const char *class G_GNUC_UNUSED = va_arg(args, const char *); const char *provider G_GNUC_UNUSED = va_arg(args, const char *); const char *type G_GNUC_UNUSED = va_arg(args, const char *); int rc = va_arg(args, int); const char *exit_reason = va_arg(args, const char *); char *exit_str = pcmk__itoa(rc); char *status_str = pcmk__itoa(status); pcmk__output_create_xml_node(out, "agent-status", "code", exit_str, "message", services_ocf_exitcode_str(rc), "execution_code", status_str, "execution_message", pcmk_exec_status_str(status), "reason", exit_reason, NULL); free(exit_str); free(status_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("attribute-list", "pe_resource_t *", "char *", "GHashTable *") static int attribute_list_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); char *attr = va_arg(args, char *); GHashTable *params = va_arg(args, GHashTable *); const char *value = NULL; if (params != NULL) { value = g_hash_table_lookup(params, attr); } if (value != NULL) { pcmk__formatted_printf(out, "%s\n", value); } else { out->err(out, "Attribute '%s' not found for '%s'", attr, rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *") static int override_default(pcmk__output_t *out, va_list args) { const char *rsc_name = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); if (rsc_name == NULL) { out->list_item(out, NULL, "Overriding the cluster configuration with '%s' = '%s'", name, value); } else { out->list_item(out, NULL, "Overriding the cluster configuration for '%s' with '%s' = '%s'", rsc_name, name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *") static int override_xml(pcmk__output_t *out, va_list args) { const char *rsc_name = va_arg(args, const char *); const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, "override", "name", name, "value", value, NULL); if (rsc_name != NULL) { crm_xml_add(node, "rsc", rsc_name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("property-list", "pe_resource_t *", "char *") static int property_list_default(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); char *attr = va_arg(args, char *); const char *value = crm_element_value(rsc->xml, attr); if (value != NULL) { out->begin_list(out, NULL, NULL, "Properties"); out->list_item(out, attr, "%s", value); out->end_list(out); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("property-list", "pe_resource_t *", "char *") static int property_list_text(pcmk__output_t *out, va_list args) { pe_resource_t *rsc = va_arg(args, pe_resource_t *); char *attr = va_arg(args, char *); const char *value = crm_element_value(rsc->xml, attr); if (value != NULL) { pcmk__formatted_printf(out, "%s\n", value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *", "const char *", "const char *", "const char *", "GHashTable *", "int", "int", "const char *", "char *", "char *") static int resource_agent_action_default(pcmk__output_t *out, va_list args) { int verbose = va_arg(args, int); const char *class = va_arg(args, const char *); const char *provider = va_arg(args, const char *); const char *type = va_arg(args, const char *); const char *rsc_name = va_arg(args, const char *); const char *action = va_arg(args, const char *); GHashTable *overrides = va_arg(args, GHashTable *); int rc = va_arg(args, int); int status = va_arg(args, int); const char *exit_reason = va_arg(args, const char *); char *stdout_data = va_arg(args, char *); char *stderr_data = va_arg(args, char *); if (overrides) { GHashTableIter iter; char *name = NULL; char *value = NULL; out->begin_list(out, NULL, NULL, "overrides"); g_hash_table_iter_init(&iter, overrides); while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { out->message(out, "override", rsc_name, name, value); } out->end_list(out); } out->message(out, "agent-status", status, action, rsc_name, class, provider, type, rc, exit_reason); /* hide output for validate-all if not in verbose */ if (verbose == 0 && pcmk__str_eq(action, "validate-all", pcmk__str_casei)) { return pcmk_rc_ok; } if (stdout_data || stderr_data) { xmlNodePtr doc = NULL; if (stdout_data != NULL) { doc = string2xml(stdout_data); } if (doc != NULL) { out->output_xml(out, "command", stdout_data); xmlFreeNode(doc); } else { out->subprocess_output(out, rc, stdout_data, stderr_data); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *", "const char *", "const char *", "const char *", "GHashTable *", "int", "int", "const char *", "char *", "char *") static int resource_agent_action_xml(pcmk__output_t *out, va_list args) { int verbose G_GNUC_UNUSED = va_arg(args, int); const char *class = va_arg(args, const char *); const char *provider = va_arg(args, const char *); const char *type = va_arg(args, const char *); const char *rsc_name = va_arg(args, const char *); const char *action = va_arg(args, const char *); GHashTable *overrides = va_arg(args, GHashTable *); int rc = va_arg(args, int); int status = va_arg(args, int); const char *exit_reason = va_arg(args, const char *); char *stdout_data = va_arg(args, char *); char *stderr_data = va_arg(args, char *); xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource-agent-action", "action", action, "class", class, "type", type, NULL); if (rsc_name) { crm_xml_add(node, "rsc", rsc_name); } if (provider) { crm_xml_add(node, "provider", provider); } if (overrides) { GHashTableIter iter; char *name = NULL; char *value = NULL; out->begin_list(out, NULL, NULL, "overrides"); g_hash_table_iter_init(&iter, overrides); while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { out->message(out, "override", rsc_name, name, value); } out->end_list(out); } out->message(out, "agent-status", status, action, rsc_name, class, provider, type, rc, exit_reason); if (stdout_data || stderr_data) { xmlNodePtr doc = NULL; if (stdout_data != NULL) { doc = string2xml(stdout_data); } if (doc != NULL) { out->output_xml(out, "command", stdout_data); xmlFreeNode(doc); } else { out->subprocess_output(out, rc, stdout_data, stderr_data); } } pcmk__output_xml_pop_parent(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *") static int resource_check_list_default(pcmk__output_t *out, va_list args) { resource_checks_t *checks = va_arg(args, resource_checks_t *); pe_resource_t *parent = uber_parent(checks->rsc); int rc = pcmk_rc_no_output; bool printed = false; if (checks->flags != 0 || checks->lock_node != NULL) { printed = true; out->begin_list(out, NULL, NULL, "Resource Checks"); } if (pcmk_is_set(checks->flags, rsc_remain_stopped)) { out->list_item(out, "check", "Configuration specifies '%s' should remain stopped", parent->id); } if (pcmk_is_set(checks->flags, rsc_unpromotable)) { out->list_item(out, "check", "Configuration specifies '%s' should not be promoted", parent->id); } if (pcmk_is_set(checks->flags, rsc_unmanaged)) { out->list_item(out, "check", "Configuration prevents cluster from stopping or starting unmanaged '%s'", parent->id); } if (checks->lock_node) { out->list_item(out, "check", "'%s' is locked to node %s due to shutdown", parent->id, checks->lock_node); } if (printed) { out->end_list(out); rc = pcmk_rc_ok; } return rc; } PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *") static int resource_check_list_xml(pcmk__output_t *out, va_list args) { resource_checks_t *checks = va_arg(args, resource_checks_t *); pe_resource_t *parent = uber_parent(checks->rsc); xmlNodePtr node = pcmk__output_create_xml_node(out, "check", "id", parent->id, NULL); if (pcmk_is_set(checks->flags, rsc_remain_stopped)) { - crm_xml_add(node, "remain_stopped", "true"); + pcmk__xe_set_bool_attr(node, "remain_stopped", true); } if (pcmk_is_set(checks->flags, rsc_unpromotable)) { - crm_xml_add(node, "promotable", "false"); + pcmk__xe_set_bool_attr(node, "promotable", false); } if (pcmk_is_set(checks->flags, rsc_unmanaged)) { - crm_xml_add(node, "unmanaged", "true"); + pcmk__xe_set_bool_attr(node, "unmanaged", true); } if (checks->lock_node) { crm_xml_add(node, "locked-to", checks->lock_node); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-search-list", "GList *", "gchar *") static int resource_search_list_default(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); gchar *requested_name = va_arg(args, gchar *); bool printed = false; int rc = pcmk_rc_no_output; if (!out->is_quiet(out) && nodes == NULL) { out->err(out, "resource %s is NOT running", requested_name); return rc; } for (GList *lpc = nodes; lpc != NULL; lpc = lpc->next) { node_info_t *ni = (node_info_t *) lpc->data; if (!printed) { out->begin_list(out, NULL, NULL, "Nodes"); printed = true; rc = pcmk_rc_ok; } if (out->is_quiet(out)) { out->list_item(out, "node", "%s", ni->node_name); } else { const char *role_text = ""; if (ni->promoted) { #ifdef PCMK__COMPAT_2_0 role_text = " " RSC_ROLE_PROMOTED_LEGACY_S; #else role_text = " " RSC_ROLE_PROMOTED_S; #endif } out->list_item(out, "node", "resource %s is running on: %s%s", requested_name, ni->node_name, role_text); } } if (printed) { out->end_list(out); } return rc; } PCMK__OUTPUT_ARGS("resource-search-list", "GList *", "gchar *") static int resource_search_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); gchar *requested_name = va_arg(args, gchar *); pcmk__output_xml_create_parent(out, "nodes", "resource", requested_name, NULL); for (GList *lpc = nodes; lpc != NULL; lpc = lpc->next) { node_info_t *ni = (node_info_t *) lpc->data; xmlNodePtr sub_node = pcmk__output_create_xml_text_node(out, "node", ni->node_name); if (ni->promoted) { crm_xml_add(sub_node, "state", "promoted"); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", "pe_node_t *") static int resource_reasons_list_default(pcmk__output_t *out, va_list args) { cib_t *cib_conn = va_arg(args, cib_t *); GList *resources = va_arg(args, GList *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *host_uname = (node == NULL)? NULL : node->details->uname; out->begin_list(out, NULL, NULL, "Resource Reasons"); if ((rsc == NULL) && (host_uname == NULL)) { GList *lpc = NULL; GList *hosts = NULL; for (lpc = resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; rsc->fns->location(rsc, &hosts, TRUE); if (hosts == NULL) { out->list_item(out, "reason", "Resource %s is not running", rsc->id); } else { out->list_item(out, "reason", "Resource %s is running", rsc->id); } cli_resource_check(out, cib_conn, rsc); g_list_free(hosts); hosts = NULL; } } else if ((rsc != NULL) && (host_uname != NULL)) { if (resource_is_running_on(rsc, host_uname)) { out->list_item(out, "reason", "Resource %s is running on host %s", rsc->id, host_uname); } else { out->list_item(out, "reason", "Resource %s is not running on host %s", rsc->id, host_uname); } cli_resource_check(out, cib_conn, rsc); } else if ((rsc == NULL) && (host_uname != NULL)) { const char* host_uname = node->details->uname; GList *allResources = node->details->allocated_rsc; GList *activeResources = node->details->running_rsc; GList *unactiveResources = pcmk__subtract_lists(allResources, activeResources, (GCompareFunc) strcmp); GList *lpc = NULL; for (lpc = activeResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; out->list_item(out, "reason", "Resource %s is running on host %s", rsc->id, host_uname); cli_resource_check(out, cib_conn, rsc); } for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; out->list_item(out, "reason", "Resource %s is assigned to host %s but not running", rsc->id, host_uname); cli_resource_check(out, cib_conn, rsc); } g_list_free(allResources); g_list_free(activeResources); g_list_free(unactiveResources); } else if ((rsc != NULL) && (host_uname == NULL)) { GList *hosts = NULL; rsc->fns->location(rsc, &hosts, TRUE); out->list_item(out, "reason", "Resource %s is %srunning", rsc->id, (hosts? "" : "not ")); cli_resource_check(out, cib_conn, rsc); g_list_free(hosts); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-reasons-list", "cib_t *", "GList *", "pe_resource_t *", "pe_node_t *") static int resource_reasons_list_xml(pcmk__output_t *out, va_list args) { cib_t *cib_conn = va_arg(args, cib_t *); GList *resources = va_arg(args, GList *); pe_resource_t *rsc = va_arg(args, pe_resource_t *); pe_node_t *node = va_arg(args, pe_node_t *); const char *host_uname = (node == NULL)? NULL : node->details->uname; xmlNodePtr xml_node = pcmk__output_xml_create_parent(out, "reason", NULL); if ((rsc == NULL) && (host_uname == NULL)) { GList *lpc = NULL; GList *hosts = NULL; pcmk__output_xml_create_parent(out, "resources", NULL); for (lpc = resources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; rsc->fns->location(rsc, &hosts, TRUE); pcmk__output_xml_create_parent(out, "resource", "id", rsc->id, "running", pcmk__btoa(hosts != NULL), NULL); cli_resource_check(out, cib_conn, rsc); pcmk__output_xml_pop_parent(out); g_list_free(hosts); hosts = NULL; } pcmk__output_xml_pop_parent(out); } else if ((rsc != NULL) && (host_uname != NULL)) { if (resource_is_running_on(rsc, host_uname)) { crm_xml_add(xml_node, "running_on", host_uname); } cli_resource_check(out, cib_conn, rsc); } else if ((rsc == NULL) && (host_uname != NULL)) { const char* host_uname = node->details->uname; GList *allResources = node->details->allocated_rsc; GList *activeResources = node->details->running_rsc; GList *unactiveResources = pcmk__subtract_lists(allResources, activeResources, (GCompareFunc) strcmp); GList *lpc = NULL; pcmk__output_xml_create_parent(out, "resources", NULL); for (lpc = activeResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; pcmk__output_xml_create_parent(out, "resource", "id", rsc->id, "running", "true", "host", host_uname, NULL); cli_resource_check(out, cib_conn, rsc); pcmk__output_xml_pop_parent(out); } for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { pe_resource_t *rsc = (pe_resource_t *) lpc->data; pcmk__output_xml_create_parent(out, "resource", "id", rsc->id, "running", "false", "host", host_uname, NULL); cli_resource_check(out, cib_conn, rsc); pcmk__output_xml_pop_parent(out); } pcmk__output_xml_pop_parent(out); g_list_free(allResources); g_list_free(activeResources); g_list_free(unactiveResources); } else if ((rsc != NULL) && (host_uname == NULL)) { GList *hosts = NULL; rsc->fns->location(rsc, &hosts, TRUE); crm_xml_add(xml_node, "running", pcmk__btoa(hosts != NULL)); cli_resource_check(out, cib_conn, rsc); g_list_free(hosts); } return pcmk_rc_ok; } static void add_resource_name(pe_resource_t *rsc, pcmk__output_t *out) { if (rsc->children == NULL) { out->list_item(out, "resource", "%s", rsc->id); } else { g_list_foreach(rsc->children, (GFunc) add_resource_name, out); } } PCMK__OUTPUT_ARGS("resource-names-list", "GList *") static int resource_names(pcmk__output_t *out, va_list args) { GList *resources = va_arg(args, GList *); if (resources == NULL) { out->err(out, "NO resources configured\n"); return pcmk_rc_no_output; } out->begin_list(out, NULL, NULL, "Resource Names"); g_list_foreach(resources, (GFunc) add_resource_name, out); out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "agent-status", "default", agent_status_default }, { "agent-status", "xml", agent_status_xml }, { "attribute-list", "default", attribute_list_default }, { "attribute-list", "text", attribute_list_text }, { "override", "default", override_default }, { "override", "xml", override_xml }, { "property-list", "default", property_list_default }, { "property-list", "text", property_list_text }, { "resource-agent-action", "default", resource_agent_action_default }, { "resource-agent-action", "xml", resource_agent_action_xml }, { "resource-check-list", "default", resource_check_list_default }, { "resource-check-list", "xml", resource_check_list_xml }, { "resource-search-list", "default", resource_search_list_default }, { "resource-search-list", "xml", resource_search_list_xml }, { "resource-reasons-list", "default", resource_reasons_list_default }, { "resource-reasons-list", "xml", resource_reasons_list_xml }, { "resource-names-list", "default", resource_names }, { NULL, NULL, NULL } }; void crm_resource_register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); }