diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 11bcbef262..ba308c86f2 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -1,1534 +1,1543 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static unsigned long cib_local_bcast_num = 0; typedef struct cib_local_notify_s { xmlNode *notify_src; char *client_id; gboolean from_peer; gboolean sync_reply; } cib_local_notify_t; int next_client_id = 0; gboolean legacy_mode = FALSE; qb_ipcs_service_t *ipcs_ro = NULL; qb_ipcs_service_t *ipcs_rw = NULL; qb_ipcs_service_t *ipcs_shm = NULL; void send_cib_replace(const xmlNode * sync_request, const char *host); static void cib_process_request(xmlNode* request, gboolean force_synchronous, gboolean privileged, crm_client_t *cib_client); int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged); gboolean cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged); gboolean cib_legacy_mode(void) { return legacy_mode; } static int32_t cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (cib_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void cib_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, TRUE); } static int32_t cib_ipc_dispatch_ro(qb_ipcs_connection_t * c, void *data, size_t size) { crm_client_t *client = crm_client_get(c); crm_trace("%p message from %s", c, client->id); return cib_common_callback(c, data, size, FALSE); } /* Error code means? */ static int32_t cib_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void cib_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); cib_ipc_closed(c); if (cib_shutdown_flag) { cib_shutdown(0); } } struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, .connection_created = cib_ipc_created, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy }; void cib_common_callback_worker(uint32_t id, uint32_t flags, xmlNode * op_request, crm_client_t * cib_client, gboolean privileged) { const char *op = crm_element_value(op_request, F_CIB_OPERATION); if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, __FUNCTION__); crm_xml_add(ack, F_CIB_OPERATION, CRM_OP_REGISTER); crm_xml_add(ack, F_CIB_CLIENTID, cib_client->id); crm_ipcs_send(cib_client, id, ack, flags); cib_client->request_id = 0; free_xml(ack); } return; } else if (crm_str_eq(op, T_CIB_NOTIFY, TRUE)) { /* Update the notify filters for this client */ int on_off = 0; long long bit = 0; const char *type = crm_element_value(op_request, F_CIB_NOTIFY_TYPE); crm_element_value_int(op_request, F_CIB_NOTIFY_ACTIVATE, &on_off); crm_debug("Setting %s callbacks for %s (%s): %s", type, cib_client->name, cib_client->id, on_off ? "on" : "off"); if (safe_str_eq(type, T_CIB_POST_NOTIFY)) { bit = cib_notify_post; } else if (safe_str_eq(type, T_CIB_PRE_NOTIFY)) { bit = cib_notify_pre; } else if (safe_str_eq(type, T_CIB_UPDATE_CONFIRM)) { bit = cib_notify_confirm; } else if (safe_str_eq(type, T_CIB_DIFF_NOTIFY)) { bit = cib_notify_diff; } else if (safe_str_eq(type, T_CIB_REPLACE_NOTIFY)) { bit = cib_notify_replace; } if (on_off) { set_bit(cib_client->options, bit); } else { clear_bit(cib_client->options, bit); } if (flags & crm_ipc_client_response) { /* TODO - include rc */ crm_ipcs_send_ack(cib_client, id, flags, "ack", __FUNCTION__, __LINE__); } return; } cib_process_request(op_request, FALSE, privileged, cib_client); } int32_t cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean privileged) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; crm_client_t *cib_client = crm_client_get(c); xmlNode *op_request = crm_ipcs_recv(cib_client, data, size, &id, &flags); if (op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); } if (op_request == NULL) { crm_trace("Invalid message from %p", c); crm_ipcs_send_ack(cib_client, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } else if(cib_client == NULL) { crm_trace("Invalid client %p", c); return 0; } if (is_set(call_options, cib_sync_call)) { CRM_LOG_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(cib_client->request_id == 0); /* This means the client has two synchronous events in-flight */ cib_client->request_id = id; /* Reply only to the last one */ } if (cib_client->name == NULL) { const char *value = crm_element_value(op_request, F_CIB_CLIENTNAME); if (value == NULL) { cib_client->name = crm_itoa(cib_client->pid); } else { cib_client->name = strdup(value); if (crm_is_daemon_name(value)) { set_bit(cib_client->options, cib_is_daemon); } } } /* Allow cluster daemons more leeway before being evicted */ if (is_set(cib_client->options, cib_is_daemon)) { const char *qmax = cib_config_lookup("cluster-ipc-limit"); if (crm_set_client_queue_max(cib_client, qmax)) { crm_trace("IPC threshold for %s[%u] is now %u", cib_client->name, cib_client->pid, cib_client->queue_max); } } crm_xml_add(op_request, F_CIB_CLIENTID, cib_client->id); crm_xml_add(op_request, F_CIB_CLIENTNAME, cib_client->name); #if ENABLE_ACL CRM_LOG_ASSERT(cib_client->user != NULL); crm_acl_get_set_user(op_request, F_CIB_USER, cib_client->user); #endif crm_log_xml_trace(op_request, "Client[inbound]"); cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); return 0; } static uint64_t ping_seq = 0; static char *ping_digest = NULL; static bool ping_modified_since = FALSE; int sync_our_cib(xmlNode * request, gboolean all); static gboolean cib_digester_cb(gpointer data) { if (cib_is_master) { char buffer[32]; xmlNode *ping = create_xml_node(NULL, "ping"); ping_seq++; free(ping_digest); ping_digest = NULL; ping_modified_since = FALSE; snprintf(buffer, 32, U64T, ping_seq); crm_trace("Requesting peer digests (%s)", buffer); crm_xml_add(ping, F_TYPE, "cib"); crm_xml_add(ping, F_CIB_OPERATION, CRM_OP_PING); crm_xml_add(ping, F_CIB_PING_ID, buffer); crm_xml_add(ping, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); send_cluster_message(NULL, crm_msg_cib, ping, TRUE); free_xml(ping); } return FALSE; } static void process_ping_reply(xmlNode *reply) { uint64_t seq = 0; const char *host = crm_element_value(reply, F_ORIG); xmlNode *pong = get_message_xml(reply, F_CIB_CALLDATA); const char *seq_s = crm_element_value(pong, F_CIB_PING_ID); const char *digest = crm_element_value(pong, XML_ATTR_DIGEST); if (seq_s) { seq = crm_int_helper(seq_s, NULL); } if(digest == NULL) { crm_trace("Ignoring ping reply %s from %s with no digest", seq_s, host); } else if(seq != ping_seq) { crm_trace("Ignoring out of sequence ping reply %s from %s", seq_s, host); } else if(ping_modified_since) { crm_trace("Ignoring ping reply %s from %s: cib updated since", seq_s, host); } else { const char *version = crm_element_value(pong, XML_ATTR_CRM_VERSION); if(ping_digest == NULL) { crm_trace("Calculating new digest"); ping_digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, version); } crm_trace("Processing ping reply %s from %s (%s)", seq_s, host, digest); if(safe_str_eq(ping_digest, digest) == FALSE) { xmlNode *remote_cib = get_message_xml(pong, F_CIB_CALLDATA); crm_notice("Local CIB %s.%s.%s.%s differs from %s: %s.%s.%s.%s %p", crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(the_cib, XML_ATTR_GENERATION), crm_element_value(the_cib, XML_ATTR_NUMUPDATES), ping_digest, host, remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION_ADMIN):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_GENERATION):"_", remote_cib?crm_element_value(remote_cib, XML_ATTR_NUMUPDATES):"_", digest, remote_cib); if(remote_cib && remote_cib->children) { /* Additional debug */ xml_calculate_changes(the_cib, remote_cib); xml_log_changes(LOG_INFO, __FUNCTION__, remote_cib); crm_trace("End of differences"); } free_xml(remote_cib); sync_our_cib(reply, FALSE); } } } static void do_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { int rid = 0; int call_id = 0; ssize_t rc = 0; crm_client_t *client_obj = NULL; CRM_ASSERT(notify_src && client_id); crm_element_value_int(notify_src, F_CIB_CALLID, &call_id); client_obj = crm_client_get_by_id(client_id); if (client_obj == NULL) { crm_debug("Could not send response %d: client %s not found", call_id, client_id); return; } if (sync_reply) { if (client_obj->ipcs) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending response [call %d] to %s %s", call_id, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } } else { crm_trace("Sending event %d to %s %s", call_id, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } switch (client_obj->kind) { case CRM_CLIENT_IPC: rc = crm_ipcs_send(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); if (rc < 0) { crm_warn("%s reply to %s failed: %s " CRM_XS " rc=%lld", (sync_reply? "Synchronous" : "Asynchronous"), client_obj->name, pcmk_strerror(rc), (long long) rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case CRM_CLIENT_TLS: #endif case CRM_CLIENT_TCP: crm_remote_send(client_obj->remote, notify_src); break; default: crm_err("Unknown transport %d for %s", client_obj->kind, client_obj->name); } } static void local_notify_destroy_callback(gpointer data) { cib_local_notify_t *notify = data; free_xml(notify->notify_src); free(notify->client_id); free(notify); } static void check_local_notify(int bcast_id) { cib_local_notify_t *notify = NULL; if (!local_notify_queue) { return; } notify = g_hash_table_lookup(local_notify_queue, GINT_TO_POINTER(bcast_id)); if (notify) { do_local_notify(notify->notify_src, notify->client_id, notify->sync_reply, notify->from_peer); g_hash_table_remove(local_notify_queue, GINT_TO_POINTER(bcast_id)); } } static void queue_local_notify(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { cib_local_notify_t *notify = calloc(1, sizeof(cib_local_notify_t)); notify->notify_src = notify_src; notify->client_id = strdup(client_id); notify->sync_reply = sync_reply; notify->from_peer = from_peer; if (!local_notify_queue) { local_notify_queue = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, local_notify_destroy_callback); } g_hash_table_insert(local_notify_queue, GINT_TO_POINTER(cib_local_bcast_num), notify); } static void parse_local_options_v1(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type) && !(call_options & cib_inhibit_bcast)) { /* we need to send an update anyway */ *needs_reply = TRUE; } else { *needs_reply = FALSE; } if (host == NULL && (call_options & cib_scope_local)) { crm_trace("Processing locally scoped %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (host == NULL && cib_is_master) { crm_trace("Processing master %s op locally from %s", op, cib_client->name); *local_notify = TRUE; } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); *local_notify = TRUE; } else if (stand_alone) { *needs_forward = FALSE; *local_notify = TRUE; *process = TRUE; } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options_v2(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if (cib_op_modifies(call_type)) { if(safe_str_eq(op, CIB_OP_MASTER) || safe_str_eq(op, CIB_OP_SLAVE)) { /* Always handle these locally */ *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; return; } else { /* Redirect all other updates via CPG */ *needs_reply = TRUE; *needs_forward = TRUE; *process = FALSE; crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host ? host : "the master instance"); return; } } *process = TRUE; *needs_reply = FALSE; *local_notify = TRUE; *needs_forward = FALSE; if (stand_alone) { crm_trace("Processing %s op from %s (stand-alone)", op, cib_client->name); } else if (host == NULL) { crm_trace("Processing unaddressed %s op from %s", op, cib_client->name); } else if (safe_str_eq(host, cib_our_uname)) { crm_trace("Processing locally addressed %s op from %s", op, cib_client->name); } else { crm_trace("%s op from %s needs to be forwarded to %s", op, cib_client->name, host); *needs_forward = TRUE; *process = FALSE; } } static void parse_local_options(crm_client_t * cib_client, int call_type, int call_options, const char *host, const char *op, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { if(cib_legacy_mode()) { parse_local_options_v1(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } else { parse_local_options_v2(cib_client, call_type, call_options, host, op, local_notify, needs_reply, process, needs_forward); } } static gboolean parse_peer_options_v1(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *op = NULL; const char *host = NULL; const char *delegated = NULL; const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if (crm_is_true(update)) { *needs_reply = FALSE; if (is_reply) { *local_notify = TRUE; crm_trace("Processing global/peer update from %s" " that originated from us", originator); } else { crm_trace("Processing global/peer update from %s", originator); } return TRUE; } crm_trace("Processing %s request sent by %s", op, originator); op = crm_element_value(request, F_CIB_OPERATION); if (safe_str_eq(op, "cib_shutdown_req")) { /* Always process these */ *local_notify = FALSE; if (reply_to == NULL || is_reply) { *process = TRUE; } if (is_reply) { *needs_reply = FALSE; } return *process; } if (is_reply && safe_str_eq(op, CRM_OP_PING)) { process_ping_reply(request); return FALSE; } if (is_reply) { crm_trace("Forward reply sent from %s to local clients", originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing %s request sent to us from %s", op, originator); return TRUE; } else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) { crm_trace("Processing %s request sent to %s by %s", op, host?host:"everyone", originator); *needs_reply = TRUE; return TRUE; } else if (host == NULL && cib_is_master == TRUE) { crm_trace("Processing %s request sent to master instance from %s", op, originator); return TRUE; } delegated = crm_element_value(request, F_CIB_DELEGATED); if (delegated != NULL) { crm_trace("Ignoring msg for master instance"); } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring msg for instance on %s", crm_str(host)); } else if (reply_to == NULL && cib_is_master == FALSE) { /* this is for the master instance and we're not it */ crm_trace("Ignoring reply to %s", crm_str(reply_to)); } else if (safe_str_eq(op, "cib_shutdown_req")) { if (reply_to != NULL) { crm_debug("Processing %s from %s", op, host); *needs_reply = FALSE; } else { crm_debug("Processing %s reply from %s", op, host); } return TRUE; } else { crm_err("Nothing for us to do?"); crm_log_xml_err(request, "Peer[inbound]"); } return FALSE; } static gboolean parse_peer_options_v2(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { const char *host = NULL; const char *delegated = crm_element_value(request, F_CIB_DELEGATED); const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); const char *update = crm_element_value(request, F_CIB_GLOBAL_UPDATE); gboolean is_reply = safe_str_eq(reply_to, cib_our_uname); if(safe_str_eq(op, CIB_OP_REPLACE)) { /* sync_our_cib() sets F_CIB_ISREPLY */ if (reply_to) { delegated = reply_to; } goto skip_is_reply; } else if(safe_str_eq(op, CIB_OP_SYNC)) { } else if (is_reply && safe_str_eq(op, CRM_OP_PING)) { process_ping_reply(request); return FALSE; } else if (safe_str_eq(op, CIB_OP_UPGRADE)) { /* Only the DC (node with the oldest software) should process * this operation if F_CIB_SCHEMA_MAX is unset * * If the DC is happy it will then send out another * CIB_OP_UPGRADE which will tell all nodes to do the actual * upgrade. * * Except this time F_CIB_SCHEMA_MAX will be set which puts a * limit on how far newer nodes will go */ const char *max = crm_element_value(request, F_CIB_SCHEMA_MAX); + const char *upgrade_rc = crm_element_value(request, F_CIB_UPGRADE_RC); - crm_trace("Parsing %s operation%s for %s with max=%s", - op, is_reply?" reply":"", cib_is_master?"master":"slave", max); - if(max == NULL && cib_is_master) { + crm_trace("Parsing %s operation%s for %s with max=%s and upgrade_rc=%s", + op, (is_reply? " reply" : ""), + (cib_is_master? "master" : "slave"), + (max? max : "none"), (upgrade_rc? upgrade_rc : "none")); + + if (upgrade_rc != NULL) { + // Our upgrade request was rejected by DC, notify clients of result + crm_xml_add(request, F_CIB_RC, upgrade_rc); + + } else if ((max == NULL) && cib_is_master) { /* We are the DC, check if this upgrade is allowed */ goto skip_is_reply; } else if(max) { /* Ok, go ahead and upgrade to 'max' */ goto skip_is_reply; } else { - return FALSE; /* Ignore */ + // Ignore broadcast client requests when we're not DC + return FALSE; } } else if (crm_is_true(update)) { crm_info("Detected legacy %s global update from %s", op, originator); send_sync_request(NULL); legacy_mode = TRUE; return FALSE; } else if (is_reply && cib_op_modifies(call_type)) { crm_trace("Ignoring legacy %s reply sent from %s to local clients", op, originator); return FALSE; } else if (safe_str_eq(op, "cib_shutdown_req")) { /* Legacy handling */ crm_debug("Legacy handling of %s message from %s", op, originator); *local_notify = FALSE; if (reply_to == NULL) { *process = TRUE; } return *process; } if(is_reply) { crm_trace("Handling %s reply sent from %s to local clients", op, originator); *process = FALSE; *needs_reply = FALSE; *local_notify = TRUE; return TRUE; } skip_is_reply: *process = TRUE; *needs_reply = FALSE; if(safe_str_eq(delegated, cib_our_uname)) { *local_notify = TRUE; } else { *local_notify = FALSE; } host = crm_element_value(request, F_CIB_HOST); if (host != NULL && safe_str_eq(host, cib_our_uname)) { crm_trace("Processing %s request sent to us from %s", op, originator); *needs_reply = TRUE; return TRUE; } else if (host != NULL) { /* this is for a specific instance and we're not it */ crm_trace("Ignoring %s operation for instance on %s", op, crm_str(host)); return FALSE; } else if(is_reply == FALSE && safe_str_eq(op, CRM_OP_PING)) { *needs_reply = TRUE; } crm_trace("Processing %s request sent to everyone by %s/%s on %s %s", op, crm_element_value(request, F_CIB_CLIENTNAME), crm_element_value(request, F_CIB_CALLID), originator, (*local_notify)?"(notify)":""); return TRUE; } static gboolean parse_peer_options(int call_type, xmlNode * request, gboolean * local_notify, gboolean * needs_reply, gboolean * process, gboolean * needs_forward) { /* TODO: What happens when an update comes in after node A * requests the CIB from node B, but before it gets the reply (and * sends out the replace operation) */ if(cib_legacy_mode()) { return parse_peer_options_v1( call_type, request, local_notify, needs_reply, process, needs_forward); } else { return parse_peer_options_v2( call_type, request, local_notify, needs_reply, process, needs_forward); } } static void forward_request(xmlNode * request, crm_client_t * cib_client, int call_options) { const char *op = crm_element_value(request, F_CIB_OPERATION); const char *host = crm_element_value(request, F_CIB_HOST); crm_xml_add(request, F_CIB_DELEGATED, cib_our_uname); if (host != NULL) { crm_trace("Forwarding %s op to %s", op, host); send_cluster_message(crm_get_peer(0, host), crm_msg_cib, request, FALSE); } else { crm_trace("Forwarding %s op to master instance", op); send_cluster_message(NULL, crm_msg_cib, request, FALSE); } /* Return the request to its original state */ xml_remove_prop(request, F_CIB_DELEGATED); if (call_options & cib_discard_reply) { crm_trace("Client not interested in reply"); } } static gboolean send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gboolean broadcast) { CRM_ASSERT(msg != NULL); if (broadcast) { /* this (successful) call modified the CIB _and_ the * change needs to be broadcast... * send via HA to other nodes */ int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; const char *digest = NULL; int format = 1; CRM_LOG_ASSERT(result_diff != NULL); digest = crm_element_value(result_diff, XML_ATTR_DIGEST); crm_element_value_int(result_diff, "format", &format); cib_diff_version_details(result_diff, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); crm_trace("Sending update diff %d.%d.%d -> %d.%d.%d %s", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates, digest); crm_xml_add(msg, F_CIB_ISREPLY, originator); crm_xml_add(msg, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(msg, F_CIB_OPERATION, CIB_OP_APPLY_DIFF); crm_xml_add(msg, F_CIB_USER, CRM_DAEMON_USER); if (format == 1) { CRM_ASSERT(digest != NULL); } add_message_xml(msg, F_CIB_UPDATE_DIFF, result_diff); crm_log_xml_explicit(msg, "copy"); return send_cluster_message(NULL, crm_msg_cib, msg, TRUE); } else if (originator != NULL) { /* send reply via HA to originating node */ crm_trace("Sending request result to %s only", originator); crm_xml_add(msg, F_CIB_ISREPLY, originator); return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); } return FALSE; } static void cib_process_request(xmlNode *request, gboolean force_synchronous, gboolean privileged, crm_client_t *cib_client) { int call_type = 0; int call_options = 0; gboolean process = TRUE; gboolean is_update = TRUE; gboolean from_peer = TRUE; gboolean needs_reply = TRUE; gboolean local_notify = FALSE; gboolean needs_forward = FALSE; gboolean global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); xmlNode *op_reply = NULL; xmlNode *result_diff = NULL; int rc = pcmk_ok; const char *op = crm_element_value(request, F_CIB_OPERATION); const char *originator = crm_element_value(request, F_ORIG); const char *host = crm_element_value(request, F_CIB_HOST); const char *target = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); const char *client_id = crm_element_value(request, F_CIB_CLIENTID); const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); const char *reply_to = crm_element_value(request, F_CIB_ISREPLY); if (cib_client) { from_peer = FALSE; } crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); if (force_synchronous) { call_options |= cib_sync_call; } if (host != NULL && strlen(host) == 0) { host = NULL; } if (host) { target = host; } else if (call_options & cib_scope_local) { target = "local host"; } else { target = "master"; } if (from_peer) { crm_trace("Processing peer %s operation from %s/%s on %s intended for %s (reply=%s)", op, client_name, call_id, originator, target, reply_to); } else { crm_xml_add(request, F_ORIG, cib_our_uname); crm_trace("Processing local %s operation from %s/%s intended for %s", op, client_name, call_id, target); } rc = cib_get_operation_id(op, &call_type); if (rc != pcmk_ok) { /* TODO: construct error reply? */ crm_err("Pre-processing of command failed: %s", pcmk_strerror(rc)); return; } if (from_peer == FALSE) { parse_local_options(cib_client, call_type, call_options, host, op, &local_notify, &needs_reply, &process, &needs_forward); } else if (parse_peer_options(call_type, request, &local_notify, &needs_reply, &process, &needs_forward) == FALSE) { return; } is_update = cib_op_modifies(call_type); if (call_options & cib_discard_reply) { needs_reply = is_update; local_notify = FALSE; } if (needs_forward) { const char *host = crm_element_value(request, F_CIB_HOST); const char *section = crm_element_value(request, F_CIB_SECTION); int log_level = LOG_INFO; if (safe_str_eq(op, CRM_OP_NOOP)) { log_level = LOG_DEBUG; } do_crm_log(log_level, "Forwarding %s operation for section %s to %s (origin=%s/%s/%s)", op, section ? section : "'all'", host ? host : cib_legacy_mode() ? "master" : "all", originator ? originator : "local", client_name, call_id); forward_request(request, cib_client, call_options); return; } if (cib_status != pcmk_ok) { const char *call = crm_element_value(request, F_CIB_CALLID); rc = cib_status; crm_err("Operation ignored, cluster configuration is invalid." " Please repair and restart: %s", pcmk_strerror(cib_status)); op_reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(op_reply, F_TYPE, T_CIB); crm_xml_add(op_reply, F_CIB_OPERATION, op); crm_xml_add(op_reply, F_CIB_CALLID, call); crm_xml_add(op_reply, F_CIB_CLIENTID, client_id); crm_xml_add_int(op_reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(op_reply, F_CIB_RC, rc); crm_trace("Attaching reply output"); add_message_xml(op_reply, F_CIB_CALLDATA, the_cib); crm_log_xml_explicit(op_reply, "cib:reply"); } else if (process) { time_t finished = 0; int now = time(NULL); int level = LOG_INFO; const char *section = crm_element_value(request, F_CIB_SECTION); rc = cib_process_command(request, &op_reply, &result_diff, privileged); if (is_update == FALSE) { level = LOG_TRACE; } else if (global_update) { switch (rc) { case pcmk_ok: level = LOG_INFO; break; case -pcmk_err_old_data: case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: level = LOG_TRACE; break; default: level = LOG_ERR; } } else if (rc != pcmk_ok && is_update) { level = LOG_WARNING; } do_crm_log(level, "Completed %s operation for section %s: %s (rc=%d, origin=%s/%s/%s, version=%s.%s.%s)", op, section ? section : "'all'", pcmk_strerror(rc), rc, originator ? originator : "local", client_name, call_id, the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION_ADMIN) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_GENERATION) : "0", the_cib ? crm_element_value(the_cib, XML_ATTR_NUMUPDATES) : "0"); finished = time(NULL); if (finished - now > 3) { crm_trace("%s operation took %lds to complete", op, (long)(finished - now)); crm_write_blackbox(0, NULL); } if (op_reply == NULL && (needs_reply || local_notify)) { crm_err("Unexpected NULL reply to message"); crm_log_xml_err(request, "null reply"); needs_reply = FALSE; local_notify = FALSE; } } /* from now on we are the server */ if(is_update && cib_legacy_mode() == FALSE) { crm_trace("Completed pre-sync update from %s/%s/%s%s", originator ? originator : "local", client_name, call_id, local_notify?" with local notification":""); } else if (needs_reply == FALSE || stand_alone) { /* nothing more to do... * this was a non-originating slave update */ crm_trace("Completed slave update"); } else if (cib_legacy_mode() && rc == pcmk_ok && result_diff != NULL && !(call_options & cib_inhibit_bcast)) { gboolean broadcast = FALSE; cib_local_bcast_num++; crm_xml_add_int(request, F_CIB_LOCAL_NOTIFY_ID, cib_local_bcast_num); broadcast = send_peer_reply(request, result_diff, originator, TRUE); if (broadcast && client_id && local_notify && op_reply) { /* If we have been asked to sync the reply, * and a bcast msg has gone out, we queue the local notify * until we know the bcast message has been received */ local_notify = FALSE; crm_trace("Queuing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); queue_local_notify(op_reply, client_id, (call_options & cib_sync_call), from_peer); op_reply = NULL; /* the reply is queued, so don't free here */ } } else if (call_options & cib_discard_reply) { crm_trace("Caller isn't interested in reply"); } else if (from_peer) { if (is_update == FALSE || result_diff == NULL) { crm_trace("Request not broadcast: R/O call"); } else if (call_options & cib_inhibit_bcast) { crm_trace("Request not broadcast: inhibited"); } else if (rc != pcmk_ok) { crm_trace("Request not broadcast: call failed: %s", pcmk_strerror(rc)); } else { crm_trace("Directing reply to %s", originator); } send_peer_reply(op_reply, result_diff, originator, FALSE); } if (local_notify && client_id) { crm_trace("Performing local %ssync notification for %s", (call_options & cib_sync_call) ? "" : "a-", client_id); if (process == FALSE) { do_local_notify(request, client_id, call_options & cib_sync_call, from_peer); } else { do_local_notify(op_reply, client_id, call_options & cib_sync_call, from_peer); } } free_xml(op_reply); free_xml(result_diff); return; } int cib_process_command(xmlNode * request, xmlNode ** reply, xmlNode ** cib_diff, gboolean privileged) { xmlNode *input = NULL; xmlNode *output = NULL; xmlNode *result_cib = NULL; xmlNode *current_cib = NULL; int call_type = 0; int call_options = 0; const char *op = NULL; const char *section = NULL; const char *call_id = crm_element_value(request, F_CIB_CALLID); int rc = pcmk_ok; int rc2 = pcmk_ok; gboolean send_r_notify = FALSE; gboolean global_update = FALSE; gboolean config_changed = FALSE; gboolean manage_counters = TRUE; static mainloop_timer_t *digest_timer = NULL; CRM_ASSERT(cib_status == pcmk_ok); if(digest_timer == NULL) { digest_timer = mainloop_timer_add("digester", 5000, FALSE, cib_digester_cb, NULL); } *reply = NULL; *cib_diff = NULL; current_cib = the_cib; /* Start processing the request... */ op = crm_element_value(request, F_CIB_OPERATION); crm_element_value_int(request, F_CIB_CALLOPTS, &call_options); rc = cib_get_operation_id(op, &call_type); if (rc == pcmk_ok && privileged == FALSE) { rc = cib_op_can_run(call_type, call_options, privileged, global_update); } rc2 = cib_op_prepare(call_type, request, &input, §ion); if (rc == pcmk_ok) { rc = rc2; } if (rc != pcmk_ok) { crm_trace("Call setup failed: %s", pcmk_strerror(rc)); goto done; } else if (cib_op_modifies(call_type) == FALSE) { rc = cib_perform_op(op, call_options, cib_op_func(call_type), TRUE, section, request, input, FALSE, &config_changed, current_cib, &result_cib, NULL, &output); CRM_CHECK(result_cib == NULL, free_xml(result_cib)); goto done; } /* Handle a valid write action */ global_update = crm_is_true(crm_element_value(request, F_CIB_GLOBAL_UPDATE)); if (global_update) { /* legacy code */ manage_counters = FALSE; call_options |= cib_force_diff; crm_trace("Global update detected"); CRM_CHECK(call_type == 3 || call_type == 4, crm_err("Call type: %d", call_type); crm_log_xml_err(request, "bad op")); } if (rc == pcmk_ok) { ping_modified_since = TRUE; if (call_options & cib_inhibit_bcast) { /* skip */ crm_trace("Skipping update: inhibit broadcast"); manage_counters = FALSE; } if (is_not_set(call_options, cib_dryrun) && safe_str_eq(section, XML_CIB_TAG_STATUS)) { /* Copying large CIBs accounts for a huge percentage of our CIB usage */ call_options |= cib_zero_copy; } else { clear_bit(call_options, cib_zero_copy); } /* result_cib must not be modified after cib_perform_op() returns */ rc = cib_perform_op(op, call_options, cib_op_func(call_type), FALSE, section, request, input, manage_counters, &config_changed, current_cib, &result_cib, cib_diff, &output); if (manage_counters == FALSE) { int format = 1; /* Legacy code * If the diff is NULL at this point, it's because nothing changed */ if (*cib_diff) { crm_element_value_int(*cib_diff, "format", &format); } if (format == 1) { config_changed = cib_config_changed(NULL, NULL, cib_diff); } } /* Always write to disk for replace ops, * this also negates the need to detect ordering changes */ if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { config_changed = TRUE; } } if (rc == pcmk_ok && is_not_set(call_options, cib_dryrun)) { crm_trace("Activating %s->%s%s%s", crm_element_value(current_cib, XML_ATTR_NUMUPDATES), crm_element_value(result_cib, XML_ATTR_NUMUPDATES), (is_set(call_options, cib_zero_copy)? " zero-copy" : ""), (config_changed? " changed" : "")); if(is_not_set(call_options, cib_zero_copy)) { rc = activateCibXml(result_cib, config_changed, op); crm_trace("Activated %s (%d)", crm_element_value(current_cib, XML_ATTR_NUMUPDATES), rc); } if (rc == pcmk_ok && cib_internal_config_changed(*cib_diff)) { cib_read_config(config_hash, result_cib); } if (crm_str_eq(CIB_OP_REPLACE, op, TRUE)) { if (section == NULL) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_TAG_CIB)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_NODES)) { send_r_notify = TRUE; } else if (safe_str_eq(section, XML_CIB_TAG_STATUS)) { send_r_notify = TRUE; } } else if (crm_str_eq(CIB_OP_ERASE, op, TRUE)) { send_r_notify = TRUE; } mainloop_timer_stop(digest_timer); mainloop_timer_start(digest_timer); } else if (rc == -pcmk_err_schema_validation) { CRM_ASSERT(is_not_set(call_options, cib_zero_copy)); if (output != NULL) { crm_log_xml_info(output, "cib:output"); free_xml(output); } output = result_cib; } else { crm_trace("Not activating %d %d %s", rc, is_set(call_options, cib_dryrun), crm_element_value(result_cib, XML_ATTR_NUMUPDATES)); if(is_not_set(call_options, cib_zero_copy)) { free_xml(result_cib); } } if ((call_options & (cib_inhibit_notify|cib_dryrun)) == 0) { const char *client = crm_element_value(request, F_CIB_CLIENTNAME); crm_trace("Sending notifications %d", is_set(call_options, cib_dryrun)); cib_diff_notify(call_options, client, call_id, op, input, rc, *cib_diff); } if (send_r_notify) { const char *origin = crm_element_value(request, F_ORIG); cib_replace_notify(origin, the_cib, rc, *cib_diff); } xml_log_patchset(LOG_TRACE, "cib:diff", *cib_diff); done: if ((call_options & cib_discard_reply) == 0) { const char *caller = crm_element_value(request, F_CIB_CLIENTID); *reply = create_xml_node(NULL, "cib-reply"); crm_xml_add(*reply, F_TYPE, T_CIB); crm_xml_add(*reply, F_CIB_OPERATION, op); crm_xml_add(*reply, F_CIB_CALLID, call_id); crm_xml_add(*reply, F_CIB_CLIENTID, caller); crm_xml_add_int(*reply, F_CIB_CALLOPTS, call_options); crm_xml_add_int(*reply, F_CIB_RC, rc); if (output != NULL) { crm_trace("Attaching reply output"); add_message_xml(*reply, F_CIB_CALLDATA, output); } crm_log_xml_explicit(*reply, "cib:reply"); } crm_trace("cleanup"); if (cib_op_modifies(call_type) == FALSE && output != current_cib) { free_xml(output); output = NULL; } if (call_type >= 0) { cib_op_cleanup(call_type, call_options, &input, &output); } crm_trace("done"); return rc; } void cib_peer_callback(xmlNode * msg, void *private_data) { const char *reason = NULL; const char *originator = crm_element_value(msg, F_ORIG); if (cib_legacy_mode() && (originator == NULL || crm_str_eq(originator, cib_our_uname, TRUE))) { /* message is from ourselves */ int bcast_id = 0; if (!(crm_element_value_int(msg, F_CIB_LOCAL_NOTIFY_ID, &bcast_id))) { check_local_notify(bcast_id); } return; } else if (crm_peer_cache == NULL) { reason = "membership not established"; goto bail; } if (crm_element_value(msg, F_CIB_CLIENTNAME) == NULL) { crm_xml_add(msg, F_CIB_CLIENTNAME, originator); } /* crm_log_xml_trace("Peer[inbound]", msg); */ cib_process_request(msg, FALSE, TRUE, NULL); return; bail: if (reason) { const char *seq = crm_element_value(msg, F_SEQ); const char *op = crm_element_value(msg, F_CIB_OPERATION); crm_warn("Discarding %s message (%s) from %s: %s", op, seq, originator, reason); } } static gboolean cib_force_exit(gpointer data) { crm_notice("Forcing exit!"); terminate_cib(__FUNCTION__, CRM_EX_ERROR); return FALSE; } static void disconnect_remote_client(gpointer key, gpointer value, gpointer user_data) { crm_client_t *a_client = value; crm_err("Disconnecting %s... Not implemented", crm_str(a_client->name)); } void cib_shutdown(int nsig) { struct qb_ipcs_stats srv_stats; if (cib_shutdown_flag == FALSE) { int disconnects = 0; qb_ipcs_connection_t *c = NULL; cib_shutdown_flag = TRUE; c = qb_ipcs_connection_first_get(ipcs_rw); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_rw, last); crm_debug("Disconnecting r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_ro); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_ro, last); crm_debug("Disconnecting r/o client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } c = qb_ipcs_connection_first_get(ipcs_shm); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(ipcs_shm, last); crm_debug("Disconnecting non-blocking r/w client %p...", last); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); disconnects++; } disconnects += crm_hash_table_size(client_connections); crm_debug("Disconnecting %d remote clients", crm_hash_table_size(client_connections)); g_hash_table_foreach(client_connections, disconnect_remote_client, NULL); crm_info("Disconnected %d clients", disconnects); } qb_ipcs_stats_get(ipcs_rw, &srv_stats, QB_FALSE); if (crm_hash_table_size(client_connections) == 0) { crm_info("All clients disconnected (%d)", srv_stats.active_connections); initiate_exit(); } else { crm_info("Waiting on %d clients to disconnect (%d)", crm_hash_table_size(client_connections), srv_stats.active_connections); } } void initiate_exit(void) { int active = 0; xmlNode *leaving = NULL; active = crm_active_peers(); if (active < 2) { terminate_cib(__FUNCTION__, 0); return; } crm_info("Sending disconnect notification to %d peers...", active); leaving = create_xml_node(NULL, "exit-notification"); crm_xml_add(leaving, F_TYPE, "cib"); crm_xml_add(leaving, F_CIB_OPERATION, "cib_shutdown_req"); send_cluster_message(NULL, crm_msg_cib, leaving, TRUE); free_xml(leaving); g_timeout_add(crm_get_msec("5s"), cib_force_exit, NULL); } extern int remote_fd; extern int remote_tls_fd; /*! * \internal * \brief Close remote sockets, free the global CIB and quit * * \param[in] caller Name of calling function (for log message) * \param[in] fast If -1, skip disconnect; if positive, exit that */ void terminate_cib(const char *caller, int fast) { crm_info("%s: Exiting%s...", caller, (fast > 0)? " fast" : mainloop ? " from mainloop" : ""); if (remote_fd > 0) { close(remote_fd); remote_fd = 0; } if (remote_tls_fd > 0) { close(remote_tls_fd); remote_tls_fd = 0; } uninitializeCib(); if (fast > 0) { /* Quit fast on error */ cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(fast); } else if ((mainloop != NULL) && g_main_is_running(mainloop)) { /* Quit via returning from the main loop. If fast == -1, we skip the * disconnect here, and it will be done when the main loop returns * (this allows the peer status callback to avoid messing with the * peer caches). */ if (fast == 0) { crm_cluster_disconnect(&crm_cluster); } g_main_loop_quit(mainloop); } else { /* Quit via clean exit. Even the peer status callback can disconnect * here, because we're not returning control to the caller. */ crm_cluster_disconnect(&crm_cluster); cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); crm_exit(CRM_EX_OK); } } diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c index 9d733dc92b..4f513e3584 100644 --- a/daemons/based/based_messages.c +++ b/daemons/based/based_messages.c @@ -1,500 +1,534 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Maximum number of diffs to ignore while waiting for a resync */ #define MAX_DIFF_RETRY 5 gboolean cib_is_master = FALSE; xmlNode *the_cib = NULL; int revision_check(xmlNode * cib_update, xmlNode * cib_copy, int flags); int get_revision(xmlNode * xml_obj, int cur_revision); int updateList(xmlNode * local_cib, xmlNode * update_command, xmlNode * failed, int operation, const char *section); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int sync_our_cib(xmlNode * request, gboolean all); int cib_process_shutdown_req(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = crm_element_value(req, F_ORIG); *answer = NULL; if (crm_element_value(req, F_CIB_ISREPLY) == NULL) { crm_info("Peer %s is requesting to shut down", host); return pcmk_ok; } if (cib_shutdown_flag == FALSE) { crm_err("Peer %s mistakenly thinks we wanted to shut down", host); return -EINVAL; } crm_info("Peer %s has acknowledged our shutdown request", host); terminate_cib(__FUNCTION__, 0); return pcmk_ok; } int cib_process_default(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); *answer = NULL; if (op == NULL) { result = -EINVAL; crm_err("No operation specified"); } else if (strcasecmp(CRM_OP_NOOP, op) == 0) { ; } else { result = -EPROTONOSUPPORT; crm_err("Action [%s] is not supported by the CIB manager", op); } return result; } int cib_process_readwrite(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int result = pcmk_ok; crm_trace("Processing \"%s\" event", op); if (safe_str_eq(op, CIB_OP_ISMASTER)) { if (cib_is_master == TRUE) { result = pcmk_ok; } else { result = -EPERM; } return result; } if (safe_str_eq(op, CIB_OP_MASTER)) { if (cib_is_master == FALSE) { crm_info("We are now in R/W mode"); cib_is_master = TRUE; } else { crm_debug("We are still in R/W mode"); } } else if (cib_is_master) { crm_info("We are now in R/O mode"); cib_is_master = FALSE; } return result; } /* Set to 1 when a sync is requested, incremented when a diff is ignored, * reset to 0 when a sync is received */ static int sync_in_progress = 0; void send_sync_request(const char *host) { xmlNode *sync_me = create_xml_node(NULL, "sync-me"); crm_info("Requesting re-sync from %s", (host? host : "all peers")); sync_in_progress = 1; crm_xml_add(sync_me, F_TYPE, "cib"); crm_xml_add(sync_me, F_CIB_OPERATION, CIB_OP_SYNC_ONE); crm_xml_add(sync_me, F_CIB_DELEGATED, cib_our_uname); send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); free_xml(sync_me); } int cib_process_ping(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *host = crm_element_value(req, F_ORIG); const char *seq = crm_element_value(req, F_CIB_PING_ID); char *digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); static struct qb_log_callsite *cs = NULL; crm_trace("Processing \"%s\" event %s from %s", op, seq, host); *answer = create_xml_node(NULL, XML_CRM_TAG_PING); crm_xml_add(*answer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(*answer, XML_ATTR_DIGEST, digest); crm_xml_add(*answer, F_CIB_PING_ID, seq); if (cs == NULL) { cs = qb_log_callsite_get(__func__, __FILE__, __FUNCTION__, LOG_TRACE, __LINE__, crm_trace_nonlog); } if (cs && cs->targets) { /* Append additional detail so the reciever can log the differences */ add_message_xml(*answer, F_CIB_CALLDATA, the_cib); } else { /* Always include at least the version details */ const char *tag = TYPE(the_cib); xmlNode *shallow = create_xml_node(NULL, tag); copy_in_properties(shallow, the_cib); add_message_xml(*answer, F_CIB_CALLDATA, shallow); free_xml(shallow); } crm_info("Reporting our current digest to %s: %s for %s.%s.%s (%p %d)", host, digest, crm_element_value(existing_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(existing_cib, XML_ATTR_GENERATION), crm_element_value(existing_cib, XML_ATTR_NUMUPDATES), existing_cib, cs && cs->targets); free(digest); return pcmk_ok; } int cib_process_sync(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, TRUE); } int cib_process_upgrade_server(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; *answer = NULL; if(crm_element_value(req, F_CIB_SCHEMA_MAX)) { + /* The originator of an upgrade request sends it to the DC, without + * F_CIB_SCHEMA_MAX. If an upgrade is needed, the DC re-broadcasts the + * request with F_CIB_SCHEMA_MAX, and each node performs the upgrade + * (and notifies its local clients) here. + */ return cib_process_upgrade( op, options, section, req, input, existing_cib, result_cib, answer); } else { int new_version = 0; int current_version = 0; xmlNode *scratch = copy_xml(existing_cib); const char *host = crm_element_value(req, F_ORIG); const char *value = crm_element_value(existing_cib, XML_ATTR_VALIDATION); + const char *client_id = crm_element_value(req, F_CIB_CLIENTID); + const char *call_opts = crm_element_value(req, F_CIB_CALLOPTS); + const char *call_id = crm_element_value(req, F_CIB_CALLID); crm_trace("Processing \"%s\" event", op); if (value != NULL) { current_version = get_schema_version(value); } rc = update_validation(&scratch, &new_version, 0, TRUE, TRUE); if (new_version > current_version) { xmlNode *up = create_xml_node(NULL, __FUNCTION__); rc = pcmk_ok; crm_notice("Upgrade request from %s verified", host); crm_xml_add(up, F_TYPE, "cib"); crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); crm_xml_add(up, F_CIB_SCHEMA_MAX, get_schema_name(new_version)); crm_xml_add(up, F_CIB_DELEGATED, host); - crm_xml_add(up, F_CIB_CLIENTID, crm_element_value(req, F_CIB_CLIENTID)); - crm_xml_add(up, F_CIB_CALLOPTS, crm_element_value(req, F_CIB_CALLOPTS)); - crm_xml_add(up, F_CIB_CALLID, crm_element_value(req, F_CIB_CALLID)); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); if (cib_legacy_mode() && cib_is_master) { rc = cib_process_upgrade( op, options, section, up, input, existing_cib, result_cib, answer); } else { send_cluster_message(NULL, crm_msg_cib, up, FALSE); } free_xml(up); } else if(rc == pcmk_ok) { rc = -pcmk_err_schema_unchanged; } + if (rc != pcmk_ok) { + // Notify originating peer so it can notify its local clients + crm_node_t *origin = crm_find_peer(0, host); + + crm_info("Rejecting upgrade request from %s: %s " + CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, + (origin? origin->uname : "lost")); + + if (origin) { + xmlNode *up = create_xml_node(NULL, __FUNCTION__); + + crm_xml_add(up, F_TYPE, "cib"); + crm_xml_add(up, F_CIB_OPERATION, CIB_OP_UPGRADE); + crm_xml_add(up, F_CIB_DELEGATED, host); + crm_xml_add(up, F_CIB_ISREPLY, host); + crm_xml_add(up, F_CIB_CLIENTID, client_id); + crm_xml_add(up, F_CIB_CALLOPTS, call_opts); + crm_xml_add(up, F_CIB_CALLID, call_id); + crm_xml_add_int(up, F_CIB_UPGRADE_RC, rc); + if (send_cluster_message(origin, crm_msg_cib, up, TRUE) + == FALSE) { + crm_warn("Could not send CIB upgrade result to %s", host); + } + free_xml(up); + } + } free_xml(scratch); } return rc; } int cib_process_sync_one(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { return sync_our_cib(req, FALSE); } int cib_server_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { int rc = pcmk_ok; if (sync_in_progress > MAX_DIFF_RETRY) { /* Don't ignore diffs forever; the last request may have been lost. * If the diff fails, we'll ask for another full resync. */ sync_in_progress = 0; } /* The master should never ignore a diff */ if (sync_in_progress && !cib_is_master) { int diff_add_updates = 0; int diff_add_epoch = 0; int diff_add_admin_epoch = 0; int diff_del_updates = 0; int diff_del_epoch = 0; int diff_del_admin_epoch = 0; cib_diff_version_details(input, &diff_add_admin_epoch, &diff_add_epoch, &diff_add_updates, &diff_del_admin_epoch, &diff_del_epoch, &diff_del_updates); sync_in_progress++; crm_notice("Not applying diff %d.%d.%d -> %d.%d.%d (sync in progress)", diff_del_admin_epoch, diff_del_epoch, diff_del_updates, diff_add_admin_epoch, diff_add_epoch, diff_add_updates); return -pcmk_err_diff_resync; } rc = cib_process_diff(op, options, section, req, input, existing_cib, result_cib, answer); crm_trace("result: %s (%d), %s", pcmk_strerror(rc), rc, cib_is_master?"master":"slave"); if (rc == -pcmk_err_diff_resync && cib_is_master == FALSE) { free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } else if (rc == -pcmk_err_diff_resync) { rc = -pcmk_err_diff_failed; if (options & cib_force_diff) { crm_warn("Not requesting full refresh in R/W mode"); } } else if ((rc != pcmk_ok) && !cib_is_master && cib_legacy_mode()) { crm_warn("Requesting full CIB refresh because update failed: %s" CRM_XS " rc=%d", pcmk_strerror(rc), rc); xml_log_patchset(LOG_INFO, __FUNCTION__, input); free_xml(*result_cib); *result_cib = NULL; send_sync_request(NULL); } return rc; } int cib_process_replace_svr(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { const char *tag = crm_element_name(input); int rc = cib_process_replace(op, options, section, req, input, existing_cib, result_cib, answer); if (rc == pcmk_ok && safe_str_eq(tag, XML_TAG_CIB)) { sync_in_progress = 0; } return rc; } static int delete_cib_object(xmlNode * parent, xmlNode * delete_spec) { const char *object_name = NULL; const char *object_id = NULL; xmlNode *equiv_node = NULL; int result = pcmk_ok; if (delete_spec != NULL) { object_name = crm_element_name(delete_spec); } object_id = crm_element_value(delete_spec, XML_ATTR_ID); crm_trace("Processing: <%s id=%s>", crm_str(object_name), crm_str(object_id)); if (delete_spec == NULL) { result = -EINVAL; } else if (parent == NULL) { result = -EINVAL; } else if (object_id == NULL) { /* placeholder object */ equiv_node = find_xml_node(parent, object_name, FALSE); } else { equiv_node = find_entity(parent, object_name, object_id); } if (result != pcmk_ok) { ; /* nothing */ } else if (equiv_node == NULL) { result = pcmk_ok; } else if (xml_has_children(delete_spec) == FALSE) { /* only leaves are deleted */ crm_debug("Removing leaf: <%s id=%s>", crm_str(object_name), crm_str(object_id)); free_xml(equiv_node); equiv_node = NULL; } else { xmlNode *child = NULL; for (child = __xml_first_child(delete_spec); child != NULL; child = __xml_next(child)) { int tmp_result = delete_cib_object(equiv_node, child); /* only the first error is likely to be interesting */ if (tmp_result != pcmk_ok && result == pcmk_ok) { result = tmp_result; } } } return result; } int cib_process_delete_absolute(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer) { xmlNode *failed = NULL; int result = pcmk_ok; xmlNode *update_section = NULL; crm_trace("Processing \"%s\" event for section=%s", op, crm_str(section)); if (safe_str_eq(XML_CIB_TAG_SECTION_ALL, section)) { section = NULL; } else if (safe_str_eq(XML_TAG_CIB, section)) { section = NULL; } else if (safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { section = NULL; } CRM_CHECK(strcasecmp(CIB_OP_DELETE, op) == 0, return -EINVAL); if (input == NULL) { crm_err("Cannot perform modification with no data"); return -EINVAL; } failed = create_xml_node(NULL, XML_TAG_FAILED); update_section = get_object_root(section, *result_cib); result = delete_cib_object(update_section, input); update_results(failed, input, op, result); if ((result == pcmk_ok) && xml_has_children(failed)) { result = -EINVAL; } if (result != pcmk_ok) { crm_log_xml_err(failed, "CIB Update failures"); *answer = failed; } else { free_xml(failed); } return result; } int sync_our_cib(xmlNode * request, gboolean all) { int result = pcmk_ok; char *digest = NULL; const char *host = crm_element_value(request, F_ORIG); const char *op = crm_element_value(request, F_CIB_OPERATION); xmlNode *replace_request = cib_msg_copy(request, FALSE); CRM_CHECK(the_cib != NULL,;); CRM_CHECK(replace_request != NULL,;); crm_debug("Syncing CIB to %s", all ? "all peers" : host); if (all == FALSE && host == NULL) { crm_log_xml_err(request, "bad sync"); } /* remove the "all == FALSE" condition * * sync_from was failing, the local client wasn't being notified * because it didn't know it was a reply * setting this does not prevent the other nodes from applying it * if all == TRUE */ if (host != NULL) { crm_xml_add(replace_request, F_CIB_ISREPLY, host); } if (all) { xml_remove_prop(replace_request, F_CIB_HOST); } crm_xml_add(replace_request, F_CIB_OPERATION, CIB_OP_REPLACE); crm_xml_add(replace_request, "original_" F_CIB_OPERATION, op); crm_xml_add(replace_request, F_CIB_GLOBAL_UPDATE, XML_BOOLEAN_TRUE); crm_xml_add(replace_request, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); digest = calculate_xml_versioned_digest(the_cib, FALSE, TRUE, CRM_FEATURE_SET); crm_xml_add(replace_request, XML_ATTR_DIGEST, digest); add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); if (send_cluster_message (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { result = -ENOTCONN; } free_xml(replace_request); free(digest); return result; } diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 9748a3a6ec..04051d7055 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -1,2675 +1,2676 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define START_DELAY_THRESHOLD 5 * 60 * 1000 #define MAX_LRM_REG_FAILS 30 #define s_if_plural(i) (((i) == 1)? "" : "s") struct delete_event_s { int rc; const char *rsc; lrm_state_t *lrm_state; }; static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id); static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list); static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data); static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name); static lrmd_event_data_t *construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation); static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request); void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id); static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level); static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op); static void lrm_connection_destroy(void) { if (is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_crit("Connection to executor failed"); register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL); clear_bit(fsa_input_register, R_LRM_CONNECTED); } else { crm_info("Disconnected from executor"); } } static char * make_stop_id(const char *rsc, int call_id) { return crm_strdup_printf("%s:%d", rsc, call_id); } static void copy_instance_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") == NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } static void copy_meta_keys(gpointer key, gpointer value, gpointer user_data) { if (strstr(key, CRM_META "_") != NULL) { g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value)); } } /*! * \internal * \brief Remove a recurring operation from a resource's history * * \param[in,out] history Resource history to modify * \param[in] op Operation to remove * * \return TRUE if the operation was found and removed, FALSE otherwise */ static gboolean history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_event_data_t *existing = iter->data; if ((op->interval_ms == existing->interval_ms) && crm_str_eq(op->rsc_id, existing->rsc_id, TRUE) && safe_str_eq(op->op_type, existing->op_type)) { history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter); lrmd_free_event(existing); return TRUE; } } return FALSE; } /*! * \internal * \brief Free all recurring operations in resource history * * \param[in,out] history Resource history to modify */ static void history_free_recurring_ops(rsc_history_t *history) { GList *iter; for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) { lrmd_free_event(iter->data); } g_list_free(history->recurring_op_list); history->recurring_op_list = NULL; } /*! * \internal * \brief Free resource history * * \param[in,out] history Resource history to free */ void history_free(gpointer data) { rsc_history_t *history = (rsc_history_t*)data; if (history->stop_params) { g_hash_table_destroy(history->stop_params); } /* Don't need to free history->rsc.id because it's set to history->id */ free(history->rsc.type); free(history->rsc.standard); free(history->rsc.provider); lrmd_free_event(history->failed); lrmd_free_event(history->last); free(history->id); history_free_recurring_ops(history); free(history); } static void update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { int target_rc = 0; rsc_history_t *entry = NULL; if (op->rsc_deleted) { crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type); delete_rsc_status(lrm_state, op->rsc_id, cib_quorum_override, NULL); return; } if (safe_str_eq(op->op_type, RSC_NOTIFY)) { return; } crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type); entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id); if (entry == NULL && rsc) { entry = calloc(1, sizeof(rsc_history_t)); entry->id = strdup(op->rsc_id); g_hash_table_insert(lrm_state->resource_history, entry->id, entry); entry->rsc.id = entry->id; entry->rsc.type = strdup(rsc->type); entry->rsc.standard = strdup(rsc->standard); if (rsc->provider) { entry->rsc.provider = strdup(rsc->provider); } else { entry->rsc.provider = NULL; } } else if (entry == NULL) { crm_info("Resource %s no longer exists, not updating cache", op->rsc_id); return; } entry->last_callid = op->call_id; target_rc = rsc_op_expected_rc(op); if (op->op_status == PCMK_LRM_OP_CANCELLED) { if (op->interval_ms > 0) { crm_trace("Removing cancelled recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); history_remove_recurring_op(entry, op); return; } else { crm_trace("Skipping " CRM_OP_FMT " rc=%d, status=%d", op->rsc_id, op->op_type, op->interval_ms, op->rc, op->op_status); } } else if (did_rsc_op_fail(op, target_rc)) { /* Store failed monitors here, otherwise the block below will cause them * to be forgotten when a stop happens. */ if (entry->failed) { lrmd_free_event(entry->failed); } entry->failed = lrmd_copy_event(op); } else if (op->interval_ms == 0) { if (entry->last) { lrmd_free_event(entry->last); } entry->last = lrmd_copy_event(op); if (op->params && (safe_str_eq(CRMD_ACTION_START, op->op_type) || safe_str_eq("reload", op->op_type) || safe_str_eq(CRMD_ACTION_STATUS, op->op_type))) { if (entry->stop_params) { g_hash_table_destroy(entry->stop_params); } entry->stop_params = crm_str_table_new(); g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params); } } if (op->interval_ms > 0) { /* Ensure there are no duplicates */ history_remove_recurring_op(entry, op); crm_trace("Adding recurring op: " CRM_OP_FMT, op->rsc_id, op->op_type, op->interval_ms); entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op)); } else if (entry->recurring_op_list && safe_str_eq(op->op_type, RSC_STATUS) == FALSE) { crm_trace("Dropping %d recurring ops because of: " CRM_OP_FMT, g_list_length(entry->recurring_op_list), op->rsc_id, op->op_type, op->interval_ms); history_free_recurring_ops(entry); } } /*! * \internal * \brief Send a direct OK ack for a resource task * * \param[in] lrm_state LRM connection * \param[in] input Input message being ack'ed * \param[in] rsc_id ID of affected resource * \param[in] rsc Affected resource (if available) * \param[in] task Operation task being ack'ed * \param[in] ack_host Name of host to send ack to * \param[in] ack_sys IPC system name to ack */ static void send_task_ok_ack(lrm_state_t *lrm_state, ha_msg_input_t *input, const char *rsc_id, lrmd_rsc_info_t *rsc, const char *task, const char *ack_host, const char *ack_sys) { lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task); op->rc = PCMK_OCF_OK; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(ack_host, ack_sys, rsc, op, rsc_id); lrmd_free_event(op); } void lrm_op_callback(lrmd_event_data_t * op) { const char *nodename = NULL; lrm_state_t *lrm_state = NULL; CRM_CHECK(op != NULL, return); /* determine the node name for this connection. */ nodename = op->remote_nodename ? op->remote_nodename : fsa_our_uname; if (op->type == lrmd_event_disconnect && (safe_str_eq(nodename, fsa_our_uname))) { /* If this is the local executor IPC connection, set the right bits in the * controller when the connection goes down. */ lrm_connection_destroy(); return; } else if (op->type != lrmd_event_exec_complete) { /* we only need to process execution results */ return; } lrm_state = lrm_state_find(nodename); CRM_ASSERT(lrm_state != NULL); process_lrm_event(lrm_state, op, NULL); } /* A_LRM_CONNECT */ void do_lrm_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* This only pertains to local executor connections. Remote connections are * handled as resources within the scheduler. Connecting and disconnecting * from remote executor instances is handled differently. */ lrm_state_t *lrm_state = NULL; if(fsa_our_uname == NULL) { return; /* Nothing to do */ } lrm_state = lrm_state_find_or_create(fsa_our_uname); if (lrm_state == NULL) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } if (action & A_LRM_DISCONNECT) { if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) { if (action == A_LRM_DISCONNECT) { crmd_fsa_stall(FALSE); return; } } clear_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Disconnecting from the executor"); lrm_state_disconnect(lrm_state); lrm_state_reset_tables(lrm_state, FALSE); crm_notice("Disconnected from the executor"); } if (action & A_LRM_CONNECT) { int ret = pcmk_ok; crm_debug("Connecting to the executor"); ret = lrm_state_ipc_connect(lrm_state); if (ret != pcmk_ok) { if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) { crm_warn("Failed to connect to the executor %d time%s (%d max)", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails), MAX_LRM_REG_FAILS); crm_timer_start(wait_timer); crmd_fsa_stall(FALSE); return; } } if (ret != pcmk_ok) { crm_err("Failed to connect to the executor the max allowed %d time%s", lrm_state->num_lrm_register_fails, s_if_plural(lrm_state->num_lrm_register_fails)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } set_bit(fsa_input_register, R_LRM_CONNECTED); crm_info("Connection to the executor established"); } if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level) { int counter = 0; gboolean rc = TRUE; const char *when = "lrm disconnect"; GHashTableIter gIter; const char *key = NULL; rsc_history_t *entry = NULL; struct recurring_op_s *pending = NULL; crm_debug("Checking for active resources before exit"); if (cur_state == S_TERMINATE) { log_level = LOG_ERR; when = "shutdown"; } else if (is_set(fsa_input_register, R_SHUTDOWN)) { when = "shutdown... waiting"; } if (lrm_state->pending_ops && lrm_state_is_connected(lrm_state) == TRUE) { guint removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_actions, lrm_state); guint nremaining = g_hash_table_size(lrm_state->pending_ops); if (removed || nremaining) { crm_notice("Stopped %u recurring operation%s at %s (%u remaining)", removed, s_if_plural(removed), when, nremaining); } } if (lrm_state->pending_ops) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) { /* Ignore recurring actions in the shutdown calculations */ if (pending->interval_ms == 0) { counter++; } } } if (counter > 0) { do_crm_log(log_level, "%d pending executor operation%s at %s", counter, s_if_plural(counter), when); if (cur_state == S_TERMINATE || !is_set(fsa_input_register, R_SENT_RSC_STOP)) { g_hash_table_iter_init(&gIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) { do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key); } } else { rc = FALSE; } return rc; } if (lrm_state->resource_history == NULL) { return rc; } if (is_set(fsa_input_register, R_SHUTDOWN)) { /* At this point we're not waiting, we're just shutting down */ when = "shutdown"; } counter = 0; g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) { if (is_rsc_active(lrm_state, entry->id) == FALSE) { continue; } counter++; if (log_level == LOG_ERR) { crm_info("Found %s active at %s", entry->id, when); } else { crm_trace("Found %s active at %s", entry->id, when); } if (lrm_state->pending_ops) { GHashTableIter hIter; g_hash_table_iter_init(&hIter, lrm_state->pending_ops); while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) { if (crm_str_eq(entry->id, pending->rsc_id, TRUE)) { crm_notice("%sction %s (%s) incomplete at %s", pending->interval_ms == 0 ? "A" : "Recurring a", key, pending->op_key, when); } } } } if (counter) { crm_err("%d resource%s active at %s", counter, (counter == 1)? " was" : "s were", when); } return rc; } static char * build_parameter_list(const lrmd_event_data_t *op, const struct ra_metadata_s *metadata, xmlNode *result, enum ra_param_flags_e param_type, bool invert_for_xml) { int len = 0; int max = 0; char *list = NULL; GList *iter = NULL; /* Newer resource agents support the "private" parameter attribute to * indicate sensitive parameters. For backward compatibility with older * agents, this list is used if the agent doesn't specify any as "private". */ const char *secure_terms[] = { "password", "passwd", "user", }; if (is_not_set(metadata->ra_flags, ra_uses_private) && (param_type == ra_param_private)) { max = DIMOF(secure_terms); } for (iter = metadata->ra_params; iter != NULL; iter = iter->next) { struct ra_param_s *param = (struct ra_param_s *) iter->data; bool accept = FALSE; if (is_set(param->rap_flags, param_type)) { accept = TRUE; } else if (max) { for (int lpc = 0; lpc < max; lpc++) { if (safe_str_eq(secure_terms[lpc], param->rap_name)) { accept = TRUE; break; } } } if (accept) { int start = len; crm_trace("Attr %s is %s", param->rap_name, ra_param_flag2text(param_type)); len += strlen(param->rap_name) + 2; // include spaces around list = realloc_safe(list, len + 1); // include null terminator // spaces before and after make parsing simpler sprintf(list + start, " %s ", param->rap_name); } else { crm_trace("Rejecting %s for %s", param->rap_name, ra_param_flag2text(param_type)); } if (result && (invert_for_xml? !accept : accept)) { const char *v = g_hash_table_lookup(op->params, param->rap_name); if (v != NULL) { crm_trace("Adding attr %s=%s to the xml result", param->rap_name, v); crm_xml_add(result, param->rap_name, v); } } } return list; } static void append_restart_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *restart = NULL; CRM_LOG_ASSERT(op->params != NULL); if (op->interval_ms > 0) { /* monitors are not reloadable */ return; } if (is_set(metadata->ra_flags, ra_supports_reload)) { restart = create_xml_node(NULL, XML_TAG_PARAMS); /* Add any parameters with unique="1" to the "op-force-restart" list. * * (Currently, we abuse "unique=0" to indicate reloadability. This is * nonstandard and should eventually be replaced once the OCF standard * is updated with something better.) */ list = build_parameter_list(op, metadata, restart, ra_param_unique, FALSE); } else { /* Resource does not support reloads */ return; } digest = calculate_operation_digest(restart, version); /* Add "op-force-restart" and "op-restart-digest" to indicate the resource supports reload, * no matter if it actually supports any parameters with unique="1"). */ crm_xml_add(update, XML_LRM_ATTR_OP_RESTART, list? list: ""); crm_xml_add(update, XML_LRM_ATTR_RESTART_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(restart, "restart digest source"); free_xml(restart); free(digest); free(list); } static void append_secure_list(lrmd_event_data_t *op, struct ra_metadata_s *metadata, xmlNode *update, const char *version) { char *list = NULL; char *digest = NULL; xmlNode *secure = NULL; CRM_LOG_ASSERT(op->params != NULL); /* * To keep XML_LRM_ATTR_OP_SECURE short, we want it to contain the * secure parameters but XML_LRM_ATTR_SECURE_DIGEST to be based on * the insecure ones */ secure = create_xml_node(NULL, XML_TAG_PARAMS); list = build_parameter_list(op, metadata, secure, ra_param_private, TRUE); if (list != NULL) { digest = calculate_operation_digest(secure, version); crm_xml_add(update, XML_LRM_ATTR_OP_SECURE, list); crm_xml_add(update, XML_LRM_ATTR_SECURE_DIGEST, digest); crm_trace("%s: %s, %s", op->rsc_id, digest, list); crm_log_xml_trace(secure, "secure digest source"); } else { crm_trace("%s: no secure parameters", op->rsc_id); } free_xml(secure); free(digest); free(list); } static gboolean build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *node_name, const char *src) { int target_rc = 0; xmlNode *xml_op = NULL; struct ra_metadata_s *metadata = NULL; const char *caller_version = NULL; lrm_state_t *lrm_state = NULL; if (op == NULL) { return FALSE; } target_rc = rsc_op_expected_rc(op); /* there is a small risk in formerly mixed clusters that it will * be sub-optimal. * * however with our upgrade policy, the update we send should * still be completely supported anyway */ caller_version = g_hash_table_lookup(op->params, XML_ATTR_CRM_VERSION); CRM_LOG_ASSERT(caller_version != NULL); if(caller_version == NULL) { caller_version = CRM_FEATURE_SET; } crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version); xml_op = create_operation_update(parent, op, caller_version, target_rc, fsa_our_uname, src, LOG_DEBUG); if (xml_op == NULL) { return TRUE; } if ((rsc == NULL) || (op->params == NULL) || !crm_op_needs_metadata(rsc->standard, op->op_type)) { crm_trace("No digests needed for %s action on %s (params=%p rsc=%p)", op->op_type, op->rsc_id, op->params, rsc); return TRUE; } lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { crm_warn("Cannot calculate digests for operation " CRM_OP_FMT " because we have no connection to executor for %s", op->rsc_id, op->op_type, op->interval_ms, node_name); return TRUE; } metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata == NULL) { /* For now, we always collect resource agent meta-data via a local, * synchronous, direct execution of the agent. This has multiple issues: * the executor should execute agents, not the controller; meta-data for * Pacemaker Remote nodes should be collected on those nodes, not * locally; and the meta-data call shouldn't eat into the timeout of the * real action being performed. * * These issues are planned to be addressed by having the scheduler * schedule a meta-data cache check at the beginning of each transition. * Once that is working, this block will only be a fallback in case the * initial collection fails. */ char *metadata_str = NULL; int rc = lrm_state_get_metadata(lrm_state, rsc->standard, rsc->provider, rsc->type, &metadata_str, 0); if (rc != pcmk_ok) { crm_warn("Failed to get metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } metadata = metadata_cache_update(lrm_state->metadata_cache, rsc, metadata_str); free(metadata_str); if (metadata == NULL) { crm_warn("Failed to update metadata for %s (%s:%s:%s)", rsc->id, rsc->standard, rsc->provider, rsc->type); return TRUE; } } #if ENABLE_VERSIONED_ATTRS crm_xml_add(xml_op, XML_ATTR_RA_VERSION, metadata->ra_version); #endif crm_trace("Including additional digests for %s::%s:%s", rsc->standard, rsc->provider, rsc->type); append_restart_list(op, metadata, xml_op, caller_version); append_secure_list(op, metadata, xml_op, caller_version); return TRUE; } static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id) { rsc_history_t *entry = NULL; entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (entry == NULL || entry->last == NULL) { return FALSE; } crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type, entry->last->interval_ms, entry->last->rc); if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_STOP)) { return FALSE; } else if (entry->last->rc == PCMK_OCF_OK && safe_str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE)) { /* a stricter check is too complex... * leave that to the PE */ return FALSE; } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) { return FALSE; } else if ((entry->last->interval_ms == 0) && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) { /* Badly configured resources can't be reliably stopped */ return FALSE; } return TRUE; } static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list) { GHashTableIter iter; rsc_history_t *entry = NULL; g_hash_table_iter_init(&iter, lrm_state->resource_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) { GList *gIter = NULL; xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE); crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id); crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type); crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard); crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider); if (entry->last && entry->last->params) { const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); if (container) { crm_trace("Resource %s is a part of container resource %s", entry->id, container); crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container); } } build_operation_update(xml_rsc, &(entry->rsc), entry->failed, lrm_state->node_name, __FUNCTION__); build_operation_update(xml_rsc, &(entry->rsc), entry->last, lrm_state->node_name, __FUNCTION__); for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) { build_operation_update(xml_rsc, &(entry->rsc), gIter->data, lrm_state->node_name, __FUNCTION__); } } return FALSE; } static xmlNode * do_lrm_query_internal(lrm_state_t *lrm_state, int update_flags) { xmlNode *xml_state = NULL; xmlNode *xml_data = NULL; xmlNode *rsc_list = NULL; crm_node_t *peer = NULL; peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); CRM_CHECK(peer != NULL, return NULL); xml_state = create_node_state_update(peer, update_flags, NULL, __FUNCTION__); if (xml_state == NULL) { return NULL; } xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM); crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid); rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES); /* Build a list of active (not always running) resources */ build_active_RAs(lrm_state, rsc_list); crm_log_xml_trace(xml_state, "Current executor state"); return xml_state; } xmlNode * do_lrm_query(gboolean is_replace, const char *node_name) { lrm_state_t *lrm_state = lrm_state_find(node_name); if (!lrm_state) { crm_err("Could not find executor state for node %s", node_name); return NULL; } return do_lrm_query_internal(lrm_state, node_update_cluster|node_update_peer); } static void notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc) { lrmd_event_data_t *op = NULL; const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); crm_info("Notifying %s on %s that %s was%s deleted", from_sys, (from_host? from_host : "localhost"), rsc_id, ((rc == pcmk_ok)? "" : " not")); op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE); if (rc == pcmk_ok) { op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; } else { op->op_status = PCMK_LRM_OP_ERROR; op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc_id); lrmd_free_event(op); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { /* this isn't expected - trigger a new transition */ time_t now = time(NULL); char *now_s = crm_itoa(now); crm_debug("Triggering a refresh after %s deleted %s from the executor", from_sys, rsc_id); update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL, "last-lrm-refresh", now_s, FALSE, NULL, NULL); free(now_s); } } static gboolean lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data) { struct delete_event_s *event = user_data; struct pending_deletion_op_s *op = value; if (crm_str_eq(event->rsc, op->rsc, TRUE)) { notify_deleted(event->lrm_state, op->input, event->rsc, event->rc); return TRUE; } return FALSE; } static gboolean lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data) { const char *rsc = user_data; struct recurring_op_s *pending = value; if (crm_str_eq(rsc, pending->rsc_id, TRUE)) { crm_info("Removing op %s:%d for deleted resource %s", pending->op_key, pending->call_id, rsc); return TRUE; } return FALSE; } /* * Remove the rsc from the CIB * * Avoids refreshing the entire LRM section of this host */ #define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']" static int delete_rsc_status(lrm_state_t * lrm_state, const char *rsc_id, int call_options, const char *user_name) { char *rsc_xpath = NULL; int rc = pcmk_ok; CRM_CHECK(rsc_id != NULL, return -ENXIO); rsc_xpath = crm_strdup_printf(rsc_template, lrm_state->node_name, rsc_id); rc = cib_internal_op(fsa_cib_conn, CIB_OP_DELETE, NULL, rsc_xpath, NULL, NULL, call_options | cib_xpath, user_name); free(rsc_xpath); return rc; } static void delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, GHashTableIter * rsc_gIter, int rc, const char *user_name) { struct delete_event_s event; CRM_CHECK(rsc_id != NULL, return); if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); if (rsc_gIter) g_hash_table_iter_remove(rsc_gIter); else g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); crm_debug("sync: Sending delete op for %s", rsc_id_copy); delete_rsc_status(lrm_state, rsc_id_copy, cib_quorum_override, user_name); g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } if (input) { notify_deleted(lrm_state, input, rsc_id, rc); } event.rc = rc; event.rsc = rsc_id; event.lrm_state = lrm_state; g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event); } /*! * \internal * \brief Erase an LRM history entry from the CIB, given the operation data * * \param[in] lrm_state LRM state of the desired node * \param[in] op Operation whose history should be deleted */ static void erase_lrm_history_by_op(lrm_state_t *lrm_state, lrmd_event_data_t *op) { xmlNode *xml_top = NULL; CRM_CHECK(op != NULL, return); xml_top = create_xml_node(NULL, XML_LRM_TAG_RSC_OP); crm_xml_add_int(xml_top, XML_LRM_ATTR_CALLID, op->call_id); crm_xml_add(xml_top, XML_ATTR_TRANSITION_KEY, op->user_data); if (op->interval_ms > 0) { char *op_id = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); /* Avoid deleting last_failure too (if it was a result of this recurring op failing) */ crm_xml_add(xml_top, XML_ATTR_ID, op_id); free(op_id); } crm_debug("Erasing resource operation history for " CRM_OP_FMT " (call=%d)", op->rsc_id, op->op_type, op->interval_ms, op->call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, XML_CIB_TAG_STATUS, xml_top, cib_quorum_override); crm_log_xml_trace(xml_top, "op:cancel"); free_xml(xml_top); } /* Define xpath to find LRM resource history entry by node and resource */ #define XPATH_HISTORY \ "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \ "/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']" \ "/" XML_LRM_TAG_RSC_OP /* ... and also by operation key */ #define XPATH_HISTORY_ID XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s']" /* ... and also by operation key and operation call ID */ #define XPATH_HISTORY_CALL XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_CALLID "='%d']" /* ... and also by operation key and original operation key */ #define XPATH_HISTORY_ORIG XPATH_HISTORY \ "[@" XML_ATTR_ID "='%s' and @" XML_LRM_ATTR_TASK_KEY "='%s']" /*! * \internal * \brief Erase an LRM history entry from the CIB, given operation identifiers * * \param[in] lrm_state LRM state of the node to clear history for * \param[in] rsc_id Name of resource to clear history for * \param[in] key Operation key of operation to clear history for * \param[in] orig_op If specified, delete only if it has this original op * \param[in] call_id If specified, delete entry only if it has this call ID */ static void erase_lrm_history_by_id(lrm_state_t *lrm_state, const char *rsc_id, const char *key, const char *orig_op, int call_id) { char *op_xpath = NULL; CRM_CHECK((rsc_id != NULL) && (key != NULL), return); if (call_id > 0) { op_xpath = crm_strdup_printf(XPATH_HISTORY_CALL, lrm_state->node_name, rsc_id, key, call_id); } else if (orig_op) { op_xpath = crm_strdup_printf(XPATH_HISTORY_ORIG, lrm_state->node_name, rsc_id, key, orig_op); } else { op_xpath = crm_strdup_printf(XPATH_HISTORY_ID, lrm_state->node_name, rsc_id, key); } crm_debug("Erasing resource operation history for %s on %s (call=%d)", key, rsc_id, call_id); fsa_cib_conn->cmds->remove(fsa_cib_conn, op_xpath, NULL, cib_quorum_override | cib_xpath); free(op_xpath); } static inline gboolean last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms) { if (entry == NULL) { return FALSE; } if (op == NULL) { return TRUE; } return (safe_str_eq(op, entry->failed->op_type) && (interval_ms == entry->failed->interval_ms)); } /*! * \internal * \brief Clear a resource's last failure * * Erase a resource's last failure on a particular node from both the * LRM resource history in the CIB, and the resource history remembered * for the LRM state. * * \param[in] rsc_id Resource name * \param[in] node_name Node name * \param[in] operation If specified, only clear if matching this operation * \param[in] interval_ms If operation is specified, it has this interval */ void lrm_clear_last_failure(const char *rsc_id, const char *node_name, const char *operation, guint interval_ms) { char *op_key = NULL; char *orig_op_key = NULL; lrm_state_t *lrm_state = NULL; lrm_state = lrm_state_find(node_name); if (lrm_state == NULL) { return; } /* Erase from CIB */ op_key = generate_op_key(rsc_id, "last_failure", 0); if (operation) { orig_op_key = generate_op_key(rsc_id, operation, interval_ms); } erase_lrm_history_by_id(lrm_state, rsc_id, op_key, orig_op_key, 0); free(op_key); free(orig_op_key); /* Remove from memory */ if (lrm_state->resource_history) { rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); if (last_failed_matches_op(entry, operation, interval_ms)) { lrmd_free_event(entry->failed); entry->failed = NULL; } } } /* Returns: gboolean - cancellation is in progress */ static gboolean cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove) { int rc = pcmk_ok; char *local_key = NULL; struct recurring_op_s *pending = NULL; CRM_CHECK(op != 0, return FALSE); CRM_CHECK(rsc_id != NULL, return FALSE); if (key == NULL) { local_key = make_stop_id(rsc_id, op); key = local_key; } pending = g_hash_table_lookup(lrm_state->pending_ops, key); if (pending) { if (remove && pending->remove == FALSE) { pending->remove = TRUE; crm_debug("Scheduling %s for removal", key); } if (pending->cancelled) { crm_debug("Operation %s already cancelled", key); free(local_key); return FALSE; } pending->cancelled = TRUE; } else { crm_info("No pending op found for %s", key); free(local_key); return FALSE; } crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key); rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type, pending->interval_ms); if (rc == pcmk_ok) { crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key); free(local_key); return TRUE; } crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key); /* The caller needs to make sure the entry is * removed from the pending_ops list * * Usually by returning TRUE inside the worker function * supplied to g_hash_table_foreach_remove() * * Not removing the entry from pending_ops will block * the node from shutting down */ free(local_key); return FALSE; } struct cancel_data { gboolean done; gboolean remove; const char *key; lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean cancel_action_by_key(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct cancel_data *data = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (crm_str_eq(op->op_key, data->key, TRUE)) { data->done = TRUE; remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove); } return remove; } static gboolean cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove) { guint removed = 0; struct cancel_data data; CRM_CHECK(rsc != NULL, return FALSE); CRM_CHECK(key != NULL, return FALSE); data.key = key; data.rsc = rsc; data.done = FALSE; data.remove = remove; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove(lrm_state->pending_ops, cancel_action_by_key, &data); crm_trace("Removed %u op cache entries, new size: %u", removed, g_hash_table_size(lrm_state->pending_ops)); return data.done; } /*! * \internal * \brief Retrieve resource information from LRM * * \param[in] lrm_state LRM connection to use * \param[in] rsc_xml XML containing resource configuration * \param[in] do_create If true, register resource with LRM if not already * \param[out] rsc_info Where to store resource information obtained from LRM * * \retval pcmk_ok Success (and rsc_info holds newly allocated result) * \retval -EINVAL Required information is missing from arguments * \retval -ENOTCONN No active connection to LRM * \retval -ENODEV Resource not found * \retval -errno Error communicating with executor when registering resource * * \note Caller is responsible for freeing result on success. */ static int get_lrm_resource(lrm_state_t *lrm_state, xmlNode *rsc_xml, gboolean do_create, lrmd_rsc_info_t **rsc_info) { const char *id = ID(rsc_xml); CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL); CRM_CHECK(id, return -EINVAL); if (lrm_state_is_connected(lrm_state) == FALSE) { return -ENOTCONN; } crm_trace("Retrieving resource information for %s from the executor", id); *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); // If resource isn't known by ID, try clone name, if provided if (!*rsc_info) { const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG); if (long_id) { *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0); } } if ((*rsc_info == NULL) && do_create) { const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE); int rc; crm_trace("Registering resource %s with the executor", id); rc = lrm_state_register_rsc(lrm_state, id, class, provider, type, lrmd_opt_drop_recurring); if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Could not register resource %s with the executor on %s: %s " CRM_XS " rc=%d", id, lrm_state->node_name, pcmk_strerror(rc), rc); /* Register this as an internal error if this involves the local * executor. Otherwise, we're likely dealing with an unresponsive * remote node, which is not an FSA failure. */ if (lrm_state_is_local(lrm_state) == TRUE) { register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } return rc; } *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0); } return *rsc_info? pcmk_ok : -ENODEV; } static void delete_resource(lrm_state_t * lrm_state, const char *id, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { crm_trace("Resource '%s' deleted", id); } else if (rc == -EINPROGRESS) { crm_info("Deletion of resource '%s' pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); op = calloc(1, sizeof(struct pending_deletion_op_s)); op->rsc = strdup(rsc->id); op->input = copy_ha_msg_input(request); g_hash_table_insert(lrm_state->deletion_ops, ref, op); } return; } else { crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", id, sys, user ? user : "internal", host, rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); } static int get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id) { int call_id = 999999999; rsc_history_t *entry = NULL; if(lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* Make sure the call id is greater than the last successful operation, * otherwise the failure will not result in a possible recovery of the resource * as it could appear the failure occurred before the successful start */ if (entry) { call_id = entry->last_callid + 1; } if (call_id < 0) { call_id = 1; } return call_id; } static void fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status, enum ocf_exitcode op_exitcode) { op->call_id = get_fake_call_id(lrm_state, op->rsc_id); op->t_run = time(NULL); op->t_rcchange = op->t_run; op->op_status = op_status; op->rc = op_exitcode; } static void force_reprobe(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { GHashTableIter gIter; rsc_history_t *entry = NULL; crm_info("Clearing resource history on node %s", lrm_state->node_name); g_hash_table_iter_init(&gIter, lrm_state->resource_history); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { /* only unregister the resource during a reprobe if it is not a remote connection * resource. otherwise unregistering the connection will terminate remote-node * membership */ gboolean unregister = TRUE; if (is_remote_lrmd_ra(NULL, NULL, entry->id)) { lrm_state_t *remote_lrm_state = lrm_state_find(entry->id); if (remote_lrm_state) { /* when forcing a reprobe, make sure to clear remote node before * clearing the remote node's connection resource */ force_reprobe(remote_lrm_state, from_sys, from_host, user_name, TRUE); } unregister = FALSE; } delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, user_name, NULL, unregister); } /* Now delete the copy in the CIB */ erase_status_tag(lrm_state->node_name, XML_CIB_TAG_LRM, cib_scope_local); /* Finally, _delete_ the value in pacemaker-attrd -- setting it to FALSE * would result in the scheduler sending us back here again */ update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node); } static void synthesize_lrmd_failure(lrm_state_t *lrm_state, xmlNode *action, int rc) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc_info = NULL; const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK); const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET); xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE); if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) { /* @TODO Should we do something else, like direct ack? */ crm_info("Can't fake %s failure (%d) on %s without resource configuration", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); return; } else if(operation == NULL) { /* This probably came from crm_resource -C, nothing to do */ crm_info("Can't fake %s failure (%d) on %s without operation", ID(xml_rsc), rc, target_node); return; } op = construct_op(lrm_state, action, ID(xml_rsc), operation); if (safe_str_eq(operation, RSC_NOTIFY)) { // Notifications can't fail fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_OK); } else { fake_op_status(lrm_state, op, PCMK_LRM_OP_ERROR, rc); } crm_info("Faking " CRM_OP_FMT " result (%d) on %s", op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node); /* Process the result as if it came from the LRM, if possible * (i.e. resource info can be obtained from the lrm_state). */ if (lrm_state) { rsc_info = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); } if (rsc_info) { lrmd_free_rsc_info(rsc_info); process_lrm_event(lrm_state, op, NULL); } else if (controld_action_is_recordable(op->op_type)) { /* If we can't process the result normally, at least write it to the CIB * if possible, so the scheduler can act on it. */ const char *standard = crm_element_value(xml_rsc, XML_AGENT_ATTR_CLASS); const char *provider = crm_element_value(xml_rsc, XML_AGENT_ATTR_PROVIDER); const char *type = crm_element_value(xml_rsc, XML_ATTR_TYPE); if (standard && type) { rsc_info = lrmd_new_rsc_info(op->rsc_id, standard, provider, type); do_update_resource(target_node, rsc_info, op); lrmd_free_rsc_info(rsc_info); } else { // @TODO Should we direct ack? crm_info("Can't fake %s failure (%d) on %s without resource standard and type", crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc, target_node); } } lrmd_free_event(op); } /*! * \internal * \brief Get target of an LRM operation * * \param[in] xml LRM operation data XML * * \return LRM operation target node name (local node or Pacemaker Remote node) */ static const char * lrm_op_target(xmlNode *xml) { const char *target = NULL; if (xml) { target = crm_element_value(xml, XML_LRM_ATTR_TARGET); } if (target == NULL) { target = fsa_our_uname; } return target; } static void fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { lrmd_event_data_t *op = NULL; lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE); CRM_CHECK(xml_rsc != NULL, return); /* The executor simply executes operations and reports the results, without * any concept of success or failure, so to fail a resource, we must fake * what a failure looks like. * * To do this, we create a fake executor operation event for the resource, * and pass that event to the executor client callback so it will be * processed as if it came from the executor. */ op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon"); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); free((char*) op->user_data); op->user_data = NULL; op->interval_ms = 0; #if ENABLE_ACL if (user_name && is_privileged(user_name) == FALSE) { crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc)); send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); return; } #endif if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) { crm_info("Failing resource %s...", rsc->id); + op->exit_reason = strdup("Simulated failure"); process_lrm_event(lrm_state, op, NULL); op->op_status = PCMK_LRM_OP_DONE; op->rc = PCMK_OCF_OK; lrmd_free_rsc_info(rsc); } else { crm_info("Cannot find/create resource in order to fail it..."); crm_log_xml_warn(xml, "bad input"); } send_direct_ack(from_host, from_sys, NULL, op, ID(xml_rsc)); lrmd_free_event(op); } static void handle_refresh_op(lrm_state_t *lrm_state, const char *user_name, const char *from_host, const char *from_sys) { int rc = pcmk_ok; xmlNode *fragment = do_lrm_query_internal(lrm_state, node_update_all); fsa_cib_update(XML_CIB_TAG_STATUS, fragment, cib_quorum_override, rc, user_name); crm_info("Forced a local resource history refresh: call=%d", rc); if (safe_str_neq(CRM_SYSTEM_CRMD, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, fragment, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing refresh from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } free_xml(fragment); } static void handle_query_op(xmlNode *msg, lrm_state_t *lrm_state) { xmlNode *data = do_lrm_query_internal(lrm_state, node_update_all); xmlNode *reply = create_reply(msg, data); if (relay_message(reply, TRUE) == FALSE) { crm_err("Unable to route reply"); crm_log_xml_err(reply, "reply"); } free_xml(reply); free_xml(data); } static void handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys, const char *from_host, const char *user_name, gboolean is_remote_node) { crm_notice("Forcing the status of all resources to be redetected"); force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node); if (safe_str_neq(CRM_SYSTEM_PENGINE, from_sys) && safe_str_neq(CRM_SYSTEM_TENGINE, from_sys)) { xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host, from_sys, CRM_SYSTEM_LRMD, fsa_our_uuid); crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(reply); } } static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys) { char *op_key = NULL; char *meta_key = NULL; int call = 0; const char *call_id = NULL; const char *op_task = NULL; const char *interval_ms_s = NULL; gboolean in_progress = FALSE; xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE); CRM_CHECK(params != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); interval_ms_s = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(interval_ms_s != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_TASK); op_task = crm_element_value(params, meta_key); free(meta_key); CRM_CHECK(op_task != NULL, return FALSE); meta_key = crm_meta_name(XML_LRM_ATTR_CALLID); call_id = crm_element_value(params, meta_key); free(meta_key); op_key = generate_op_key(rsc->id, op_task, crm_parse_ms(interval_ms_s)); crm_debug("Scheduler requested op %s (call=%s) be cancelled", op_key, (call_id? call_id : "NA")); call = crm_parse_int(call_id, "0"); if (call == 0) { // Normal case when the scheduler cancels a recurring op in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE); } else { // Normal case when the scheduler cancels an orphan op in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE); } // Acknowledge cancellation operation if for a remote connection resource if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) { char *op_id = make_stop_id(rsc->id, call); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) { crm_info("Nothing known about operation %d for %s", call, op_key); } erase_lrm_history_by_id(lrm_state, rsc->id, op_key, NULL, call); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); /* needed at least for cancellation of a remote operation */ g_hash_table_remove(lrm_state->pending_ops, op_id); free(op_id); } else { /* No ack is needed since abcdaa8, but peers with older versions * in a rolling upgrade need one. We didn't bump the feature set * at that commit, so we can only compare against the previous * CRM version (3.0.8). If any peers have feature set 3.0.9 but * not abcdaa8, they will time out waiting for the ack (no * released versions of Pacemaker are affected). */ const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION); if (compare_version(peer_version, "3.0.8") <= 0) { crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)", op_key, from_host, peer_version); send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task, from_host, from_sys); } } free(op_key); return TRUE; } static void do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host, bool crm_rsc_delete, const char *user_name) { gboolean unregister = TRUE; #if ENABLE_ACL int cib_rc = delete_rsc_status(lrm_state, rsc->id, cib_dryrun|cib_sync_call, user_name); if (cib_rc != pcmk_ok) { lrmd_event_data_t *op = NULL; crm_err("Could not delete resource status of %s for %s (user %s) on %s: %s" CRM_XS " rc=%d", rsc->id, from_sys, (user_name? user_name : "unknown"), from_host, pcmk_strerror(cib_rc), cib_rc); op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE); op->op_status = PCMK_LRM_OP_ERROR; if (cib_rc == -EACCES) { op->rc = PCMK_OCF_INSUFFICIENT_PRIV; } else { op->rc = PCMK_OCF_UNKNOWN_ERROR; } send_direct_ack(from_host, from_sys, NULL, op, rsc->id); lrmd_free_event(op); return; } #endif if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) { unregister = FALSE; } delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, user_name, input, unregister); } /* A_LRM_INVOKE */ void do_lrm_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { lrm_state_t *lrm_state = NULL; const char *crm_op = NULL; const char *from_sys = NULL; const char *from_host = NULL; const char *operation = NULL; ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg); const char *user_name = NULL; const char *target_node = NULL; gboolean is_remote_node = FALSE; bool crm_rsc_delete = FALSE; target_node = lrm_op_target(input->xml); is_remote_node = safe_str_neq(target_node, fsa_our_uname); lrm_state = lrm_state_find(target_node); if ((lrm_state == NULL) && is_remote_node) { crm_err("Failing action because local node has never had connection to remote node %s", target_node); synthesize_lrmd_failure(NULL, input->xml, PCMK_OCF_CONNECTION_DIED); return; } CRM_ASSERT(lrm_state != NULL); #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); crm_trace("Executor command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM); if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } crm_trace("Executor %s command from %s", crm_op, from_sys); if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { crm_rsc_delete = TRUE; // Only crm_resource uses this op operation = CRMD_ACTION_DELETE; } else if (safe_str_eq(crm_op, CRM_OP_LRM_FAIL)) { fail_lrm_resource(input->xml, lrm_state, user_name, from_host, from_sys); return; } else if (input->xml != NULL) { operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK); } if (safe_str_eq(crm_op, CRM_OP_LRM_REFRESH)) { handle_refresh_op(lrm_state, user_name, from_host, from_sys); } else if (safe_str_eq(crm_op, CRM_OP_LRM_QUERY)) { handle_query_op(input->msg, lrm_state); } else if (safe_str_eq(operation, CRM_OP_PROBED)) { update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE, user_name, is_remote_node); } else if (safe_str_eq(operation, CRM_OP_REPROBE) || safe_str_eq(crm_op, CRM_OP_REPROBE)) { handle_reprobe_op(lrm_state, from_sys, from_host, user_name, is_remote_node); } else if (operation != NULL) { lrmd_rsc_info_t *rsc = NULL; xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE); gboolean create_rsc = safe_str_neq(operation, CRMD_ACTION_DELETE); int rc; // We can't return anything meaningful without a resource ID CRM_CHECK(xml_rsc && ID(xml_rsc), return); rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc); if (rc == -ENOTCONN) { synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_CONNECTION_DIED); return; } else if ((rc < 0) && !create_rsc) { /* Delete of malformed or nonexistent resource * (deleting something that does not exist is a success) */ crm_notice("Not registering resource '%s' for a %s event " CRM_XS " get-rc=%d (%s) transition-key=%s", ID(xml_rsc), operation, rc, pcmk_strerror(rc), ID(input->xml)); delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok, user_name); send_task_ok_ack(lrm_state, input, ID(xml_rsc), NULL, operation, from_host, from_sys); return; } else if (rc == -EINVAL) { // Resource operation on malformed resource crm_err("Invalid resource definition for %s", ID(xml_rsc)); crm_log_xml_warn(input->msg, "invalid resource"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_NOT_CONFIGURED); // fatal error return; } else if (rc < 0) { // Error communicating with the executor crm_err("Could not register resource '%s' with executor: %s " CRM_XS " rc=%d", ID(xml_rsc), pcmk_strerror(rc), rc); crm_log_xml_warn(input->msg, "failed registration"); synthesize_lrmd_failure(lrm_state, input->xml, PCMK_OCF_INVALID_PARAM); // hard error return; } if (safe_str_eq(operation, CRMD_ACTION_CANCEL)) { if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) { crm_log_xml_warn(input->xml, "Bad command"); } } else if (safe_str_eq(operation, CRMD_ACTION_DELETE)) { do_lrm_delete(input, lrm_state, rsc, from_sys, from_host, crm_rsc_delete, user_name); } else { do_lrm_rsc_op(lrm_state, rsc, operation, input->xml, input->msg); } lrmd_free_rsc_info(rsc); } else { crm_err("Cannot perform operation %s of unknown type", crm_str(crm_op)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static lrmd_event_data_t * construct_op(lrm_state_t * lrm_state, xmlNode * rsc_op, const char *rsc_id, const char *operation) { lrmd_event_data_t *op = NULL; const char *op_delay = NULL; const char *op_timeout = NULL; const char *interval_ms_s = NULL; GHashTable *params = NULL; const char *transition = NULL; CRM_ASSERT(rsc_id && operation); op = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(op != NULL); op->type = lrmd_event_exec_complete; op->op_type = strdup(operation); op->op_status = PCMK_LRM_OP_PENDING; op->rc = -1; op->rsc_id = strdup(rsc_id); op->interval_ms = 0; op->timeout = 0; op->start_delay = 0; if (rsc_op == NULL) { CRM_LOG_ASSERT(safe_str_eq(CRMD_ACTION_STOP, operation)); op->user_data = NULL; /* the stop_all_resources() case * by definition there is no DC (or they'd be shutting * us down). * So we should put our version here. */ op->params = crm_str_table_new(); g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); crm_trace("Constructed %s op for %s", operation, rsc_id); return op; } params = xml2list(rsc_op); g_hash_table_remove(params, CRM_META "_op_target_rc"); op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY); op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT); interval_ms_s = crm_meta_value(params, XML_LRM_ATTR_INTERVAL_MS); op->interval_ms = crm_parse_ms(interval_ms_s); op->timeout = crm_parse_int(op_timeout, "0"); op->start_delay = crm_parse_int(op_delay, "0"); #if ENABLE_VERSIONED_ATTRS // Resolve any versioned parameters if (lrm_state && safe_str_neq(op->op_type, RSC_METADATA) && safe_str_neq(op->op_type, CRMD_ACTION_DELETE) && !is_remote_lrmd_ra(NULL, NULL, rsc_id)) { // Resource info *should* already be cached, so we don't get executor call lrmd_rsc_info_t *rsc = lrm_state_get_rsc_info(lrm_state, rsc_id, 0); struct ra_metadata_s *metadata; metadata = metadata_cache_get(lrm_state->metadata_cache, rsc); if (metadata) { xmlNode *versioned_attrs = NULL; GHashTable *hash = NULL; char *key = NULL; char *value = NULL; GHashTableIter iter; versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_OP_VER_META); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_replace(params, crm_meta_name(key), strdup(value)); if (safe_str_eq(key, XML_ATTR_TIMEOUT)) { op->timeout = crm_parse_int(value, "0"); } else if (safe_str_eq(key, XML_OP_ATTR_START_DELAY)) { op->start_delay = crm_parse_int(value, "0"); } } g_hash_table_destroy(hash); versioned_attrs = first_named_child(rsc_op, XML_TAG_RSC_VER_ATTRS); hash = pe_unpack_versioned_parameters(versioned_attrs, metadata->ra_version); g_hash_table_iter_init(&iter, hash); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { g_hash_table_iter_steal(&iter); g_hash_table_replace(params, key, value); } g_hash_table_destroy(hash); } lrmd_free_rsc_info(rsc); } #endif if (safe_str_neq(operation, RSC_STOP)) { op->params = params; } else { rsc_history_t *entry = NULL; if (lrm_state) { entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id); } /* If we do not have stop parameters cached, use * whatever we are given */ if (!entry || !entry->stop_params) { op->params = params; } else { /* Copy the cached parameter list so that we stop the resource * with the old attributes, not the new ones */ op->params = crm_str_table_new(); g_hash_table_foreach(params, copy_meta_keys, op->params); g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params); g_hash_table_destroy(params); params = NULL; } } /* sanity */ if (op->timeout <= 0) { op->timeout = op->interval_ms; } if (op->start_delay < 0) { op->start_delay = 0; } transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY); CRM_CHECK(transition != NULL, return op); op->user_data = strdup(transition); if (op->interval_ms != 0) { if (safe_str_eq(operation, CRMD_ACTION_START) || safe_str_eq(operation, CRMD_ACTION_STOP)) { crm_err("Start and Stop actions cannot have an interval: %u", op->interval_ms); op->interval_ms = 0; } } crm_trace("Constructed %s op for %s: interval=%u", operation, rsc_id, op->interval_ms); return op; } void send_direct_ack(const char *to_host, const char *to_sys, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op, const char *rsc_id) { xmlNode *reply = NULL; xmlNode *update, *iter; crm_node_t *peer = NULL; CRM_CHECK(op != NULL, return); if (op->rsc_id == NULL) { CRM_ASSERT(rsc_id != NULL); op->rsc_id = strdup(rsc_id); } if (to_sys == NULL) { to_sys = CRM_SYSTEM_TENGINE; } peer = crm_get_peer(0, fsa_our_uname); update = create_node_state_update(peer, node_update_none, NULL, __FUNCTION__); iter = create_xml_node(update, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, fsa_our_uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); crm_log_xml_trace(update, "ACK Update"); crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, crm_element_value(reply, XML_ATTR_REFERENCE)); if (relay_message(reply, TRUE) == FALSE) { crm_log_xml_err(reply, "Unable to route reply"); } free_xml(update); free_xml(reply); } gboolean verify_stopped(enum crmd_fsa_state cur_state, int log_level) { gboolean res = TRUE; GList *lrm_state_list = lrm_state_get_list(); GList *state_entry; for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) { lrm_state_t *lrm_state = state_entry->data; if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) { /* keep iterating through all even when false is returned */ res = FALSE; } } set_bit(fsa_input_register, R_SENT_RSC_STOP); g_list_free(lrm_state_list); lrm_state_list = NULL; return res; } struct stop_recurring_action_s { lrmd_rsc_info_t *rsc; lrm_state_t *lrm_state; }; static gboolean stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; struct stop_recurring_action_s *event = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if ((op->interval_ms != 0) && crm_str_eq(op->rsc_id, event->rsc->id, TRUE)) { crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key); remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE); } return remove; } static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data) { gboolean remove = FALSE; lrm_state_t *lrm_state = user_data; struct recurring_op_s *op = (struct recurring_op_s *)value; if (op->interval_ms != 0) { crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (const char *) key); remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE); } return remove; } static void record_pending_op(const char *node_name, lrmd_rsc_info_t *rsc, lrmd_event_data_t *op) { const char *record_pending = NULL; CRM_CHECK(node_name != NULL, return); CRM_CHECK(rsc != NULL, return); CRM_CHECK(op != NULL, return); // Never record certain operation types as pending if ((op->op_type == NULL) || (op->params == NULL) || !controld_action_is_recordable(op->op_type)) { return; } // defaults to true record_pending = crm_meta_value(op->params, XML_OP_ATTR_PENDING); if (record_pending && !crm_is_true(record_pending)) { return; } op->call_id = -1; op->op_status = PCMK_LRM_OP_PENDING; op->rc = PCMK_OCF_UNKNOWN; op->t_run = time(NULL); op->t_rcchange = op->t_run; /* write a "pending" entry to the CIB, inhibit notification */ crm_debug("Recording pending op " CRM_OP_FMT " on %s in the CIB", op->rsc_id, op->op_type, op->interval_ms, node_name); do_update_resource(node_name, rsc, op); } static void do_lrm_rsc_op(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *operation, xmlNode * msg, xmlNode * request) { int call_id = 0; char *op_id = NULL; lrmd_event_data_t *op = NULL; lrmd_key_value_t *params = NULL; fsa_data_t *msg_data = NULL; const char *transition = NULL; gboolean stop_recurring = FALSE; bool send_nack = FALSE; CRM_CHECK(rsc != NULL, return); CRM_CHECK(operation != NULL, return); if (msg != NULL) { transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY); if (transition == NULL) { crm_log_xml_err(msg, "Missing transition number"); } } op = construct_op(lrm_state, msg, rsc->id, operation); CRM_CHECK(op != NULL, return); if (is_remote_lrmd_ra(NULL, NULL, rsc->id) && (op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_MIGRATE) == 0) { /* pcmk remote connections are a special use case. * We never ever want to stop monitoring a connection resource until * the entire migration has completed. If the connection is unexpectedly * severed, even during a migration, this is an event we must detect.*/ stop_recurring = FALSE; } else if ((op->interval_ms == 0) && strcmp(operation, CRMD_ACTION_STATUS) != 0 && strcmp(operation, CRMD_ACTION_NOTIFY) != 0) { /* stop any previous monitor operations before changing the resource state */ stop_recurring = TRUE; } if (stop_recurring == TRUE) { guint removed = 0; struct stop_recurring_action_s data; data.rsc = rsc; data.lrm_state = lrm_state; removed = g_hash_table_foreach_remove( lrm_state->pending_ops, stop_recurring_action_by_rsc, &data); if (removed) { crm_debug("Stopped %u recurring operation%s in preparation for " CRM_OP_FMT, removed, s_if_plural(removed), rsc->id, operation, op->interval_ms); } } /* now do the op */ crm_info("Performing key=%s op=" CRM_OP_FMT, transition, rsc->id, operation, op->interval_ms); if (is_set(fsa_input_register, R_SHUTDOWN) && safe_str_eq(operation, RSC_START)) { register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); send_nack = TRUE; } else if (fsa_state != S_NOT_DC && fsa_state != S_POLICY_ENGINE /* Recalculating */ && fsa_state != S_TRANSITION_ENGINE && safe_str_neq(operation, CRMD_ACTION_STOP)) { send_nack = TRUE; } if(send_nack) { crm_notice("Discarding attempt to perform action %s on %s in state %s (shutdown=%s)", operation, rsc->id, fsa_state2string(fsa_state), is_set(fsa_input_register, R_SHUTDOWN)?"true":"false"); op->rc = CRM_DIRECT_NACK_RC; op->op_status = PCMK_LRM_OP_ERROR; send_direct_ack(NULL, NULL, rsc, op, rsc->id); lrmd_free_event(op); free(op_id); return; } record_pending_op(lrm_state->node_name, rsc, op); op_id = generate_op_key(rsc->id, op->op_type, op->interval_ms); if (op->interval_ms > 0) { /* cancel it so we can then restart it without conflict */ cancel_op_key(lrm_state, rsc, op_id, FALSE); } if (op->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, op->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { params = lrmd_key_value_add(params, key, value); } } call_id = lrm_state_exec(lrm_state, rsc->id, op->op_type, op->user_data, op->interval_ms, op->timeout, op->start_delay, params); if (call_id <= 0 && lrm_state_is_local(lrm_state)) { crm_err("Operation %s on %s failed: %d", operation, rsc->id, call_id); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } else if (call_id <= 0) { crm_err("Operation %s on resource %s failed to execute on remote node %s: %d", operation, rsc->id, lrm_state->node_name, call_id); fake_op_status(lrm_state, op, PCMK_LRM_OP_DONE, PCMK_OCF_UNKNOWN_ERROR); process_lrm_event(lrm_state, op, NULL); } else { /* record all operations so we can wait * for them to complete during shutdown */ char *call_id_s = make_stop_id(rsc->id, call_id); struct recurring_op_s *pending = NULL; pending = calloc(1, sizeof(struct recurring_op_s)); crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s); pending->call_id = call_id; pending->interval_ms = op->interval_ms; pending->op_type = strdup(operation); pending->op_key = strdup(op_id); pending->rsc_id = strdup(rsc->id); pending->start_time = time(NULL); pending->user_data = op->user_data? strdup(op->user_data) : NULL; g_hash_table_replace(lrm_state->pending_ops, call_id_s, pending); if ((op->interval_ms > 0) && (op->start_delay > START_DELAY_THRESHOLD)) { char *uuid = NULL; int dummy = 0, target_rc = 0; crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id); decode_transition_key(op->user_data, &uuid, &dummy, &dummy, &target_rc); free(uuid); op->rc = target_rc; op->op_status = PCMK_LRM_OP_DONE; send_direct_ack(NULL, NULL, rsc, op, rsc->id); } pending->params = op->params; op->params = NULL; } free(op_id); lrmd_free_event(op); return; } int last_resource_update = 0; static void cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { switch (rc) { case pcmk_ok: case -pcmk_err_diff_failed: case -pcmk_err_diff_resync: crm_trace("Resource update %d complete: rc=%d", call_id, rc); break; default: crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc)); } if (call_id == last_resource_update) { last_resource_update = 0; trigger_fsa(fsa_source); } } static int do_update_resource(const char *node_name, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op) { /* */ int rc = pcmk_ok; xmlNode *update, *iter = NULL; int call_opt = crmd_cib_smart_opt(); const char *uuid = NULL; CRM_CHECK(op != NULL, return 0); iter = create_xml_node(iter, XML_CIB_TAG_STATUS); update = iter; iter = create_xml_node(iter, XML_CIB_TAG_STATE); if (safe_str_eq(node_name, fsa_our_uname)) { uuid = fsa_our_uuid; } else { /* remote nodes uuid and uname are equal */ uuid = node_name; crm_xml_add(iter, XML_NODE_IS_REMOTE, "true"); } CRM_LOG_ASSERT(uuid != NULL); if(uuid == NULL) { rc = -EINVAL; goto done; } crm_xml_add(iter, XML_ATTR_UUID, uuid); crm_xml_add(iter, XML_ATTR_UNAME, node_name); crm_xml_add(iter, XML_ATTR_ORIGIN, __FUNCTION__); iter = create_xml_node(iter, XML_CIB_TAG_LRM); crm_xml_add(iter, XML_ATTR_ID, uuid); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES); iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE); crm_xml_add(iter, XML_ATTR_ID, op->rsc_id); build_operation_update(iter, rsc, op, node_name, __FUNCTION__); if (rsc) { const char *container = NULL; crm_xml_add(iter, XML_ATTR_TYPE, rsc->type); crm_xml_add(iter, XML_AGENT_ATTR_CLASS, rsc->standard); crm_xml_add(iter, XML_AGENT_ATTR_PROVIDER, rsc->provider); if (op->params) { container = g_hash_table_lookup(op->params, CRM_META"_"XML_RSC_ATTR_CONTAINER); } if (container) { crm_trace("Resource %s is a part of container resource %s", op->rsc_id, container); crm_xml_add(iter, XML_RSC_ATTR_CONTAINER, container); } } else { crm_warn("Resource %s no longer exists in the executor", op->rsc_id); send_direct_ack(NULL, NULL, rsc, op, op->rsc_id); goto cleanup; } crm_log_xml_trace(update, __FUNCTION__); /* make it an asynchronous call and be done with it * * Best case: * the resource state will be discovered during * the next signup or election. * * Bad case: * we are shutting down and there is no DC at the time, * but then why were we shutting down then anyway? * (probably because of an internal error) * * Worst case: * we get shot for having resources "running" that really weren't * * the alternative however means blocking here for too long, which * isn't acceptable */ fsa_cib_update(XML_CIB_TAG_STATUS, update, call_opt, rc, NULL); if (rc > 0) { last_resource_update = rc; } done: /* the return code is a call number, not an error code */ crm_trace("Sent resource state update message: %d for %s=%u on %s", rc, op->op_type, op->interval_ms, op->rsc_id); fsa_register_cib_callback(rc, FALSE, NULL, cib_rsc_callback); cleanup: free_xml(update); return rc; } void do_lrm_event(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data) { CRM_CHECK(FALSE, return); } static char * unescape_newlines(const char *string) { char *pch = NULL; char *ret = NULL; static const char *escaped_newline = "\\n"; if (!string) { return NULL; } ret = strdup(string); pch = strstr(ret, escaped_newline); while (pch != NULL) { /* 2 chars for 2 chars, null-termination irrelevant */ memcpy(pch, "\n ", 2 * sizeof(char)); pch = strstr(pch, escaped_newline); } return ret; } gboolean process_lrm_event(lrm_state_t * lrm_state, lrmd_event_data_t * op, struct recurring_op_s *pending) { char *op_id = NULL; char *op_key = NULL; int update_id = 0; gboolean remove = FALSE; gboolean removed = FALSE; lrmd_rsc_info_t *rsc = NULL; CRM_CHECK(op != NULL, return FALSE); CRM_CHECK(op->rsc_id != NULL, return FALSE); op_id = make_stop_id(op->rsc_id, op->call_id); op_key = generate_op_key(op->rsc_id, op->op_type, op->interval_ms); rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0); if(pending == NULL) { remove = TRUE; pending = g_hash_table_lookup(lrm_state->pending_ops, op_id); } if (op->op_status == PCMK_LRM_OP_ERROR) { switch(op->rc) { case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_MASTER: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_MASTER: // Leave it to the TE/scheduler to decide if this is an error op->op_status = PCMK_LRM_OP_DONE; break; default: /* Nothing to do */ break; } } if (op->op_status != PCMK_LRM_OP_CANCELLED) { if (controld_action_is_recordable(op->op_type)) { update_id = do_update_resource(lrm_state->node_name, rsc, op); } else { send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } } else if (op->interval_ms == 0) { /* This will occur when "crm resource cleanup" is called while actions are in-flight */ crm_err("Op %s (call=%d): Cancelled", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else if (pending == NULL) { /* We don't need to do anything for cancelled ops * that are not in our pending op list. There are no * transition actions waiting on these operations. */ } else if (op->user_data == NULL) { /* At this point we have a pending entry, but no transition * key present in the user_data field. report this */ crm_err("Op %s (call=%d): No user data", op_key, op->call_id); } else if (pending->remove) { /* The tengine canceled this op, we have been waiting for the cancel to finish. */ erase_lrm_history_by_op(lrm_state, op); } else if (op->rsc_deleted) { /* The tengine initiated this op, but it was cancelled outside of the * tengine's control during a resource cleanup/re-probe request. The tengine * must be alerted that this operation completed, otherwise the tengine * will continue waiting for this update to occur until it is timed out. * We don't want this update going to the cib though, so use a direct ack. */ crm_trace("Op %s (call=%d): cancelled due to rsc deletion", op_key, op->call_id); send_direct_ack(NULL, NULL, NULL, op, op->rsc_id); } else { /* Before a stop is called, no need to direct ack */ crm_trace("Op %s (call=%d): no delete event required", op_key, op->call_id); } if(remove == FALSE) { /* The caller will do this afterwards, but keep the logging consistent */ removed = TRUE; } else if ((op->interval_ms == 0) && g_hash_table_remove(lrm_state->pending_ops, op_id)) { removed = TRUE; crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed", op_key, op->call_id, op_id, g_hash_table_size(lrm_state->pending_ops)); } else if ((op->interval_ms != 0) && (op->op_status == PCMK_LRM_OP_CANCELLED)) { removed = TRUE; g_hash_table_remove(lrm_state->pending_ops, op_id); } switch (op->op_status) { case PCMK_LRM_OP_CANCELLED: crm_info("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false")); break; case PCMK_LRM_OP_DONE: do_crm_log((op->interval_ms? LOG_INFO : LOG_NOTICE), "Result of %s operation for %s on %s: %d (%s) " CRM_XS " call=%d key=%s confirmed=%s cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, op->rc, services_ocf_exitcode_str(op->rc), op->call_id, op_key, (removed? "true" : "false"), update_id); break; case PCMK_LRM_OP_TIMEOUT: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s timeout=%dms", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, op->timeout); break; default: crm_err("Result of %s operation for %s on %s: %s " CRM_XS " call=%d key=%s confirmed=%s status=%d cib-update=%d", crm_action_str(op->op_type, op->interval_ms), op->rsc_id, lrm_state->node_name, services_lrm_status_str(op->op_status), op->call_id, op_key, (removed? "true" : "false"), op->op_status, update_id); } if (op->output) { char *prefix = crm_strdup_printf("%s-" CRM_OP_FMT ":%d", lrm_state->node_name, op->rsc_id, op->op_type, op->interval_ms, op->call_id); if (op->rc) { crm_log_output(LOG_NOTICE, prefix, op->output); } else { crm_log_output(LOG_DEBUG, prefix, op->output); } free(prefix); } if (safe_str_neq(op->op_type, RSC_METADATA)) { crmd_alert_resource_op(lrm_state->node_name, op); } else if (op->rc == PCMK_OCF_OK) { char *metadata = unescape_newlines(op->output); metadata_cache_update(lrm_state->metadata_cache, rsc, metadata); free(metadata); } if (op->rsc_deleted) { crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key); delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL); } /* If a shutdown was escalated while operations were pending, * then the FSA will be stalled right now... allow it to continue */ mainloop_set_trigger(fsa_source); update_history_cache(lrm_state, rsc, op); lrmd_free_rsc_info(rsc); free(op_key); free(op_id); return TRUE; } diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h index c3eb00f9ff..5497fe9d3e 100644 --- a/include/crm/cib/internal.h +++ b/include/crm/cib/internal.h @@ -1,214 +1,205 @@ /* - * Copyright (C) 2004 Andrew Beekhof + * Copyright 2004-2018 Andrew Beekhof * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ + #ifndef CIB_INTERNAL__H # define CIB_INTERNAL__H # include # include # define CIB_OP_SLAVE "cib_slave" # define CIB_OP_SLAVEALL "cib_slave_all" # define CIB_OP_MASTER "cib_master" # define CIB_OP_SYNC "cib_sync" # define CIB_OP_SYNC_ONE "cib_sync_one" # define CIB_OP_ISMASTER "cib_ismaster" # define CIB_OP_BUMP "cib_bump" # define CIB_OP_QUERY "cib_query" # define CIB_OP_CREATE "cib_create" # define CIB_OP_MODIFY "cib_modify" # define CIB_OP_DELETE "cib_delete" # define CIB_OP_ERASE "cib_erase" # define CIB_OP_REPLACE "cib_replace" # define CIB_OP_APPLY_DIFF "cib_apply_diff" # define CIB_OP_UPGRADE "cib_upgrade" # define CIB_OP_DELETE_ALT "cib_delete_alt" # define F_CIB_CLIENTID "cib_clientid" # define F_CIB_CALLOPTS "cib_callopt" # define F_CIB_CALLID "cib_callid" # define F_CIB_CALLDATA "cib_calldata" # define F_CIB_OPERATION "cib_op" # define F_CIB_ISREPLY "cib_isreplyto" # define F_CIB_SECTION "cib_section" # define F_CIB_HOST "cib_host" # define F_CIB_RC "cib_rc" +# define F_CIB_UPGRADE_RC "cib_upgrade_rc" # define F_CIB_DELEGATED "cib_delegated_from" # define F_CIB_OBJID "cib_object" # define F_CIB_OBJTYPE "cib_object_type" # define F_CIB_EXISTING "cib_existing_object" # define F_CIB_SEENCOUNT "cib_seen" # define F_CIB_TIMEOUT "cib_timeout" # define F_CIB_UPDATE "cib_update" # define F_CIB_CALLBACK_TOKEN "cib_async_id" # define F_CIB_GLOBAL_UPDATE "cib_update" # define F_CIB_UPDATE_RESULT "cib_update_result" # define F_CIB_CLIENTNAME "cib_clientname" # define F_CIB_NOTIFY_TYPE "cib_notify_type" # define F_CIB_NOTIFY_ACTIVATE "cib_notify_activate" # define F_CIB_UPDATE_DIFF "cib_update_diff" # define F_CIB_USER "cib_user" # define F_CIB_LOCAL_NOTIFY_ID "cib_local_notify_id" # define F_CIB_PING_ID "cib_ping_id" # define F_CIB_SCHEMA_MAX "cib_schema_max" # define T_CIB "cib" # define T_CIB_NOTIFY "cib_notify" /* notify sub-types */ # define T_CIB_PRE_NOTIFY "cib_pre_notify" # define T_CIB_POST_NOTIFY "cib_post_notify" # define T_CIB_UPDATE_CONFIRM "cib_update_confirmation" # define T_CIB_REPLACE_NOTIFY "cib_refresh_notify" # define cib_channel_ro "cib_ro" # define cib_channel_rw "cib_rw" # define cib_channel_shm "cib_shm" gboolean cib_diff_version_details(xmlNode * diff, int *admin_epoch, int *epoch, int *updates, int *_admin_epoch, int *_epoch, int *_updates); gboolean cib_read_config(GHashTable * options, xmlNode * current_cib); void verify_cib_options(GHashTable * options); gboolean cib_internal_config_changed(xmlNode * diff); extern GHashTable *cib_op_callback_table; typedef struct cib_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*callback) (const char *event, xmlNode * msg); } cib_notify_client_t; typedef struct cib_callback_client_s { void (*callback) (xmlNode *, int, int, xmlNode *, void *); const char *id; void *user_data; gboolean only_success; struct timer_rec_s *timer; void (*free_func)(void *); } cib_callback_client_t; struct timer_rec_s { int call_id; int timeout; guint ref; cib_t *cib; }; typedef int (*cib_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); cib_t *cib_new_variant(void); int cib_perform_op(const char *op, int call_options, cib_op_t * fn, gboolean is_query, const char *section, xmlNode * req, xmlNode * input, gboolean manage_counters, gboolean * config_changed, xmlNode * current_cib, xmlNode ** result_cib, xmlNode ** diff, xmlNode ** output); xmlNode *cib_create_op(int call_id, const char *token, const char *op, const char *host, const char *section, xmlNode * data, int call_options, const char *user_name); void cib_native_callback(cib_t * cib, xmlNode * msg, int call_id, int rc); void cib_native_notify(gpointer data, gpointer user_data); int cib_native_register_notification(cib_t * cib, const char *callback, int enabled); gboolean cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback) (xmlNode *, int, int, xmlNode *, void *)); gboolean cib_client_register_callback_full(cib_t *cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback)(xmlNode *, int, int, xmlNode *, void *), void (*free_func)(void *)); int cib_process_query(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_erase(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_bump(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_replace(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_create(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_modify(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_delete(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_diff(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); int cib_process_upgrade(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); /*! * \internal * \brief Core function to manipulate with/query CIB/XML per xpath + arguments * \param[in] op, the operation to be performed: * CIB_OP_{CREATE,DELETE,MODIFY,QUERY,REPLACE} * \param[in] options, ORed flags per relevant \c cib_call_options enumeration: * cib_{multiple,no_children,xpath_address} * \param[in] section, xpath defining place of interest in * {existing,result}_cib * \param[in] req, UNUSED * \param[in] input, the input operand for * CIB_OP_{CREATE,MODIFY,REPLACE} * \param[in] existing_cib, the input operand (CIB) for \c CIB_OP_QUERY * \param[inout] result_cib, the operand and result for * CIB_OP_{CREATE,DELETE,MODIFY,REPLACE} * \param[out] answer, the result for \c CIB_OP_QUERY, structured per \c options * * \retval \c pcmk_ok (0) for success, different value for failure */ int cib_process_xpath(const char *op, int options, const char *section, xmlNode * req, xmlNode * input, xmlNode * existing_cib, xmlNode ** result_cib, xmlNode ** answer); gboolean cib_config_changed(xmlNode * last, xmlNode * next, xmlNode ** diff); gboolean update_results(xmlNode * failed, xmlNode * target, const char *operation, int return_code); int cib_update_counter(xmlNode * xml_obj, const char *field, gboolean reset); int cib_internal_op(cib_t * cib, const char *op, const char *host, const char *section, xmlNode * data, xmlNode ** output_data, int call_options, const char *user_name); int cib_file_read_and_verify(const char *filename, const char *sigfile, xmlNode **root); int cib_file_write_with_digest(xmlNode *cib_root, const char *cib_dirname, const char *cib_filename); #endif diff --git a/tools/cibadmin.c b/tools/cibadmin.c index 410eea9306..96caf3483a 100644 --- a/tools/cibadmin.c +++ b/tools/cibadmin.c @@ -1,540 +1,551 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static int message_timeout_ms = 30; static int command_options = 0; static int request_id = 0; static int bump_log_num = 0; static const char *host = NULL; static const char *cib_user = NULL; static const char *cib_action = NULL; static const char *obj_type = NULL; static cib_t *the_cib = NULL; static GMainLoop *mainloop = NULL; static gboolean force_flag = FALSE; static crm_exit_t exit_code = CRM_EX_OK; int do_init(void); int do_work(xmlNode *input, int command_options, xmlNode **output); void cibadmin_op_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output\n"}, {"-spacer-", 0, 0, '-', "Commands:"}, {"upgrade", 0, 0, 'u', "\tUpgrade the configuration to the latest syntax"}, {"query", 0, 0, 'Q', "\tQuery the contents of the CIB"}, {"erase", 0, 0, 'E', "\tErase the contents of the whole CIB"}, {"bump", 0, 0, 'B', "\tIncrease the CIB's epoch value by 1"}, {"create", 0, 0, 'C', "\tCreate an object in the CIB. Will fail if the object already exists."}, {"modify", 0, 0, 'M', "\tFind the object somewhere in the CIB's XML tree and update it. Fails if the object does not exist unless -c is specified"}, {"patch", 0, 0, 'P', "\tSupply an update in the form of an xml diff (See also: crm_diff)"}, {"replace", 0, 0, 'R', "\tRecursively replace an object in the CIB"}, {"delete", 0, 0, 'D', "\tDelete the first object matching the supplied criteria, Eg. "}, {"-spacer-", 0, 0, '-', "\n\tThe tagname and all attributes must match in order for the element to be deleted\n"}, {"delete-all", 0, 0, 'd', "When used with --xpath, remove all matching objects in the configuration instead of just the first one"}, {"empty", 0, 0, 'a', "\tOutput an empty CIB"}, {"md5-sum", 0, 0, '5', "\tCalculate the on-disk CIB digest"}, {"md5-sum-versioned", 0, 0, '6', "Calculate an on-the-wire versioned CIB digest"}, {"blank", 0, 0, '-', NULL, 1}, {"-spacer-",1, 0, '-', "\nAdditional options:"}, {"force", 0, 0, 'f'}, {"timeout", 1, 0, 't', "Time (in seconds) to wait before declaring the operation failed"}, {"user", 1, 0, 'U', "Run the command with permissions of the named user (valid only for the root and "CRM_DAEMON_USER" accounts)"}, {"sync-call", 0, 0, 's', "Wait for call to complete before returning"}, {"local", 0, 0, 'l', "\tCommand takes effect locally. Should only be used for queries"}, {"allow-create",0, 0, 'c', "(Advanced) Allow the target of a --modify,-M operation to be created if they do not exist"}, {"no-children", 0, 0, 'n', "(Advanced) When querying an object, do not return include its children in the result\n"}, {"no-bcast", 0, 0, 'b', NULL, 1}, {"-spacer-", 0, 0, '-', "Data:"}, {"xml-text", 1, 0, 'X', "Retrieve XML from the supplied string"}, {"xml-file", 1, 0, 'x', "Retrieve XML from the named file"}, {"xml-pipe", 0, 0, 'p', "Retrieve XML from stdin\n"}, {"scope", 1, 0, 'o', "Limit the scope of the operation to a specific section of the CIB."}, {"-spacer-", 0, 0, '-', "\tValid values are: nodes, resources, constraints, crm_config, rsc_defaults, op_defaults, status"}, {"xpath", 1, 0, 'A', "A valid XPath to use instead of --scope,-o"}, {"node-path", 0, 0, 'e', "When performing XPath queries, return the address of any matches found."}, {"-spacer-", 0, 0, '-', " Eg: /cib/configuration/resources/clone[@id='ms_RH1_SCS']/primitive[@id='prm_RH1_SCS']", pcmk_option_paragraph}, {"node", 1, 0, 'N', "(Advanced) Send command to the specified host\n"}, {"-space-", 0, 0, '!', NULL, 1}, {"-spacer-", 0, 0, '-', "\nExamples:\n"}, {"-spacer-", 0, 0, '-', "Query the configuration from the local node:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --query --local", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Query just the cluster options configuration:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --query --scope crm_config", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Query all 'target-role' settings:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --query --xpath \"//nvpair[@name='target-role']\"", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Remove all 'is-managed' settings:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --delete-all --xpath \"//nvpair[@name='is-managed']\"", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Remove the resource named 'old':", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --delete --xml-text ''", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Remove all resources from the configuration:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --replace --scope resources --xml-text ''", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Replace the complete configuration with the contents of $HOME/pacemaker.xml:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --replace --xml-file $HOME/pacemaker.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Replace the constraints section of the configuration with the contents of $HOME/constraints.xml:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --replace --scope constraints --xml-file $HOME/constraints.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Increase the configuration version to prevent old configurations from being loaded accidentally:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --modify --xml-text ''", pcmk_option_example}, {"-spacer-", 0, 0, '-', "Edit the configuration with your favorite $EDITOR:", pcmk_option_paragraph}, {"-spacer-", 0, 0, '-', " cibadmin --query > $HOME/local.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', " $EDITOR $HOME/local.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', " cibadmin --replace --xml-file $HOME/local.xml", pcmk_option_example}, {"-spacer-", 0, 0, '-', "SEE ALSO:"}, {"-spacer-", 0, 0, '-', " crm(8), pcs(8), crm_shadow(8), crm_diff(8)"}, /* Legacy options */ {"host", 1, 0, 'h', NULL, 1}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void print_xml_output(xmlNode * xml) { char *buffer; if (!xml) { return; } else if (xml->type != XML_ELEMENT_NODE) { return; } if (command_options & cib_xpath_address) { const char *id = crm_element_value(xml, XML_ATTR_ID); if (safe_str_eq((const char *)xml->name, "xpath-query")) { xmlNode *child = NULL; for (child = xml->children; child; child = child->next) { print_xml_output(child); } } else if (id) { printf("%s\n", id); } } else { buffer = dump_xml_formatted(xml); fprintf(stdout, "%s", crm_str(buffer)); free(buffer); } } +// Upgrade requested but already at latest schema +static void +report_schema_unchanged() +{ + const char *err = pcmk_strerror(pcmk_err_schema_unchanged); + + crm_info("Upgrade unnecessary: %s\n", err); + printf("Upgrade unnecessary: %s\n", err); + exit_code = CRM_EX_OK; +} + int main(int argc, char **argv) { int argerr = 0; int rc = pcmk_ok; int flag; const char *source = NULL; const char *admin_input_xml = NULL; const char *admin_input_file = NULL; gboolean dangerous_cmd = FALSE; gboolean admin_input_stdin = FALSE; xmlNode *output = NULL; xmlNode *input = NULL; int option_index = 0; crm_xml_init(); /* Sets buffer allocation strategy */ crm_log_cli_init("cibadmin"); set_crm_log_level(LOG_CRIT); crm_set_options(NULL, "command [options] [data]", long_options, "Provides direct access to the cluster configuration." "\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted." "\n\nWhere necessary, XML data will be obtained using the -X, -x, or -p options.\n"); if (argc < 2) { crm_help('?', CRM_EX_USAGE); } while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 't': message_timeout_ms = atoi(optarg); if (message_timeout_ms < 1) { message_timeout_ms = 30; } break; case 'A': obj_type = optarg; command_options |= cib_xpath; break; case 'e': command_options |= cib_xpath_address; break; case 'u': cib_action = CIB_OP_UPGRADE; dangerous_cmd = TRUE; break; case 'E': cib_action = CIB_OP_ERASE; dangerous_cmd = TRUE; break; case 'Q': cib_action = CIB_OP_QUERY; break; case 'P': cib_action = CIB_OP_APPLY_DIFF; break; case 'U': cib_user = optarg; break; case 'M': cib_action = CIB_OP_MODIFY; break; case 'R': cib_action = CIB_OP_REPLACE; break; case 'C': cib_action = CIB_OP_CREATE; break; case 'D': cib_action = CIB_OP_DELETE; break; case '5': cib_action = "md5-sum"; break; case '6': cib_action = "md5-sum-versioned"; break; case 'c': command_options |= cib_can_create; break; case 'n': command_options |= cib_no_children; break; case 'B': cib_action = CIB_OP_BUMP; crm_log_args(argc, argv); break; case 'V': command_options = command_options | cib_verbose; bump_log_num++; break; case '?': case '$': case '!': crm_help(flag, CRM_EX_OK); break; case 'o': crm_trace("Option %c => %s", flag, optarg); obj_type = optarg; break; case 'X': crm_trace("Option %c => %s", flag, optarg); admin_input_xml = optarg; crm_log_args(argc, argv); break; case 'x': crm_trace("Option %c => %s", flag, optarg); admin_input_file = optarg; crm_log_args(argc, argv); break; case 'p': admin_input_stdin = TRUE; crm_log_args(argc, argv); break; case 'N': case 'h': host = strdup(optarg); break; case 'l': command_options |= cib_scope_local; break; case 'd': cib_action = CIB_OP_DELETE; command_options |= cib_multiple; dangerous_cmd = TRUE; break; case 'b': dangerous_cmd = TRUE; command_options |= cib_inhibit_bcast; command_options |= cib_scope_local; break; case 's': command_options |= cib_sync_call; break; case 'f': force_flag = TRUE; command_options |= cib_quorum_override; crm_log_args(argc, argv); break; case 'a': output = createEmptyCib(1); if (optind < argc) { crm_xml_add(output, XML_ATTR_VALIDATION, argv[optind]); } admin_input_xml = dump_xml_formatted(output); fprintf(stdout, "%s\n", crm_str(admin_input_xml)); crm_exit(CRM_EX_OK); break; default: printf("Argument code 0%o (%c)" " is not (?yet?) supported\n", flag, flag); ++argerr; break; } } while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); crm_help('?', CRM_EX_USAGE); } if (optind > argc || cib_action == NULL) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } if (dangerous_cmd && force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); fflush(stderr); crm_exit(CRM_EX_UNSAFE); } if (admin_input_file != NULL) { input = filename2xml(admin_input_file); source = admin_input_file; } else if (admin_input_xml != NULL) { source = "input string"; input = string2xml(admin_input_xml); } else if (admin_input_stdin) { source = "STDIN"; input = stdin2xml(); } if (input != NULL) { crm_log_xml_debug(input, "[admin input]"); } else if (source) { fprintf(stderr, "Couldn't parse input from %s.\n", source); crm_exit(CRM_EX_CONFIG); } if (safe_str_eq(cib_action, "md5-sum")) { char *digest = NULL; if (input == NULL) { fprintf(stderr, "Please supply XML to process with -X, -x or -p\n"); crm_exit(CRM_EX_USAGE); } digest = calculate_on_disk_digest(input); fprintf(stderr, "Digest: "); fprintf(stdout, "%s\n", crm_str(digest)); free(digest); free_xml(input); crm_exit(CRM_EX_OK); } else if (safe_str_eq(cib_action, "md5-sum-versioned")) { char *digest = NULL; const char *version = NULL; if (input == NULL) { fprintf(stderr, "Please supply XML to process with -X, -x or -p\n"); crm_exit(CRM_EX_USAGE); } version = crm_element_value(input, XML_ATTR_CRM_VERSION); digest = calculate_xml_versioned_digest(input, FALSE, TRUE, version); fprintf(stderr, "Versioned (%s) digest: ", version); fprintf(stdout, "%s\n", crm_str(digest)); free(digest); free_xml(input); crm_exit(CRM_EX_OK); } rc = do_init(); if (rc != pcmk_ok) { crm_err("Init failed, could not perform requested operations"); fprintf(stderr, "Init failed, could not perform requested operations\n"); free_xml(input); crm_exit(crm_errno2exit(rc)); } rc = do_work(input, command_options, &output); if (rc > 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ request_id = rc; the_cib->cmds->register_callback(the_cib, request_id, message_timeout_ms, FALSE, NULL, "cibadmin_op_callback", cibadmin_op_callback); mainloop = g_main_loop_new(NULL, FALSE); crm_trace("%s waiting for reply from the local CIB", crm_system_name); crm_info("Starting mainloop"); g_main_loop_run(mainloop); } else if ((rc == -pcmk_err_schema_unchanged) && crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { - - // Already at latest schema - crm_info("Upgrade unnecessary: %s\n", pcmk_strerror(rc)); - printf("Upgrade unnecessary: %s\n", pcmk_strerror(rc)); + report_schema_unchanged(); } else if (rc < 0) { crm_err("Call failed: %s", pcmk_strerror(rc)); fprintf(stderr, "Call failed: %s\n", pcmk_strerror(rc)); if (rc == -pcmk_err_schema_validation) { if (crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { xmlNode *obj = NULL; int version = 0, rc = 0; rc = the_cib->cmds->query(the_cib, NULL, &obj, command_options); if (rc == pcmk_ok) { update_validation(&obj, &version, 0, TRUE, FALSE); } } else if (output) { validate_xml_verbose(output); } } exit_code = crm_errno2exit(rc); } if (output != NULL) { print_xml_output(output); free_xml(output); } crm_trace("%s exiting normally", crm_system_name); free_xml(input); rc = the_cib->cmds->signoff(the_cib); if (exit_code == CRM_EX_OK) { exit_code = crm_errno2exit(rc); } cib_delete(the_cib); return crm_exit(exit_code); } int do_work(xmlNode * input, int call_options, xmlNode ** output) { /* construct the request */ the_cib->call_timeout = message_timeout_ms; if (strcasecmp(CIB_OP_REPLACE, cib_action) == 0 && safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, input); if (status == NULL) { create_xml_node(input, XML_CIB_TAG_STATUS); } } if (cib_action != NULL) { crm_trace("Passing \"%s\" to variant_op...", cib_action); return cib_internal_op(the_cib, cib_action, host, obj_type, input, output, call_options, cib_user); } else { crm_err("You must specify an operation"); } return -EINVAL; } int do_init(void) { int rc = pcmk_ok; the_cib = cib_new(); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s", pcmk_strerror(rc)); fprintf(stderr, "Connection to the CIB manager failed: %s\n", pcmk_strerror(rc)); } return rc; } void cibadmin_op_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { exit_code = crm_errno2exit(rc); - if (rc != 0) { + if (rc == -pcmk_err_schema_unchanged) { + report_schema_unchanged(); + + } else if (rc != pcmk_ok) { crm_warn("Call %s failed (%d): %s", cib_action, rc, pcmk_strerror(rc)); fprintf(stderr, "Call %s failed (%d): %s\n", cib_action, rc, pcmk_strerror(rc)); print_xml_output(output); } else if (safe_str_eq(cib_action, CIB_OP_QUERY) && output == NULL) { crm_err("Query returned no output"); crm_log_xml_err(msg, "no output"); } else if (output == NULL) { crm_info("Call passed"); } else { crm_info("Call passed"); print_xml_output(output); } if (call_id == request_id) { g_main_loop_quit(mainloop); } else { crm_info("Message was not the response we were looking for (%d vs. %d)", call_id, request_id); } } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 42de6a0ea3..d76955272a 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,1980 +1,1983 @@ /* * Copyright 2004-2018 Andrew Beekhof * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include int resource_verbose = 0; bool do_force = FALSE; int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; static int do_find_resource(const char *rsc, resource_t * the_rsc, pe_working_set_t * data_set) { int found = 0; GListPtr lpc = NULL; for (lpc = the_rsc->running_on; lpc != NULL; lpc = lpc->next) { node_t *node = (node_t *) lpc->data; if (BE_QUIET) { fprintf(stdout, "%s\n", node->details->uname); } else { const char *state = ""; if (!pe_rsc_is_clone(the_rsc) && the_rsc->fns->state(the_rsc, TRUE) == RSC_ROLE_MASTER) { state = "Master"; } fprintf(stdout, "resource %s is running on: %s %s\n", rsc, node->details->uname, state); } found++; } if (BE_QUIET == FALSE && found == 0) { fprintf(stderr, "resource %s is NOT running\n", rsc); } return found; } int cli_resource_search(resource_t *rsc, const char *requested_name, pe_working_set_t *data_set) { int found = 0; resource_t *parent = uber_parent(rsc); if (pe_rsc_is_clone(rsc)) { for (GListPtr iter = rsc->children; iter != NULL; iter = iter->next) { found += do_find_resource(requested_name, iter->data, data_set); } /* The anonymous clone children's common ID is supplied */ } else if (pe_rsc_is_clone(parent) && is_not_set(rsc->flags, pe_rsc_unique) && rsc->clone_name && safe_str_eq(requested_name, rsc->clone_name) && safe_str_neq(requested_name, rsc->id)) { for (GListPtr iter = parent->children; iter; iter = iter->next) { found += do_find_resource(requested_name, iter->data, data_set); } } else { found += do_find_resource(requested_name, rsc, data_set); } return found; } #define XPATH_MAX 1024 static int find_resource_attr(cib_t * the_cib, const char *attr, const char *rsc, const char *set_type, const char *set_name, const char *attr_id, const char *attr_name, char **value) { int offset = 0; int rc = pcmk_ok; xmlNode *xml_search = NULL; char *xpath_string = NULL; if(value) { *value = NULL; } if(the_cib == NULL) { return -ENOTCONN; } xpath_string = calloc(1, XPATH_MAX); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", get_object_path("resources")); offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//*[@id=\"%s\"]", rsc); if (set_type) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s", set_type); if (set_name) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "[@id=\"%s\"]", set_name); } } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "//nvpair["); if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@id=\"%s\"", attr_id); } if (attr_name) { if (attr_id) { offset += snprintf(xpath_string + offset, XPATH_MAX - offset, " and "); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "@name=\"%s\"", attr_name); } offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "]"); CRM_LOG_ASSERT(offset > 0); rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search, cib_sync_call | cib_scope_local | cib_xpath); if (rc != pcmk_ok) { goto bail; } crm_log_xml_debug(xml_search, "Match"); if (xml_has_children(xml_search)) { xmlNode *child = NULL; rc = -EINVAL; printf("Multiple attributes match name=%s\n", attr_name); for (child = __xml_first_child(xml_search); child != NULL; child = __xml_next(child)) { printf(" Value: %s \t(id=%s)\n", crm_element_value(child, XML_NVPAIR_ATTR_VALUE), ID(child)); } } else if(value) { const char *tmp = crm_element_value(xml_search, attr); if (tmp) { *value = strdup(tmp); } } bail: free(xpath_string); free_xml(xml_search); return rc; } static resource_t * find_matching_attr_resource(resource_t * rsc, const char * rsc_id, const char * attr_set, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd) { int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; if(do_force == TRUE) { return rsc; } else if(rsc->parent) { switch(rsc->parent->variant) { case pe_group: if (BE_QUIET == FALSE) { printf("Performing %s of '%s' for '%s' will not apply to its peers in '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } break; case pe_clone: rc = find_resource_attr(cib, XML_ATTR_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); free(local_attr_id); if(rc != pcmk_ok) { rsc = rsc->parent; if (BE_QUIET == FALSE) { printf("Performing %s of '%s' on '%s', the parent of '%s'\n", cmd, attr_name, rsc->id, rsc_id); } } break; default: break; } } else if (rsc->parent && BE_QUIET == FALSE) { printf("Forcing %s of '%s' for '%s' instead of '%s'\n", cmd, attr_name, rsc_id, rsc->parent->id); } else if(rsc->parent == NULL && rsc->children) { resource_t *child = rsc->children->data; if(child->variant == pe_native) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if(rc == pcmk_ok) { rsc = child; if (BE_QUIET == FALSE) { printf("A value for '%s' already exists in child '%s', performing %s on that instead of '%s'\n", attr_name, lookup_id, cmd, rsc_id); } } free(local_attr_id); free(lookup_id); } } return rsc; } int cli_resource_update_attribute(resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, const char *attr_value, bool recursive, cib_t *cib, pe_working_set_t *data_set) { int rc = pcmk_ok; static bool need_init = TRUE; char *lookup_id = NULL; char *local_attr_id = NULL; char *local_attr_set = NULL; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; if(attr_id == NULL && do_force == FALSE && pcmk_ok != find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL)) { printf("\n"); } if (safe_str_eq(attr_set_type, XML_TAG_ATTR_SETS)) { if (do_force == FALSE) { rc = find_resource_attr(cib, XML_ATTR_ID, uber_parent(rsc)->id, XML_TAG_META_SETS, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("WARNING: There is already a meta attribute for '%s' called '%s' (id=%s)\n", uber_parent(rsc)->id, attr_name, local_attr_id); printf(" Delete '%s' first or use --force to override\n", local_attr_id); } free(local_attr_id); if (rc == pcmk_ok) { return -ENOTUNIQ; } } } else { rsc = find_matching_attr_resource(rsc, requested_name, attr_set, attr_id, attr_name, cib, "update"); } lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == pcmk_ok) { crm_debug("Found a match for name=%s: id=%s", attr_name, local_attr_id); attr_id = local_attr_id; } else if (rc != -ENXIO) { free(lookup_id); free(local_attr_id); return rc; } else { const char *tag = crm_element_name(rsc->xml); if (attr_set == NULL) { local_attr_set = crm_concat(lookup_id, attr_set_type, '-'); attr_set = local_attr_set; } if (attr_id == NULL) { local_attr_id = crm_concat(attr_set, attr_name, '-'); attr_id = local_attr_id; } xml_top = create_xml_node(NULL, tag); crm_xml_add(xml_top, XML_ATTR_ID, lookup_id); xml_obj = create_xml_node(xml_top, attr_set_type); crm_xml_add(xml_obj, XML_ATTR_ID, attr_set); } xml_obj = crm_create_nvpair_xml(xml_obj, attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, XML_CIB_TAG_RESOURCES, xml_top, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Set '%s' option: id=%s%s%s%s%s=%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : "", attr_value); } free_xml(xml_top); free(lookup_id); free(local_attr_id); free(local_attr_set); if(recursive && safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { GListPtr lpc = NULL; if(need_init) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, data_set->input); need_init = FALSE; unpack_constraints(cib_constraints, data_set); for (lpc = data_set->resources; lpc != NULL; lpc = lpc->next) { resource_t *r = (resource_t *) lpc->data; clear_bit(r->flags, pe_rsc_allocating); } } crm_debug("Looking for dependencies %p", rsc->rsc_cons_lhs); set_bit(rsc->flags, pe_rsc_allocating); for (lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) { rsc_colocation_t *cons = (rsc_colocation_t *) lpc->data; resource_t *peer = cons->rsc_lh; crm_debug("Checking %s %d", cons->id, cons->score); if (cons->score > 0 && is_not_set(peer->flags, pe_rsc_allocating)) { /* Don't get into colocation loops */ crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, peer->id); cli_resource_update_attribute(peer, peer->id, NULL, NULL, attr_name, attr_value, recursive, cib, data_set); } } } return rc; } int cli_resource_delete_attribute(resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_id, const char *attr_name, cib_t *cib, pe_working_set_t *data_set) { xmlNode *xml_obj = NULL; int rc = pcmk_ok; char *lookup_id = NULL; char *local_attr_id = NULL; if(attr_id == NULL && do_force == FALSE && find_resource_attr( cib, XML_ATTR_ID, uber_parent(rsc)->id, NULL, NULL, NULL, attr_name, NULL) != pcmk_ok) { printf("\n"); } if(safe_str_eq(attr_set_type, XML_TAG_META_SETS)) { rsc = find_matching_attr_resource(rsc, requested_name, attr_set, attr_id, attr_name, cib, "delete"); } lookup_id = clone_strip(rsc->id); rc = find_resource_attr(cib, XML_ATTR_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &local_attr_id); if (rc == -ENXIO) { free(lookup_id); return pcmk_ok; } else if (rc != pcmk_ok) { free(lookup_id); return rc; } if (attr_id == NULL) { attr_id = local_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, attr_id, attr_name, NULL); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->remove(cib, XML_CIB_TAG_RESOURCES, xml_obj, cib_options); if (rc == pcmk_ok && BE_QUIET == FALSE) { printf("Deleted '%s' option: id=%s%s%s%s%s\n", lookup_id, local_attr_id, attr_set ? " set=" : "", attr_set ? attr_set : "", attr_name ? " name=" : "", attr_name ? attr_name : ""); } free(lookup_id); free_xml(xml_obj); free(local_attr_id); return rc; } static int send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, const char *host_uname, const char *rsc_id, bool only_failed, pe_working_set_t * data_set) { char *our_pid = NULL; char *key = NULL; int rc = -ECOMM; xmlNode *cmd = NULL; xmlNode *xml_rsc = NULL; const char *value = NULL; const char *router_node = host_uname; xmlNode *params = NULL; xmlNode *msg_data = NULL; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); return -ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); return -EINVAL; } else if (host_uname == NULL) { CMD_ERR("Please supply a node name with --node"); return -EINVAL; } else { node_t *node = pe_find_node(data_set->nodes, host_uname); if (node && is_remote_node(node)) { node = pe__current_node(node->details->remote_rsc); if (node == NULL) { CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", host_uname); return -ENXIO; } router_node = node->details->uname; } } key = generate_transition_key(0, getpid(), 0, "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); free(key); crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); if (safe_str_neq(router_node, host_uname)) { crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); } xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); } else { crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_ATTR_TYPE); if (value == NULL) { CMD_ERR("%s has no type! Aborting...", rsc_id); return -ENXIO; } value = crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_CLASS); if (value == NULL) { CMD_ERR("%s has no class! Aborting...", rsc_id); return -ENXIO; } crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); params = create_xml_node(msg_data, XML_TAG_ATTRS); crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS); crm_xml_add(params, key, "60000"); /* 1 minute */ free(key); our_pid = crm_getpid_s(); cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); /* crm_log_xml_warn(cmd, "send_lrm_rsc_op"); */ free_xml(msg_data); if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { rc = 0; } else { crm_debug("Could not send %s op to the controller", op); rc = -ENOTCONN; } free_xml(cmd); return rc; } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(resource_t *rsc) { const char *name = (rsc->clone_name? rsc->clone_name : rsc->id); return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } static int clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { int rc = pcmk_ok; /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id, TRUE, data_set); if (rc != pcmk_ok) { return rc; } crmd_replies_needed++; crm_trace("Processing %d mainloop inputs", crmd_replies_needed); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", crmd_replies_needed); } if (crmd_replies_needed < 0) { crmd_replies_needed = 0; } return rc; } static int clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { int rc = pcmk_ok; const char *failed_value = NULL; const char *failed_id = NULL; const char *interval_ms_s = NULL; GHashTable *rscs = NULL; GHashTableIter iter; /* Create a hash table to use as a set of resources to clean. This lets us * clean each resource only once (per node) regardless of how many failed * operations it has. */ rscs = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); // Normalize interval to milliseconds for comparison to history entry if (operation) { interval_ms_s = crm_strdup_printf("%u", crm_parse_interval_spec(interval_spec)); } for (xmlNode *xml_op = __xml_first_child(data_set->failed); xml_op != NULL; xml_op = __xml_next(xml_op)) { failed_id = crm_element_value(xml_op, XML_LRM_ATTR_RSCID); if (failed_id == NULL) { // Malformed history entry, should never happen continue; } // No resource specified means all resources match if (rsc_id) { resource_t *fail_rsc = pe_find_resource_with_flags(data_set->resources, failed_id, pe_find_renamed|pe_find_anon); if (!fail_rsc || safe_str_neq(rsc_id, fail_rsc->id)) { continue; } } // Host name should always have been provided by this point failed_value = crm_element_value(xml_op, XML_ATTR_UNAME); if (safe_str_neq(node_name, failed_value)) { continue; } // No operation specified means all operations match if (operation) { failed_value = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (safe_str_neq(operation, failed_value)) { continue; } // Interval (if operation was specified) defaults to 0 (not all) failed_value = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS); if (safe_str_neq(interval_ms_s, failed_value)) { continue; } } + /* not available until glib 2.32 g_hash_table_add(rscs, (gpointer) failed_id); + */ + g_hash_table_insert(rscs, (gpointer) failed_id, (gpointer) failed_id); } g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set); if (rc != pcmk_ok) { return rc; } } g_hash_table_destroy(rscs); return rc; } static int clear_rsc_fail_attrs(resource_t *rsc, const char *operation, const char *interval_spec, node_t *node) { int rc = pcmk_ok; int attr_options = attrd_opt_none; char *rsc_name = rsc_fail_name(rsc); if (is_remote_node(node)) { attr_options |= attrd_opt_remote; } rc = attrd_clear_delegate(NULL, node->details->uname, rsc_name, operation, interval_spec, NULL, attr_options); free(rsc_name); return rc; } int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set) { int rc = pcmk_ok; node_t *node = NULL; if (rsc == NULL) { return -ENXIO; } else if (rsc->children) { GListPtr lpc = NULL; for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; rc = cli_resource_delete(crmd_channel, host_uname, child, operation, interval_spec, just_failures, data_set); if (rc != pcmk_ok) { return rc; } } return pcmk_ok; } else if (host_uname == NULL) { GListPtr lpc = NULL; GListPtr nodes = g_hash_table_get_values(rsc->known_on); if(nodes == NULL && do_force) { nodes = node_list_dup(data_set->nodes, FALSE, FALSE); } else if(nodes == NULL && rsc->exclusive_discover) { GHashTableIter iter; pe_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) { if(node->weight >= 0) { nodes = g_list_prepend(nodes, node); } } } else if(nodes == NULL) { nodes = g_hash_table_get_values(rsc->allowed_nodes); } for (lpc = nodes; lpc != NULL; lpc = lpc->next) { node = (node_t *) lpc->data; if (node->details->online) { rc = cli_resource_delete(crmd_channel, node->details->uname, rsc, operation, interval_spec, just_failures, data_set); } if (rc != pcmk_ok) { g_list_free(nodes); return rc; } } g_list_free(nodes); return pcmk_ok; } node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { printf("Unable to clean up %s because node %s not found\n", rsc->id, host_uname); return -ENODEV; } if (!node->details->rsc_discovery_enabled) { printf("Unable to clean up %s because resource discovery disabled on %s\n", rsc->id, host_uname); return -EOPNOTSUPP; } if (crmd_channel == NULL) { printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", rsc->id, host_uname); return pcmk_ok; } rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node); if (rc != pcmk_ok) { printf("Unable to clean up %s failures on %s: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); return rc; } if (just_failures) { rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation, interval_spec, data_set); } else { rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set); } if (rc != pcmk_ok) { printf("Cleaned %s failures on %s, but unable to clean history: %s\n", rsc->id, host_uname, pcmk_strerror(rc)); } else { printf("Cleaned up %s on %s\n", rsc->id, host_uname); } return rc; } int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { int rc = pcmk_ok; int attr_options = attrd_opt_none; const char *display_name = node_name? node_name : "all nodes"; if (crmd_channel == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", display_name); return pcmk_ok; } crmd_replies_needed = 0; if (node_name) { node_t *node = pe_find_node(data_set->nodes, node_name); if (node == NULL) { CMD_ERR("Unknown node: %s", node_name); return -ENXIO; } if (is_remote_node(node)) { attr_options |= attrd_opt_remote; } } rc = attrd_clear_delegate(NULL, node_name, NULL, operation, interval_spec, NULL, attr_options); if (rc != pcmk_ok) { printf("Unable to clean up all failures on %s: %s\n", display_name, pcmk_strerror(rc)); return rc; } if (node_name) { rc = clear_rsc_failures(crmd_channel, node_name, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on %s, but unable to clean history: %s\n", node_name, pcmk_strerror(rc)); return rc; } } else { for (GList *iter = data_set->nodes; iter; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", pcmk_strerror(rc)); return rc; } } } printf("Cleaned up all resources on %s\n", display_name); return pcmk_ok; } void cli_resource_check(cib_t * cib_conn, resource_t *rsc) { int need_nl = 0; char *role_s = NULL; char *managed = NULL; resource_t *parent = uber_parent(rsc); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_MANAGED, &managed); find_resource_attr(cib_conn, XML_NVPAIR_ATTR_VALUE, parent->id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &role_s); if(role_s) { enum rsc_role_e role = text2role(role_s); free(role_s); if(role == RSC_ROLE_UNKNOWN) { // Treated as if unset } else if(role == RSC_ROLE_STOPPED) { printf("\n * The configuration specifies that '%s' should remain stopped\n", parent->id); need_nl++; } else if (is_set(parent->flags, pe_rsc_promotable) && (role == RSC_ROLE_SLAVE)) { printf("\n * The configuration specifies that '%s' should not be promoted\n", parent->id); need_nl++; } } if(managed && crm_is_true(managed) == FALSE) { printf("%s * The configuration prevents the cluster from stopping or starting '%s' (unmanaged)\n", need_nl == 0?"\n":"", parent->id); need_nl++; } free(managed); if(need_nl) { printf("\n"); } } int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set) { crm_warn("Failing: %s", rsc_id); return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); } static GHashTable * generate_resource_params(resource_t * rsc, pe_working_set_t * data_set) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; if (!rsc) { crm_err("Resource does not exist in config"); return NULL; } params = crm_str_table_new(); meta = crm_str_table_new(); combined = crm_str_table_new(); get_rsc_attributes(params, rsc, NULL /* TODO: Pass in local node */ , data_set); get_meta_attributes(meta, rsc, NULL /* TODO: Pass in local node */ , data_set); if (params) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { g_hash_table_insert(combined, strdup(key), strdup(value)); } g_hash_table_destroy(params); } if (meta) { char *key = NULL; char *value = NULL; g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } static bool resource_is_running_on(resource_t *rsc, const char *host) { bool found = TRUE; GListPtr hIter = NULL; GListPtr hosts = NULL; if(rsc == NULL) { return FALSE; } rsc->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pe_node_t *node = (pe_node_t *) hIter->data; if(strcmp(host, node->details->uname) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } else if(strcmp(host, node->details->id) == 0) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if(host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = FALSE; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = FALSE; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { resource_t *rsc = (resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (rsc->variant == pe_group) { active = g_list_concat(active, get_active_resources(host, rsc->children)); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static GList* subtract_lists(GList *from, GList *items) { GList *item = NULL; GList *result = g_list_copy(from); for (item = items; item != NULL; item = item->next) { GList *candidate = NULL; for (candidate = from; candidate != NULL; candidate = candidate->next) { crm_info("Comparing %s with %s", (const char *) candidate->data, (const char *) item->data); if(strcmp(candidate->data, item->data) == 0) { result = g_list_remove(result, candidate->data); break; } } } return result; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { fprintf(stdout, "%s%s\n", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as working set input * * This also updates the working set timestamp to the current time. * * \param[in] data_set Working set instance to update * \param[in] xml XML to use as input * * \return pcmk_ok on success, -ENOKEY if unable to upgrade XML * \note On success, caller is responsible for freeing memory allocated for * data_set->now. * \todo This follows the example of other callers of cli_config_update() * and returns -ENOKEY ("Required key not available") if that fails, * but perhaps -pcmk_err_schema_validation would be better in that case. */ int update_working_set_xml(pe_working_set_t *data_set, xmlNode **xml) { if (cli_config_update(xml, NULL, FALSE) == FALSE) { return -ENOKEY; } data_set->input = *xml; data_set->now = crm_time_new(NULL); return pcmk_ok; } /*! * \internal * \brief Update a working set's XML input based on a CIB query * * \param[in] data_set Data set instance to initialize * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure * \note On success, caller is responsible for freeing memory allocated for * data_set->input and data_set->now. */ static int update_working_set_from_cib(pe_working_set_t * data_set, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_scope_local | cib_sync_call); if (rc != pcmk_ok) { fprintf(stderr, "Could not obtain the current CIB: %s (%d)\n", pcmk_strerror(rc), rc); return rc; } rc = update_working_set_xml(data_set, &cib_xml_copy); if (rc != pcmk_ok) { fprintf(stderr, "Could not upgrade the current CIB XML\n"); free_xml(cib_xml_copy); return rc; } return pcmk_ok; } static int update_dataset(cib_t *cib, pe_working_set_t * data_set, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc; cleanup_alloc_calculations(data_set); rc = update_working_set_from_cib(data_set, cib); if (rc != pcmk_ok) { return rc; } if(simulate) { pid = crm_getpid_s(); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { fprintf(stderr, "Could not create shadow cib: '%s'\n", pid); rc = -ENXIO; goto cleanup; } rc = write_xml_file(data_set->input, shadow_file, FALSE); if (rc < 0) { fprintf(stderr, "Could not populate shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); if(rc != pcmk_ok) { fprintf(stderr, "Could not connect to shadow cib: %s (%d)\n", pcmk_strerror(rc), rc); goto cleanup; } do_calculations(data_set, data_set->input, NULL); run_simulation(data_set, shadow_cib, NULL, TRUE); rc = update_dataset(shadow_cib, data_set, FALSE); } else { cluster_status(data_set); } cleanup: /* Do not free data_set->input here, we need rsc->xml to be valid later on */ cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } static int max_delay_for_resource(pe_working_set_t * data_set, resource_t *rsc) { int delay = 0; int max_delay = 0; if(rsc && rsc->children) { GList *iter = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; delay = max_delay_for_resource(data_set, child); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, child->id); max_delay = delay; } } } else if(rsc) { char *key = crm_strdup_printf("%s_%s_0", rsc->id, RSC_STOP); action_t *stop = custom_action(rsc, key, RSC_STOP, NULL, TRUE, FALSE, data_set); const char *value = g_hash_table_lookup(stop->meta, XML_ATTR_TIMEOUT); max_delay = crm_int_helper(value, NULL); pe_free_action(stop); } return max_delay; } static int max_delay_in(pe_working_set_t * data_set, GList *resources) { int max_delay = 0; GList *item = NULL; for (item = resources; item != NULL; item = item->next) { int delay = 0; resource_t *rsc = pe_find_resource(data_set->resources, (const char *)item->data); delay = max_delay_for_resource(data_set, rsc); if(delay > max_delay) { double seconds = delay / 1000.0; crm_trace("Calculated new delay of %.1fs due to %s", seconds, rsc->id); max_delay = delay; } } return 5 + (max_delay / 1000); } #define waiting_for_starts(d, r, h) ((g_list_length(d) > 0) || \ (resource_is_running_on((r), (h)) == FALSE)) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in] rsc The resource to restart * \param[in] host The host to restart the resource on (or NULL for all) * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but a two-second * granularity is actually used; if 0, a timeout will be * calculated based on the resource timeout) * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure (exits on certain failures) */ int cli_resource_restart(resource_t * rsc, const char *host, int timeout_ms, cib_t * cib) { int rc = 0; int lpc = 0; int before = 0; int step_timeout_s = 0; int sleep_interval = 2; int timeout = timeout_ms / 1000; bool stop_via_ban = FALSE; char *rsc_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pe_working_set_t data_set; if(resource_is_running_on(rsc, host) == FALSE) { const char *id = rsc->clone_name?rsc->clone_name:rsc->id; if(host) { printf("%s is not running on %s and so cannot be restarted\n", id, host); } else { printf("%s is not running anywhere and so cannot be restarted\n", id); } return -ENXIO; } /* We might set the target-role meta-attribute */ attr_set_type = XML_TAG_META_SETS; rsc_id = strdup(rsc->id); if ((pe_rsc_is_clone(rsc) || pe_bundle_replicas(rsc)) && host) { stop_via_ban = TRUE; } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ set_working_set_defaults(&data_set); rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stdout, "Could not get new resource list: %s (%d)\n", pcmk_strerror(rc), rc); free(rsc_id); return rc; } restart_target_active = get_active_resources(host, data_set.resources); current_active = get_active_resources(host, data_set.resources); dump_list(current_active, "Origin"); if (stop_via_ban) { /* Stop the clone or bundle instance by banning it from the host */ BE_QUIET = TRUE; rc = cli_resource_ban(rsc_id, host, NULL, cib); } else { /* Stop the resource by setting target-role to Stopped. * Remember any existing target-role so we can restore it later * (though it only makes any difference if it's Slave). */ char *lookup_id = clone_strip(rsc->id); find_resource_attr(cib, XML_NVPAIR_ATTR_VALUE, lookup_id, NULL, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, &orig_target_role); free(lookup_id); rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, RSC_STOPPED, FALSE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not set target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); if (current_active) { g_list_free_full(current_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } free(rsc_id); return crm_exit(crm_errno2exit(rc)); } rc = update_dataset(cib, &data_set, TRUE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources would be stopped\n"); goto failure; } target_active = get_active_resources(host, data_set.resources); dump_list(target_active, "Target"); list_delta = subtract_lists(current_active, target_active); fprintf(stdout, "Waiting for %d resources to stop:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while(g_list_length(list_delta) > 0) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; lpc < step_timeout_s && g_list_length(list_delta) > 0; lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were stopped\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(current_active, target_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ fprintf(stderr, "Could not complete shutdown of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } if (stop_via_ban) { rc = cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } if(rc != pcmk_ok) { fprintf(stderr, "Could not unset target-role for %s: %s (%d)\n", rsc_id, pcmk_strerror(rc), rc); free(rsc_id); return crm_exit(crm_errno2exit(rc)); } if (target_active) { g_list_free_full(target_active, free); } target_active = restart_target_active; if (list_delta) { g_list_free(list_delta); } list_delta = subtract_lists(target_active, current_active); fprintf(stdout, "Waiting for %d resources to start again:\n", g_list_length(list_delta)); display_list(list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = max_delay_in(&data_set, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, &data_set, FALSE); if(rc != pcmk_ok) { fprintf(stderr, "Could not determine which resources were started\n"); goto failure; } if (current_active) { g_list_free_full(current_active, free); } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ current_active = get_active_resources(NULL, data_set.resources); g_list_free(list_delta); list_delta = subtract_lists(target_active, current_active); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ fprintf(stdout, "Could not complete restart of %s, %d resources remaining\n", rsc_id, g_list_length(list_delta)); display_list(list_delta, " * "); rc = -ETIME; goto failure; } } rc = pcmk_ok; goto done; failure: if (stop_via_ban) { cli_resource_clear(rsc_id, host, NULL, cib); } else if (orig_target_role) { cli_resource_update_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, orig_target_role, FALSE, cib, &data_set); free(orig_target_role); } else { cli_resource_delete_attribute(rsc, rsc_id, NULL, NULL, XML_RSC_ATTR_TARGET_ROLE, cib, &data_set); } done: if (list_delta) { g_list_free(list_delta); } if (current_active) { g_list_free_full(current_active, free); } if (target_active && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active) { g_list_free_full(restart_target_active, free); } cleanup_alloc_calculations(&data_set); free(rsc_id); return rc; } static inline int action_is_pending(action_t *action) { if(is_set(action->flags, pe_action_optional)) { return FALSE; } else if(is_set(action->flags, pe_action_runnable) == FALSE) { return FALSE; } else if(is_set(action->flags, pe_action_pseudo)) { return FALSE; } else if(safe_str_eq("notify", action->task)) { return FALSE; } return TRUE; } /*! * \internal * \brief Return TRUE if any actions in a list are pending * * \param[in] actions List of actions to check * * \return TRUE if any actions in the list are pending, FALSE otherwise */ static bool actions_are_pending(GListPtr actions) { GListPtr action; for (action = actions; action != NULL; action = action->next) { action_t *a = (action_t *)action->data; if (action_is_pending(a)) { crm_notice("Waiting for %s (flags=0x%.8x)", a->uuid, a->flags); return TRUE; } } return FALSE; } /*! * \internal * \brief Print pending actions to stderr * * \param[in] actions List of actions to check * * \return void */ static void print_pending_actions(GListPtr actions) { GListPtr action; fprintf(stderr, "Pending actions:\n"); for (action = actions; action != NULL; action = action->next) { action_t *a = (action_t *) action->data; if (action_is_pending(a)) { fprintf(stderr, "\tAction %d: %s", a->id, a->uuid); if (a->node) { fprintf(stderr, "\ton %s", a->node->details->uname); } fprintf(stderr, "\n"); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in] timeout_ms Consider failed if actions do not complete in this time * (specified in milliseconds, but one-second granularity * is actually used; if 0, a default will be used) * \param[in] cib Connection to the CIB manager * * \return pcmk_ok on success, -errno on failure */ int wait_till_stable(int timeout_ms, cib_t * cib) { pe_working_set_t data_set; int rc = -1; int timeout_s = timeout_ms? ((timeout_ms + 999) / 1000) : WAIT_DEFAULT_TIMEOUT_S; time_t expire_time = time(NULL) + timeout_s; time_t time_diff; bool printed_version_warning = BE_QUIET; // i.e. don't print if quiet set_working_set_defaults(&data_set); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff > 0) { crm_info("Waiting up to %ld seconds for cluster actions to complete", time_diff); } else { print_pending_actions(data_set.actions); cleanup_alloc_calculations(&data_set); return -ETIME; } if (rc == pcmk_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ cleanup_alloc_calculations(&data_set); rc = update_working_set_from_cib(&data_set, cib); if (rc != pcmk_ok) { cleanup_alloc_calculations(&data_set); return rc; } do_calculations(&data_set, data_set.input, NULL); if (!printed_version_warning) { /* If the DC has a different version than the local node, the two * could come to different conclusions about what actions need to be * done. Warn the user in this case. * * @TODO A possible long-term solution would be to reimplement the * wait as a new controller operation that would be forwarded to the * DC. However, that would have potential problems of its own. */ const char *dc_version = g_hash_table_lookup(data_set.config_hash, "dc-version"); if (safe_str_neq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION)) { printf("warning: --wait command may not work properly in mixed-version cluster\n"); printed_version_warning = TRUE; } } } while (actions_are_pending(data_set.actions)); return pcmk_ok; } int cli_resource_execute(resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, int timeout_ms, cib_t * cib, pe_working_set_t *data_set) { int rc = pcmk_ok; svc_action_t *op = NULL; const char *rid = NULL; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; const char *action = NULL; GHashTable *params = NULL; if (safe_str_eq(rsc_action, "validate")) { action = "validate-all"; } else if (safe_str_eq(rsc_action, "force-check")) { action = "monitor"; } else if (safe_str_eq(rsc_action, "force-stop")) { action = rsc_action+6; } else if (safe_str_eq(rsc_action, "force-start") || safe_str_eq(rsc_action, "force-demote") || safe_str_eq(rsc_action, "force-promote")) { action = rsc_action+6; if(pe_rsc_is_clone(rsc)) { rc = cli_resource_search(rsc, requested_name, data_set); if(rc > 0 && do_force == FALSE) { CMD_ERR("It is not safe to %s %s here: the cluster claims it is already active", action, rsc->id); CMD_ERR("Try setting target-role=stopped first or specifying --force"); crm_exit(CRM_EX_UNSAFE); } } } if(pe_rsc_is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->children->data; } if(rsc->variant == pe_group) { CMD_ERR("Sorry, --%s doesn't support group resources", rsc_action); crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); } rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); rprov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE); if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { CMD_ERR("Sorry, --%s doesn't support %s resources yet", rsc_action, rclass); crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); } params = generate_resource_params(rsc, data_set); /* add meta_timeout env needed by some resource agents */ if (timeout_ms == 0) { timeout_ms = pe_get_configured_timeout(rsc, action, data_set); } g_hash_table_insert(params, strdup("CRM_meta_timeout"), crm_strdup_printf("%d", timeout_ms)); /* add crm_feature_set env needed by some resource agents */ g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id; op = resources_action_create(rid, rclass, rprov, rtype, action, 0, timeout_ms, params, 0); if (op == NULL) { /* Re-run with stderr enabled so we can display a sane error message */ crm_enable_stderr(TRUE); op = resources_action_create(rid, rclass, rprov, rtype, action, 0, timeout_ms, params, 0); /* We know op will be NULL, but this makes static analysis happy */ services_action_free(op); return crm_exit(CRM_EX_DATAERR); } setenv("HA_debug", resource_verbose > 0 ? "1" : "0", 1); if(resource_verbose > 1) { setenv("OCF_TRACE_RA", "1", 1); } if (override_hash) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, override_hash); while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { printf("Overriding the cluster configuration for '%s' with '%s' = '%s'\n", rsc->id, name, value); g_hash_table_replace(op->params, strdup(name), strdup(value)); } } if (services_action_sync(op)) { int more, lpc, last; char *local_copy = NULL; if (op->status == PCMK_LRM_OP_DONE) { printf("Operation %s for %s (%s:%s:%s) returned: '%s' (%d)\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, services_ocf_exitcode_str(op->rc), op->rc); } else { printf("Operation %s for %s (%s:%s:%s) failed: '%s' (%d)\n", action, rsc->id, rclass, rprov ? rprov : "", rtype, services_lrm_status_str(op->status), op->status); } /* hide output for validate-all if not in verbose */ if (resource_verbose == 0 && safe_str_eq(action, "validate-all")) goto done; if (op->stdout_data) { local_copy = strdup(op->stdout_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stdout: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } if (op->stderr_data) { local_copy = strdup(op->stderr_data); more = strlen(local_copy); last = 0; for (lpc = 0; lpc < more; lpc++) { if (local_copy[lpc] == '\n' || local_copy[lpc] == 0) { local_copy[lpc] = 0; printf(" > stderr: %s\n", local_copy + last); last = lpc + 1; } } free(local_copy); } } done: rc = op->rc; services_action_free(op); return rc; } int cli_resource_move(resource_t *rsc, const char *rsc_id, const char *host_name, cib_t *cib, pe_working_set_t *data_set) { int rc = pcmk_ok; unsigned int count = 0; node_t *current = NULL; node_t *dest = pe_find_node(data_set->nodes, host_name); bool cur_is_dest = FALSE; if (dest == NULL) { return -pcmk_err_node_unknown; } if (scope_master && is_not_set(rsc->flags, pe_rsc_promotable)) { resource_t *p = uber_parent(rsc); if (is_set(p->flags, pe_rsc_promotable)) { CMD_ERR("Using parent '%s' for --move command instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { CMD_ERR("Ignoring '--master' option: %s is not a promotable resource", rsc_id); scope_master = FALSE; } } current = pe__find_active_requires(rsc, &count); if (is_set(rsc->flags, pe_rsc_promotable)) { GListPtr iter = NULL; unsigned int master_count = 0; pe_node_t *master_node = NULL; for(iter = rsc->children; iter; iter = iter->next) { resource_t *child = (resource_t *)iter->data; enum rsc_role_e child_role = child->fns->state(child, TRUE); if(child_role == RSC_ROLE_MASTER) { rsc = child; master_node = pe__current_node(child); master_count++; } } if (scope_master || master_count) { count = master_count; current = master_node; } } if (count > 1) { if (pe_rsc_is_clone(rsc)) { current = NULL; } else { return -pcmk_err_multiple; } } if (current && (current->details == dest->details)) { cur_is_dest = TRUE; if (do_force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, scope_master?"promoted":"active", dest->details->uname); } else { return -pcmk_err_already; } } /* Clear any previous constraints for 'dest' */ cli_resource_clear(rsc_id, dest->details->uname, data_set->nodes, cib); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(rsc_id, dest->details->uname, cib); crm_trace("%s%s now prefers node %s%s", rsc->id, scope_master?" (master)":"", dest->details->uname, do_force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if(do_force && (cur_is_dest == FALSE)) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(rsc_id, current->details->uname, NULL, cib); } else if(count > 1) { CMD_ERR("Resource '%s' is currently %s in %d locations. One may now move to %s", rsc_id, scope_master?"promoted":"active", count, dest->details->uname); CMD_ERR("You can prevent '%s' from being %s at a specific location with:" " --ban %s--host ", rsc_id, scope_master?"promoted":"active", scope_master?"--master ":""); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } static void cli_resource_why_without_rsc_and_host(cib_t *cib_conn,GListPtr resources) { GListPtr lpc = NULL; GListPtr hosts = NULL; for (lpc = resources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; rsc->fns->location(rsc, &hosts, TRUE); if (hosts == NULL) { printf("Resource %s is not running\n", rsc->id); } else { printf("Resource %s is running\n", rsc->id); } cli_resource_check(cib_conn, rsc); g_list_free(hosts); hosts = NULL; } } static void cli_resource_why_with_rsc_and_host(cib_t *cib_conn, GListPtr resources, resource_t *rsc, const char *host_uname) { if (resource_is_running_on(rsc, host_uname)) { printf("Resource %s is running on host %s\n",rsc->id,host_uname); } else { printf("Resource %s is not running on host %s\n", rsc->id, host_uname); } cli_resource_check(cib_conn, rsc); } static void cli_resource_why_without_rsc_with_host(cib_t *cib_conn,GListPtr resources,node_t *node) { const char* host_uname = node->details->uname; GListPtr allResources = node->details->allocated_rsc; GListPtr activeResources = node->details->running_rsc; GListPtr unactiveResources = subtract_lists(allResources,activeResources); GListPtr lpc = NULL; for (lpc = activeResources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; printf("Resource %s is running on host %s\n",rsc->id,host_uname); cli_resource_check(cib_conn,rsc); } for(lpc = unactiveResources; lpc != NULL; lpc = lpc->next) { resource_t *rsc = (resource_t *) lpc->data; printf("Resource %s is assigned to host %s but not running\n", rsc->id, host_uname); cli_resource_check(cib_conn,rsc); } g_list_free(allResources); g_list_free(activeResources); g_list_free(unactiveResources); } static void cli_resource_why_with_rsc_without_host(cib_t *cib_conn, GListPtr resources, resource_t *rsc) { GListPtr hosts = NULL; rsc->fns->location(rsc, &hosts, TRUE); printf("Resource %s is %srunning\n", rsc->id, (hosts? "" : "not ")); cli_resource_check(cib_conn, rsc); g_list_free(hosts); } void cli_resource_why(cib_t *cib_conn, GListPtr resources, resource_t *rsc, node_t *node) { const char *host_uname = (node == NULL)? NULL : node->details->uname; if ((rsc == NULL) && (host_uname == NULL)) { cli_resource_why_without_rsc_and_host(cib_conn, resources); } else if ((rsc != NULL) && (host_uname != NULL)) { cli_resource_why_with_rsc_and_host(cib_conn, resources, rsc, host_uname); } else if ((rsc == NULL) && (host_uname != NULL)) { cli_resource_why_without_rsc_with_host(cib_conn, resources, node); } else if ((rsc != NULL) && (host_uname == NULL)) { cli_resource_why_with_rsc_without_host(cib_conn, resources, rsc); } }