diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c index cf2caf5238..65fee22de4 100644 --- a/daemons/attrd/attrd_alerts.c +++ b/daemons/attrd/attrd_alerts.c @@ -1,146 +1,146 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" static GListPtr attrd_alert_list = NULL; static void attrd_lrmd_callback(lrmd_event_data_t * op) { CRM_CHECK(op != NULL, return); switch (op->type) { case lrmd_event_disconnect: crm_info("Lost connection to executor"); attrd_lrmd_disconnect(); break; default: break; } } static lrmd_t * -attrd_lrmd_connect() +attrd_lrmd_connect(void) { if (the_lrmd == NULL) { the_lrmd = lrmd_api_new(); the_lrmd->cmds->set_callback(the_lrmd, attrd_lrmd_callback); } if (!the_lrmd->cmds->is_connected(the_lrmd)) { const unsigned int max_attempts = 10; int ret = -ENOTCONN; for (int fails = 0; fails < max_attempts; ++fails) { ret = the_lrmd->cmds->connect(the_lrmd, T_ATTRD, NULL); if (ret == pcmk_ok) { break; } crm_debug("Could not connect to executor, %d tries remaining", (max_attempts - fails)); /* @TODO We don't want to block here with sleep, but we should wait * some time between connection attempts. We could possibly add a * timer with a callback, but then we'd likely need an alert queue. */ } if (ret != pcmk_ok) { attrd_lrmd_disconnect(); } } return the_lrmd; } void -attrd_lrmd_disconnect() { +attrd_lrmd_disconnect(void) { if (the_lrmd) { lrmd_t *conn = the_lrmd; the_lrmd = NULL; /* in case we're called recursively */ lrmd_api_delete(conn); /* will disconnect if necessary */ } } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *crmalerts = NULL; if (rc == -ENXIO) { crm_debug("Local CIB has no alerts section"); return; } else if (rc != pcmk_ok) { crm_notice("Could not query local CIB: %s", pcmk_strerror(rc)); return; } crmalerts = output; if (crmalerts && !crm_str_eq(crm_element_name(crmalerts), XML_CIB_TAG_ALERTS, TRUE)) { crmalerts = first_named_child(crmalerts, XML_CIB_TAG_ALERTS); } if (!crmalerts) { crm_notice("CIB query result has no " XML_CIB_TAG_ALERTS " section"); return; } pe_free_alert_list(attrd_alert_list); attrd_alert_list = pe_unpack_alerts(crmalerts); } #define XPATH_ALERTS \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_ALERTS gboolean attrd_read_options(gpointer user_data) { int call_id; CRM_CHECK(the_cib != NULL, return TRUE); call_id = the_cib->cmds->query(the_cib, XPATH_ALERTS, NULL, cib_xpath | cib_scope_local); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, NULL, "config_query_callback", config_query_callback, free); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } void attrd_cib_updated_cb(const char *event, xmlNode * msg) { if (!attrd_shutting_down() && pcmk__alert_in_patchset(msg, FALSE)) { mainloop_set_trigger(attrd_config_read); } } int attrd_send_attribute_alert(const char *node, int nodeid, const char *attr, const char *value) { if (attrd_alert_list == NULL) { return pcmk_ok; } return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list, node, nodeid, attr, value); } diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index 8481ea3993..61e5493131 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -1,449 +1,449 @@ /* * Copyright 2013-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" lrmd_t *the_lrmd = NULL; crm_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; static crm_exit_t attrd_exit_status = CRM_EX_OK; static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_replaced_cb(const char *event, xmlNode * msg) { if (attrd_shutting_down()) { return; } if (attrd_election_won()) { crm_notice("Updating all attributes after %s event", event); write_attributes(TRUE, FALSE); } // Check for changes in alerts mainloop_set_trigger(attrd_config_read); } static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } return; } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), "Cleared transient attributes: %s " CRM_XS " xpath=%s rc=%d", pcmk_strerror(rc), (char *) user_data, rc); } #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS /*! * \internal * \brief Wipe all transient attributes for this node from the CIB * * Clear any previous transient node attributes from the CIB. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), * ideally we'd skip this and sync our attributes from the writer. * However, currently we reject any values for us that the writer has, in * attrd_peer_update(). */ static void -attrd_erase_attrs() +attrd_erase_attrs(void) { int call_id; char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_quorum_override | cib_xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, "attrd_erase_cb", attrd_erase_cb, free); } static int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if(attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); } while(rc != pcmk_ok && attempts < max_retry); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); if(rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); goto cleanup; } return pcmk_ok; cleanup: the_cib->cmds->signoff(the_cib); cib_delete(the_cib); the_cib = NULL; return -ENOTCONN; } /*! * \internal * \brief Prepare the CIB after cluster is connected */ static void -attrd_cib_init() +attrd_cib_init(void) { // We have no attribute values in memory, wipe the CIB to match attrd_erase_attrs(); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static qb_ipcs_service_t *ipcs = NULL; static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *xml = NULL; const char *op; // Sanity-check, and parse XML from IPC data CRM_CHECK((c != NULL) && (client != NULL), return 0); if (data == NULL) { crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(xml, PCMK__XA_ATTR_USER, client->user); #endif op = crm_element_value(xml, PCMK__XA_TASK); if (client->name == NULL) { const char *value = crm_element_value(xml, F_ORIG); client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid); } if (safe_str_eq(op, PCMK__ATTRD_CMD_PEER_REMOVE)) { attrd_send_ack(client, id, flags); attrd_client_peer_remove(client->name, xml); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_CLEAR_FAILURE)) { attrd_send_ack(client, id, flags); attrd_client_clear_failure(xml); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_UPDATE)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_UPDATE_BOTH)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_UPDATE_DELAY)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_REFRESH)) { attrd_send_ack(client, id, flags); attrd_client_refresh(); } else if (safe_str_eq(op, PCMK__ATTRD_CMD_QUERY)) { /* queries will get reply, so no ack is necessary */ attrd_client_query(client, id, flags, xml); } else { crm_info("Ignoring request from client %s with unknown operation %s", client->name, op); } free_xml(xml); return 0; } void -attrd_ipc_fini() +attrd_ipc_fini(void) { if (ipcs != NULL) { pcmk__drop_all_clients(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } } static int -attrd_cluster_connect() +attrd_cluster_connect(void) { attrd_cluster = calloc(1, sizeof(crm_cluster_t)); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\tThis text", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output", pcmk__option_default }, { 0, 0, 0, 0 } }; int main(int argc, char **argv) { int flag = 0; int index = 0; int argerr = 0; crm_ipc_t *old_instance = NULL; attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for managing Pacemaker node attributes"); mainloop_add_signal(SIGTERM, attrd_shutdown); while (1) { flag = pcmk__next_cli_option(argc, argv, &index, NULL); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker node attribute manager"); old_instance = crm_ipc_new(T_ATTRD, 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-attrd is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); /* Connect to the CIB before connecting to the cluster or listening for IPC. * This allows us to assume the CIB is connected whenever we process a * cluster or IPC message (which also avoids start-up race conditions). */ if (attrd_cib_connect(10) != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("CIB connection active"); if (attrd_cluster_connect() != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("Cluster connection active"); // Initialization that requires the cluster to be connected attrd_election_init(); attrd_cib_init(); /* Set a private attribute for ourselves with the protocol version we * support. This lets all nodes determine the minimum supported version * across all nodes. It also ensures that the writer learns our node name, * so it can send our attributes to the CIB. */ attrd_broadcast_protocol(); attrd_init_ipc(&ipcs, attrd_ipc_dispatch); crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); attrd_run_mainloop(); done: crm_info("Shutting down attribute manager"); attrd_election_fini(); attrd_ipc_fini(); attrd_lrmd_disconnect(); attrd_cib_disconnect(); g_hash_table_destroy(attributes); crm_exit(attrd_exit_status); } diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index e62beabd2f..023b55c9e6 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -1,773 +1,773 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include char *max_epoch = NULL; char *max_generation_from = NULL; xmlNode *max_generation_xml = NULL; void finalize_join_for(gpointer key, gpointer value, gpointer user_data); void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data); gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source); /* Numeric counter used to identify join rounds (an unsigned int would be * appropriate, except we get and set it in XML as int) */ static int current_join_id = 0; unsigned long long saved_ccm_membership_id = 0; void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase) { enum crm_join_phase last = 0; CRM_CHECK(node != NULL, return); /* Remote nodes do not participate in joins */ if (is_set(node->flags, crm_remote_node)) { return; } last = node->join; if(phase == last) { crm_trace("Node %s join-%d phase is still %s " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(last), node->id, source); } else if ((phase <= crm_join_none) || (phase == (last + 1))) { node->join = phase; crm_trace("Node %s join-%d phase is now %s (was %s) " CRM_XS " nodeid=%u source=%s", node->uname, current_join_id, crm_join_phase_str(phase), crm_join_phase_str(last), node->id, source); } else { crm_warn("Rejecting join-%d phase update for node %s because " "can't go from %s to %s " CRM_XS " nodeid=%u source=%s", current_join_id, node->uname, crm_join_phase_str(last), crm_join_phase_str(phase), node->id, source); } } static void -start_join_round() +start_join_round(void) { GHashTableIter iter; crm_node_t *peer = NULL; crm_debug("Starting new join round join-%d", current_join_id); g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { crm_update_peer_join(__FUNCTION__, peer, crm_join_none); } if (max_generation_from != NULL) { free(max_generation_from); max_generation_from = NULL; } if (max_generation_xml != NULL) { free_xml(max_generation_xml); max_generation_xml = NULL; } clear_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); } /*! * \internal * \brief Create a join message from the DC * * \param[in] join_op Join operation name * \param[in] host_to Recipient of message */ static xmlNode * create_dc_message(const char *join_op, const char *host_to) { xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD, CRM_SYSTEM_DC, NULL); /* Identify which election this is a part of */ crm_xml_add_int(msg, F_CRM_JOIN_ID, current_join_id); /* Add a field specifying whether the DC is shutting down. This keeps the * joining node from fencing the old DC if it becomes the new DC. */ crm_xml_add_boolean(msg, F_CRM_DC_LEAVING, is_set(fsa_input_register, R_SHUTDOWN)); return msg; } static void join_make_offer(gpointer key, gpointer value, gpointer user_data) { xmlNode *offer = NULL; crm_node_t *member = (crm_node_t *)value; CRM_ASSERT(member != NULL); if (crm_is_peer_active(member) == FALSE) { crm_info("Not making join-%d offer to inactive node %s", current_join_id, (member->uname? member->uname : "with unknown name")); if(member->expected == NULL && safe_str_eq(member->state, CRM_NODE_LOST)) { /* You would think this unsafe, but in fact this plus an * active resource is what causes it to be fenced. * * Yes, this does mean that any node that dies at the same * time as the old DC and is not running resource (still) * won't be fenced. * * I'm not happy about this either. */ crm_update_peer_expected(__FUNCTION__, member, CRMD_JOINSTATE_DOWN); } return; } if (member->uname == NULL) { crm_info("Not making join-%d offer to node uuid %s with unknown name", current_join_id, member->uuid); return; } if (saved_ccm_membership_id != crm_peer_seq) { saved_ccm_membership_id = crm_peer_seq; crm_info("Making join-%d offers based on membership event %llu", current_join_id, crm_peer_seq); } if(user_data && member->join > crm_join_none) { crm_info("Not making join-%d offer to already known node %s (%s)", current_join_id, member->uname, crm_join_phase_str(member->join)); return; } crm_update_peer_join(__FUNCTION__, (crm_node_t*)member, crm_join_none); offer = create_dc_message(CRM_OP_JOIN_OFFER, member->uname); // Advertise our feature set so the joining node can bail if not compatible crm_xml_add(offer, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); crm_info("Sending join-%d offer to %s", current_join_id, member->uname); send_cluster_message(member, crm_msg_crmd, offer, TRUE); free_xml(offer); crm_update_peer_join(__FUNCTION__, member, crm_join_welcomed); } /* A_DC_JOIN_OFFER_ALL */ void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int count; /* Reset everyone's status back to down or in_ccm in the CIB. * Any nodes that are active in the CIB but not in the cluster membership * will be seen as offline by the scheduler anyway. */ current_join_id++; start_join_round(); /* do_update_cib_nodes(TRUE, __FUNCTION__); */ update_dc(NULL); if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) { crm_info("A new node joined the cluster"); } g_hash_table_foreach(crm_peer_cache, join_make_offer, NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } /* A_DC_JOIN_OFFER_ONE */ void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_node_t *member; ha_msg_input_t *welcome = NULL; int count; const char *join_to = NULL; if (msg_data->data == NULL) { crm_info("Making join-%d offers to any unconfirmed nodes " "because an unknown node joined", current_join_id); g_hash_table_foreach(crm_peer_cache, join_make_offer, &member); check_join_state(cur_state, __FUNCTION__); return; } welcome = fsa_typed_data(fsa_dt_ha_msg); if (welcome == NULL) { // fsa_typed_data() already logged an error return; } join_to = crm_element_value(welcome->msg, F_CRM_HOST_FROM); if (join_to == NULL) { crm_err("Can't make join-%d offer to unknown node", current_join_id); return; } member = crm_get_peer(0, join_to); /* It is possible that a node will have been sick or starting up when the * original offer was made. However, it will either re-announce itself in * due course, or we can re-store the original offer on the client. */ crm_update_peer_join(__FUNCTION__, member, crm_join_none); join_make_offer(NULL, member, NULL); /* If the offer isn't to the local node, make an offer to the local node as * well, to ensure the correct value for max_generation_from. */ if (strcmp(join_to, fsa_our_uname) != 0) { member = crm_get_peer(0, fsa_our_uname); join_make_offer(NULL, member, NULL); } /* This was a genuine join request; cancel any existing transition and * invoke the scheduler. */ abort_transition(INFINITY, tg_restart, "Node join", NULL); count = crmd_join_phase_count(crm_join_welcomed); crm_info("Waiting on join-%d requests from %d outstanding node%s", current_join_id, count, pcmk__plural_s(count)); // Don't waste time by invoking the scheduler yet } static int compare_int_fields(xmlNode * left, xmlNode * right, const char *field) { const char *elem_l = crm_element_value(left, field); const char *elem_r = crm_element_value(right, field); long long int_elem_l = elem_l? crm_parse_ll(elem_l, NULL) : -1; long long int_elem_r = elem_r? crm_parse_ll(elem_r, NULL) : -1; if (int_elem_l < int_elem_r) { return -1; } else if (int_elem_l > int_elem_r) { return 1; } return 0; } /* A_DC_JOIN_PROCESS_REQ */ void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *generation = NULL; int cmp = 0; int join_id = -1; int count = 0; gboolean ack_nack_bool = TRUE; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); const char *ref = crm_element_value(join_ack->msg, F_CRM_REFERENCE); const char *join_version = crm_element_value(join_ack->msg, XML_ATTR_CRM_VERSION); crm_node_t *join_node = NULL; if (join_from == NULL) { crm_err("Ignoring invalid join request without node name"); return; } join_node = crm_get_peer(0, join_from); crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); if (join_id != current_join_id) { crm_debug("Ignoring join-%d request from %s because we are on join-%d", join_id, join_from, current_join_id); check_join_state(cur_state, __FUNCTION__); return; } generation = join_ack->xml; if (max_generation_xml != NULL && generation != NULL) { int lpc = 0; const char *attributes[] = { XML_ATTR_GENERATION_ADMIN, XML_ATTR_GENERATION, XML_ATTR_NUMUPDATES, }; for (lpc = 0; cmp == 0 && lpc < DIMOF(attributes); lpc++) { cmp = compare_int_fields(max_generation_xml, generation, attributes[lpc]); } } if (ref == NULL) { ref = "none"; // for logging only } if (crm_is_peer_active(join_node) == FALSE) { crm_err("Rejecting join-%d request from inactive node %s " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if (generation == NULL) { crm_err("Rejecting invalid join-%d request from node %s " "missing CIB generation " CRM_XS " ref=%s", join_id, join_from, ref); ack_nack_bool = FALSE; } else if ((join_version == NULL) || !feature_set_compatible(CRM_FEATURE_SET, join_version)) { crm_err("Rejecting join-%d request from node %s because feature set %s" " is incompatible with ours (%s) " CRM_XS " ref=%s", join_id, join_from, (join_version? join_version : "pre-3.1.0"), CRM_FEATURE_SET, ref); ack_nack_bool = FALSE; } else if (max_generation_xml == NULL) { crm_debug("Accepting join-%d request from %s " "(with first CIB generation) " CRM_XS " ref=%s", join_id, join_from, ref); max_generation_xml = copy_xml(generation); max_generation_from = strdup(join_from); } else if (cmp < 0 || (cmp == 0 && safe_str_eq(join_from, fsa_our_uname))) { crm_debug("Accepting join-%d request from %s (with better " "CIB generation than current best from %s) " CRM_XS " ref=%s", join_id, join_from, max_generation_from, ref); crm_log_xml_debug(max_generation_xml, "Old max generation"); crm_log_xml_debug(generation, "New max generation"); free(max_generation_from); free_xml(max_generation_xml); max_generation_from = strdup(join_from); max_generation_xml = copy_xml(join_ack->xml); } else { crm_debug("Accepting join-%d request from %s " CRM_XS " ref=%s", join_id, join_from, ref); } if (ack_nack_bool == FALSE) { crm_update_peer_join(__FUNCTION__, join_node, crm_join_nack); crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_NACK); } else { crm_update_peer_join(__FUNCTION__, join_node, crm_join_integrated); crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); } count = crmd_join_phase_count(crm_join_integrated); crm_debug("%d node%s currently integrated in join-%d", count, pcmk__plural_s(count), join_id); if (check_join_state(cur_state, __FUNCTION__) == FALSE) { // Don't waste time by invoking the scheduler yet count = crmd_join_phase_count(crm_join_welcomed); crm_debug("Waiting on join-%d requests from %d outstanding node%s", join_id, count, pcmk__plural_s(count)); } } /* A_DC_JOIN_FINALIZE */ void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { char *sync_from = NULL; int rc = pcmk_ok; int count_welcomed = crmd_join_phase_count(crm_join_welcomed); int count_integrated = crmd_join_phase_count(crm_join_integrated); /* This we can do straight away and avoid clients timing us out * while we compute the latest CIB */ if (count_welcomed != 0) { crm_debug("Waiting on join-%d requests from %d outstanding node%s " "before finalizing join", current_join_id, count_welcomed, pcmk__plural_s(count_welcomed)); crmd_join_phase_log(LOG_DEBUG); /* crmd_fsa_stall(FALSE); Needed? */ return; } else if (count_integrated == 0) { crm_debug("Finalization not needed for join-%d at the current time", current_join_id); crmd_join_phase_log(LOG_DEBUG); check_join_state(fsa_state, __FUNCTION__); return; } clear_bit(fsa_input_register, R_HAVE_CIB); if (max_generation_from == NULL || safe_str_eq(max_generation_from, fsa_our_uname)) { set_bit(fsa_input_register, R_HAVE_CIB); } if (is_set(fsa_input_register, R_IN_TRANSITION)) { crm_warn("Delaying join-%d finalization while transition in progress", current_join_id); crmd_join_phase_log(LOG_DEBUG); crmd_fsa_stall(FALSE); return; } if (max_generation_from && is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { /* ask for the agreed best CIB */ sync_from = strdup(max_generation_from); set_bit(fsa_input_register, R_CIB_ASKED); crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)", current_join_id, count_integrated, pcmk__plural_s(count_integrated), sync_from); crm_log_xml_notice(max_generation_xml, "Requested CIB version"); } else { /* Send _our_ CIB out to everyone */ sync_from = strdup(fsa_our_uname); crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)", current_join_id, count_integrated, pcmk__plural_s(count_integrated)); crm_log_xml_debug(max_generation_xml, "Requested CIB version"); } crmd_join_phase_log(LOG_DEBUG); rc = fsa_cib_conn->cmds->sync_from(fsa_cib_conn, sync_from, NULL, cib_quorum_override); fsa_register_cib_callback(rc, FALSE, sync_from, finalize_sync_callback); } void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { CRM_LOG_ASSERT(-EPERM != rc); clear_bit(fsa_input_register, R_CIB_ASKED); if (rc != pcmk_ok) { do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR), "Could not sync CIB from %s in join-%d: %s", (char *) user_data, current_join_id, pcmk_strerror(rc)); /* restart the whole join process */ register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL, __FUNCTION__); } else if (!AM_I_DC) { crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id); } else if (fsa_state != S_FINALIZE_JOIN) { crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN (%s)", current_join_id, fsa_state2string(fsa_state)); } else { set_bit(fsa_input_register, R_HAVE_CIB); clear_bit(fsa_input_register, R_CIB_ASKED); /* make sure dc_uuid is re-set to us */ if (check_join_state(fsa_state, __FUNCTION__) == FALSE) { int count_integrated = crmd_join_phase_count(crm_join_integrated); crm_debug("Notifying %d node%s of join-%d results", count_integrated, pcmk__plural_s(count_integrated), current_join_id); g_hash_table_foreach(crm_peer_cache, finalize_join_for, NULL); } } } static void join_update_complete_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { fsa_data_t *msg_data = NULL; if (rc == pcmk_ok) { crm_debug("join-%d node history update (via CIB call %d) complete", current_join_id, call_id); check_join_state(fsa_state, __FUNCTION__); } else { crm_err("join-%d node history update (via CIB call %d) failed: %s " "(next transition may determine resource status incorrectly)", current_join_id, call_id, pcmk_strerror(rc)); crm_log_xml_debug(msg, "failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } /* A_DC_JOIN_PROCESS_ACK */ void do_dc_join_ack(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int join_id = -1; int call_id = 0; ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg); enum controld_section_e section = controld_section_lrm; const char *op = crm_element_value(join_ack->msg, F_CRM_TASK); const char *join_from = crm_element_value(join_ack->msg, F_CRM_HOST_FROM); crm_node_t *peer = NULL; // Sanity checks if (join_from == NULL) { crm_warn("Ignoring message received without node identification"); return; } if (op == NULL) { crm_warn("Ignoring message received from %s without task", join_from); return; } if (strcmp(op, CRM_OP_JOIN_CONFIRM)) { crm_debug("Ignoring '%s' message from %s while waiting for '%s'", op, join_from, CRM_OP_JOIN_CONFIRM); return; } if (crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id) != 0) { crm_warn("Ignoring join confirmation from %s without valid join ID", join_from); return; } peer = crm_get_peer(0, join_from); if (peer->join != crm_join_finalized) { crm_info("Ignoring out-of-sequence join-%d confirmation from %s " "(currently %s not %s)", join_id, join_from, crm_join_phase_str(peer->join), crm_join_phase_str(crm_join_finalized)); return; } if (join_id != current_join_id) { crm_err("Rejecting join-%d confirmation from %s " "because currently on join-%d", join_id, join_from, current_join_id); crm_update_peer_join(__FUNCTION__, peer, crm_join_nack); return; } crm_update_peer_join(__FUNCTION__, peer, crm_join_confirmed); /* Update CIB with node's current executor state. A new transition will be * triggered later, when the CIB notifies us of the change. */ if (controld_shutdown_lock_enabled) { section = controld_section_lrm_unlocked; } controld_delete_node_state(join_from, section, cib_scope_local); if (safe_str_eq(join_from, fsa_our_uname)) { xmlNode *now_dc_lrmd_state = controld_query_executor_state(fsa_our_uname); if (now_dc_lrmd_state != NULL) { fsa_cib_update(XML_CIB_TAG_STATUS, now_dc_lrmd_state, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); free_xml(now_dc_lrmd_state); crm_debug("Updating local node history for join-%d " "from query result (via CIB call %d)", join_id, call_id); } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); crm_warn("Updating local node history from join-%d confirmation " "because query failed (via CIB call %d)", join_id, call_id); } } else { fsa_cib_update(XML_CIB_TAG_STATUS, join_ack->xml, cib_scope_local | cib_quorum_override | cib_can_create, call_id, NULL); crm_debug("Updating node history for %s from join-%d confirmation " "(via CIB call %d)", join_from, join_id, call_id); } fsa_register_cib_callback(call_id, FALSE, NULL, join_update_complete_callback); } void finalize_join_for(gpointer key, gpointer value, gpointer user_data) { xmlNode *acknak = NULL; xmlNode *tmp1 = NULL; crm_node_t *join_node = value; const char *join_to = join_node->uname; if(join_node->join != crm_join_integrated) { crm_trace("Not updating non-integrated node %s (%s) for join-%d", join_to, crm_join_phase_str(join_node->join), current_join_id); return; } crm_trace("Updating node state for %s", join_to); tmp1 = create_xml_node(NULL, XML_CIB_TAG_NODE); set_uuid(tmp1, XML_ATTR_UUID, join_node); crm_xml_add(tmp1, XML_ATTR_UNAME, join_to); fsa_cib_anon_update(XML_CIB_TAG_NODES, tmp1); free_xml(tmp1); join_node = crm_get_peer(0, join_to); if (crm_is_peer_active(join_node) == FALSE) { /* * NACK'ing nodes that the membership layer doesn't know about yet * simply creates more churn * * Better to leave them waiting and let the join restart when * the new membership event comes in * * All other NACKs (due to versions etc) should still be processed */ crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_PENDING); return; } // Acknowledge node's join request crm_debug("Acknowledging join-%d request from %s", current_join_id, join_to); acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to); crm_xml_add(acknak, CRM_OP_JOIN_ACKNAK, XML_BOOLEAN_TRUE); crm_update_peer_join(__FUNCTION__, join_node, crm_join_finalized); crm_update_peer_expected(__FUNCTION__, join_node, CRMD_JOINSTATE_MEMBER); send_cluster_message(crm_get_peer(0, join_to), crm_msg_crmd, acknak, TRUE); free_xml(acknak); return; } gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source) { static unsigned long long highest_seq = 0; if (saved_ccm_membership_id != crm_peer_seq) { crm_debug("join-%d: Membership changed from %llu to %llu " CRM_XS " highest=%llu state=%s for=%s", current_join_id, saved_ccm_membership_id, crm_peer_seq, highest_seq, fsa_state2string(cur_state), source); if(highest_seq < crm_peer_seq) { /* Don't spam the FSA with duplicates */ highest_seq = crm_peer_seq; register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL); } } else if (cur_state == S_INTEGRATION) { if (crmd_join_phase_count(crm_join_welcomed) == 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Integration of %d peer%s complete " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL); return TRUE; } } else if (cur_state == S_FINALIZE_JOIN) { if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_debug("join-%d: Delaying finalization until we have CIB " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); return TRUE; } else if (crmd_join_phase_count(crm_join_welcomed) != 0) { int count = crmd_join_phase_count(crm_join_welcomed); crm_debug("join-%d: Still waiting on %d welcomed node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_integrated) != 0) { int count = crmd_join_phase_count(crm_join_integrated); crm_debug("join-%d: Still waiting on %d integrated node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else if (crmd_join_phase_count(crm_join_finalized) != 0) { int count = crmd_join_phase_count(crm_join_finalized); crm_debug("join-%d: Still waiting on %d finalized node%s " CRM_XS " state=%s for=%s", current_join_id, count, pcmk__plural_s(count), fsa_state2string(cur_state), source); crmd_join_phase_log(LOG_DEBUG); } else { crm_debug("join-%d: Complete " CRM_XS " state=%s for=%s", current_join_id, fsa_state2string(cur_state), source); register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL); return TRUE; } } return FALSE; } void do_dc_join_final(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Ensuring DC, quorum and node attributes are up-to-date"); crm_update_quorum(crm_have_quorum, TRUE); } int crmd_join_phase_count(enum crm_join_phase phase) { int count = 0; crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { if(peer->join == phase) { count++; } } return count; } void crmd_join_phase_log(int level) { crm_node_t *peer; GHashTableIter iter; g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) { do_crm_log(level, "join-%d: %s=%s", current_join_id, peer->uname, crm_join_phase_str(peer->join)); } } diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 4a09960468..16c78eaeb5 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,468 +1,468 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include static mainloop_io_t *pe_subsystem = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void pe_subsystem_free(void) { clear_bit(fsa_input_register, R_PE_REQUIRED); if (pe_subsystem) { controld_expect_sched_reply(NULL); mainloop_del_ipc_client(pe_subsystem); pe_subsystem = NULL; clear_bit(fsa_input_register, R_PE_CONNECTED); } } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename (will be freed) * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure * * \param[in] user_data Ignored */ static void pe_ipc_destroy(gpointer user_data) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (is_set(fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the scheduler failed " CRM_XS " uuid=%s", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { crm_info("Connection to the scheduler released"); } clear_bit(fsa_input_register, R_PE_CONNECTED); pe_subsystem = NULL; mainloop_set_trigger(fsa_source); return; } /*! * \internal * \brief Handle message from scheduler connection * * \param[in] buffer XML message (will be freed) * \param[in] length Ignored * \param[in] userdata Ignored * * \return 0 */ static int pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *msg = string2xml(buffer); if (msg) { route_message(C_IPC_MESSAGE, msg); } free_xml(msg); return 0; } /*! * \internal * \brief Make new connection to scheduler * * \return TRUE on success, FALSE otherwise */ static bool -pe_subsystem_new() +pe_subsystem_new(void) { struct ipc_client_callbacks pe_callbacks = { .dispatch = pe_ipc_dispatch, .destroy = pe_ipc_destroy }; set_bit(fsa_input_register, R_PE_REQUIRED); pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, G_PRIORITY_DEFAULT, 5 * 1024 * 1024 /* 5MB */, NULL, &pe_callbacks); if (pe_subsystem == NULL) { return FALSE; } set_bit(fsa_input_register, R_PE_CONNECTED); return TRUE; } /*! * \internal * \brief Send an XML message to the scheduler * * \param[in] cmd XML message to send * * \return pcmk_ok on success, -errno otherwise */ static int pe_subsystem_send(xmlNode *cmd) { if (pe_subsystem) { int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd, 0, 0, NULL); if (sent == 0) { sent = -ENODATA; } else if (sent > 0) { sent = pcmk_ok; } return sent; } return -ENOTCONN; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_PE_STOP) { pe_subsystem_free(); } if ((action & A_PE_START) && (is_not_set(fsa_input_register, R_PE_CONNECTED))) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); } else if (!pe_subsystem_new()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void -controld_stop_sched_timer() +controld_stop_sched_timer(void) { if (controld_sched_timer && fsa_pe_ref) { crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * * \param[in] msg Request to expect reply to (or NULL for none) */ void controld_expect_sched_reply(xmlNode *msg) { char *ref = NULL; if (msg) { ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE); CRM_ASSERT(ref != NULL); if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(fsa_pe_ref); fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void -controld_free_sched_timer() +controld_free_sched_timer(void) { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); register_fsa_action(A_PE_START); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (is_set(fsa_input_register, R_HAVE_CIB) == FALSE) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']", get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); configuration = find_entity(xml, XML_CIB_TAG_CONFIGURATION, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = find_entity(configuration, XML_CIB_TAG_CRMCONFIG, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = find_entity(crm_config, XML_CIB_TAG_PROPSET, NULL); if (cluster_property_set == NULL) { cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); } freeXpathObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { xmlNode *cmd = NULL; pid_t watchdog = pcmk_locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (AM_I_DC == FALSE || is_set(fsa_input_register, R_PE_CONNECTED) == FALSE) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); register_fsa_action(A_PE_INVOKE); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_err("Invoking scheduler in state: %s", fsa_state2string(fsa_state)); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ crm_peer_caches_refresh(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, watchdog?"true":"false"); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); rc = pe_subsystem_send(cmd); if (rc < 0) { crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __FUNCTION__); } else { controld_expect_sched_reply(cmd); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); } free_xml(cmd); } diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c index 24e5f9bc69..a67deedb7c 100644 --- a/daemons/controld/pacemaker-controld.c +++ b/daemons/controld/pacemaker-controld.c @@ -1,176 +1,176 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" void usage(const char *cmd, int exit_status); _Noreturn void crmd_init(void); void crmd_hamsg_callback(const xmlNode * msg, void *private_data); extern void init_dotfile(void); GMainLoop *crmd_mainloop = NULL; static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\tThis text", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output", pcmk__option_default }, { 0, 0, 0, 0 } }; int main(int argc, char **argv) { int flag; int index = 0; int argerr = 0; crm_ipc_t *old_instance = NULL; crmd_mainloop = g_main_loop_new(NULL, FALSE); crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for coordinating a Pacemaker cluster's " "response to events"); while (1) { flag = pcmk__next_cli_option(argc, argv, &index, NULL); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { crmd_metadata(); return CRM_EX_OK; } else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) { fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); return CRM_EX_OK; } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); if (optind > argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_notice("Starting Pacemaker controller"); old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-controld is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " PE_STATE_DIR); fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); fflush(stderr); return CRM_EX_FATAL; } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); fprintf(stderr, "ERROR: Bad permissions on " CRM_CONFIG_DIR " (see logs for details)\n"); fflush(stderr); return CRM_EX_FATAL; } crmd_init(); return 0; // not reachable } static void -log_deprecation_warnings() +log_deprecation_warnings(void) { // Add deprecations here as needed } void crmd_init(void) { crm_exit_t exit_code = CRM_EX_OK; enum crmd_fsa_state state; log_deprecation_warnings(); fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the regester */ init_dotfile(); register_fsa_input(C_STARTUP, I_STARTUP, NULL); crm_peer_init(); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crm_trace("Starting %s's mainloop", crm_system_name); g_main_loop_run(crmd_mainloop); if (is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = CRM_EX_FATAL; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = CRM_EX_ERROR; } crm_info("%s[%lu] exiting with status %d (%s)", crm_system_name, (unsigned long) getpid(), exit_code, crm_exit_str(exit_code)); crmd_fast_exit(exit_code); } diff --git a/daemons/execd/execd_alerts.c b/daemons/execd/execd_alerts.c index 683643e180..10eca36ef3 100644 --- a/daemons/execd/execd_alerts.c +++ b/daemons/execd/execd_alerts.c @@ -1,175 +1,175 @@ /* * Copyright 2016-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" /* Track in-flight alerts so we can wait for them at shutdown */ static GHashTable *inflight_alerts; /* key = call_id, value = timeout */ static gboolean draining_alerts = FALSE; static inline void add_inflight_alert(int call_id, int timeout) { if (inflight_alerts == NULL) { inflight_alerts = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(inflight_alerts, GINT_TO_POINTER(call_id), GINT_TO_POINTER(timeout)); } static inline void remove_inflight_alert(int call_id) { if (inflight_alerts != NULL) { g_hash_table_remove(inflight_alerts, GINT_TO_POINTER(call_id)); } } static int -max_inflight_timeout() +max_inflight_timeout(void) { GHashTableIter iter; gpointer timeout; int max_timeout = 0; if (inflight_alerts) { g_hash_table_iter_init(&iter, inflight_alerts); while (g_hash_table_iter_next(&iter, NULL, &timeout)) { if (GPOINTER_TO_INT(timeout) > max_timeout) { max_timeout = GPOINTER_TO_INT(timeout); } } } return max_timeout; } struct alert_cb_s { char *client_id; int call_id; }; static void alert_complete(svc_action_t *action) { struct alert_cb_s *cb_data = (struct alert_cb_s *) (action->cb_data); remove_inflight_alert(cb_data->call_id); crm_debug("Alert pid %d for %s completed with rc=%d", action->pid, cb_data->client_id, action->rc); free(cb_data->client_id); free(action->cb_data); action->cb_data = NULL; } int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id, xmlNode *request) { static int alert_sequence_no = 0; xmlNode *alert_xml = get_xpath_object("//" F_LRMD_ALERT, request, LOG_ERR); const char *alert_id = crm_element_value(alert_xml, F_LRMD_ALERT_ID); const char *alert_path = crm_element_value(alert_xml, F_LRMD_ALERT_PATH); svc_action_t *action = NULL; int alert_timeout = 0; int rc = pcmk_ok; GHashTable *params = NULL; struct alert_cb_s *cb_data = NULL; if ((alert_id == NULL) || (alert_path == NULL)) { return -EINVAL; } if (draining_alerts) { return pcmk_ok; } crm_element_value_int(alert_xml, F_LRMD_TIMEOUT, &alert_timeout); crm_info("Executing alert %s for %s", alert_id, client->id); params = xml2list(alert_xml); pcmk__add_alert_key_int(params, PCMK__alert_key_node_sequence, ++alert_sequence_no); cb_data = calloc(1, sizeof(struct alert_cb_s)); CRM_CHECK(cb_data != NULL, rc = -ENOMEM; goto err); cb_data->client_id = strdup(client->id); CRM_CHECK(cb_data->client_id != NULL, rc = -ENOMEM; goto err); crm_element_value_int(request, F_LRMD_CALLID, &(cb_data->call_id)); action = services_alert_create(alert_id, alert_path, alert_timeout, params, alert_sequence_no, cb_data); rc = services_action_user(action, CRM_DAEMON_USER); if (rc < 0) { goto err; } add_inflight_alert(cb_data->call_id, alert_timeout); if (services_alert_async(action, alert_complete) == FALSE) { services_action_free(action); } return pcmk_ok; err: if (cb_data) { if (cb_data->client_id) { free(cb_data->client_id); } free(cb_data); } if (action) { services_action_free(action); } return rc; } static bool drain_check(guint remaining_timeout_ms) { if (inflight_alerts != NULL) { guint count = g_hash_table_size(inflight_alerts); if (count > 0) { crm_trace("%d alerts pending (%.3fs timeout remaining)", count, remaining_timeout_ms / 1000.0); return TRUE; } } return FALSE; } void lrmd_drain_alerts(GMainLoop *mloop) { if (inflight_alerts != NULL) { guint timer_ms = max_inflight_timeout() + 5000; crm_trace("Draining in-flight alerts (timeout %.3fs)", timer_ms / 1000.0); draining_alerts = TRUE; pcmk_drain_main_loop(mloop, timer_ms, drain_check); g_hash_table_destroy(inflight_alerts); inflight_alerts = NULL; } } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 88a78544d1..d13d1a16ac 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,2743 +1,2743 @@ /* * Copyright 2009-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* seconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && safe_str_eq(action, "on"); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { delay_max = crm_parse_interval_spec(value) / 1000; } return delay_max; } static int get_action_delay_base(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_base = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE); if (value) { delay_base = crm_parse_interval_spec(value) / 1000; } return delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (safe_str_eq(action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->mode); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; // Value -1 means disable any static/random fencing delays crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT); if (value) { action_limit = crm_parse_int(value, "1"); if (action_limit == 0) { /* pcmk_action_limit should not be 0. Enforce it to be 1. */ action_limit = 1; } } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(GPid pid, gpointer user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s'%s%s on %s now running with pid=%d, timeout=%ds", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, pid, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : ""); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace ("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds", pending_op->action, pending_op->victim ? " targeting " : "", pending_op->victim ? pending_op->victim : "", device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("Nothing further to do for %s for now", device->id); return TRUE; } if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) { if(safe_str_eq(cmd->action, "reboot")) { pcmk_panic(__FUNCTION__); goto done; } else if(safe_str_eq(cmd->action, "off")) { pcmk_panic(__FUNCTION__); goto done; } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); goto done; } } #if SUPPORT_CIBSECRETS if (pcmk__substitute_secrets(device->id, device->params) != pcmk_rc_ok) { /* replacing secrets failed! */ if (safe_str_eq(cmd->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", device->id); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); goto done; } } #endif action_str = cmd->action; if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent); action_str = "off"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { crm_warn("Operation '%s'%s%s on %s failed: %s (%d)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->activating_on = NULL; cmd->done_cb(0, exec_rc, NULL, cmd); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s on %s for %s (timeout=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn("Base-delay (%ds) is larger than max-delay (%ds) " "for %s on %s - limiting to max-delay", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dont_call] We're not using rand() for security cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, " "requested_delay=%ds, base=%ds, max=%ds)", cmd->action, cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } -void free_device_list() +void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void -init_device_list() +init_device_list(void) { if (device_list == NULL) { device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GListPtr * targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = crm_strcase_table_new(); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void -free_metadata_cache() { +free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void -init_metadata_cache() { +init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = crm_str_table_new(); } } static xmlNode * get_agent_metadata(const char *agent) { xmlNode *xml = NULL; char *buffer = NULL; init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) { return NULL; } else if(buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return NULL; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } xml = string2xml(buffer); return xml; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(safe_str_eq(action, "list")) { set_bit(device->flags, st_device_supports_list); } else if(safe_str_eq(action, "status")) { set_bit(device->flags, st_device_supports_status); } else if(safe_str_eq(action, "reboot")) { set_bit(device->flags, st_device_supports_reboot); } else if (safe_str_eq(action, "on")) { /* "automatic" means the cluster will unfence node when it joins */ const char *automatic = crm_element_value(match, "automatic"); /* "required" is a deprecated synonym for "automatic" */ const char *required = crm_element_value(match, "required"); if (crm_is_true(automatic) || crm_is_true(required)) { device->automatic_unfencing = TRUE; } } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); read_action_metadata(device); value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (safe_str_eq(value, "unfencing")) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("The fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else if(is_set(dev->flags, st_device_supports_list)){ check_type = "dynamic-list"; } else if(is_set(dev->flags, st_device_supports_status)){ check_type = "status"; } else { check_type = "none"; } } return check_type; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for (lpc = 0; lpc < max; lpc++) { const char *value = g_list_nth_data(list, lpc); if (safe_str_eq(item, value)) { return TRUE; } else { crm_trace("%d: '%s' != '%s'", lpc, item, value); } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || safe_str_neq(other_value, value)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (safe_str_neq(dup->agent, device->agent)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); CRM_CHECK(device != NULL, return -ENOMEM); dup = device_has_duplicate(device); if (dup) { crm_debug("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting an existing entry for %s from the cib", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_trace("Not removing '%s' from the device list (%d active devices) " "- still %s%s_registered", id, g_hash_table_size(device_list), device->cib_registered?"cib":"", device->api_registered?"api":""); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void -free_topology_list() +free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void -init_topology_list() +init_topology_list(void) { if (topology == NULL) { topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); crm_info("Target %s has %d active fencing levels", tp->target, count_active_levels(tp)); return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { crm_info("Topology for %s not found (%d active entries)", target, g_hash_table_size(topology)); } else if (id == 0 && g_hash_table_remove(topology, target)) { crm_info("Removed all %s related entries from the topology (%d active entries)", target, g_hash_table_size(topology)); } else if (id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed level '%d' from topology for %s (%d active levels remaining)", id, target, count_active_levels(tp)); } free(target); return pcmk_ok; } /*! * \internal * \brief Schedule an (asynchronous) action directly on a stonith device * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg API message XML with desired action * \param[out] output Unused * * \return -EINPROGRESS on success, -errno otherwise * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static int stonith_device_action(xmlNode * msg, char **output) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); return -EPROTO; } device = g_hash_table_lookup(device_list, id); if ((device == NULL) || (!device->api_registered && !strcmp(action, "monitor"))) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not found", action, id); return -ENODEV; } cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); return -EINPROGRESS; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("'%s' operation with %s can only be executed for localhost not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (safe_str_eq(search->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (safe_str_eq(check_type, "none")) { can = TRUE; } else if (safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (string_in_list(dev->targets, host)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if (safe_str_eq(check_type, "status")) { crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " STONITH_ATTR_HOSTCHECK ": %s", check_type); check_type = "Invalid " STONITH_ATTR_HOSTCHECK; } if (safe_str_eq(host, alias)) { crm_notice("%s is%s eligible to fence (%s) %s: %s", dev->id, (can? "" : " not"), search->action, host, check_type); } else { crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s", dev->id, (can? "" : " not"), search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("STONITH timeout %ds is low for the current configuration;" " consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required on %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms on %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %dms on %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %dms on %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %dms on %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms on %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' on %s is disallowed for local host", action, device->id); crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (is_not_set(device->flags, st_device_supports_reboot) && safe_str_eq(query->action, "reboot")) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device); if (safe_str_eq(query->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_NEVER); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && safe_str_eq(device, "manual_ack")) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output, gboolean op_merged) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned%s: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, (op_merged? " (merged)" : ""), rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } else { do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned%s: %d (%s)%s%s", cmd->action, pid, cmd->device, (op_merged? " (merged)" : ""), rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } if (output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid, int options) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if (crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast '%s' replies", cmd->action); } else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) { crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output, (options & st_reply_opt_merged ? TRUE : FALSE)); crm_log_xml_trace(reply, "Reply"); if (options & st_reply_opt_merged) { crm_xml_add(reply, F_STONITH_MERGED, "true"); } if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } free_xml(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled '%s' action on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (!device->verified && (rc == pcmk_ok) && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GListPtr iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, next_device->id, output, FALSE); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } stonith_send_async_reply(cmd, output, rc, pid, st_reply_opt_none); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating fencer remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice ("Merging stonith action '%s' targeting %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid, st_reply_opt_merged); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0, st_reply_opt_none); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for (lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); } } static int handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); pcmk__ipc_send_xml(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } if (flags & crm_ipc_client_response) { pcmk__ipc_send_ack(client, id, flags, "ack"); } return 0; } else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device ? device : "(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */ create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data, remote_peer, call_options); if (call_options & st_opt_discard_reply) { /* we don't expect answers to the broadcast * we might have sent out */ free_xml(data); return pcmk_ok; } } else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *device_id = NULL; rc = stonith_device_register(request, &device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); rc = stonith_device_remove(device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *device_id = NULL; rc = stonith_level_register(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *device_id = NULL; rc = stonith_level_remove(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); } else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown IPC request %s from %s", op, (client? client->name : remote_peer)); } done: /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", id, client ? client->name : remote_peer, call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s: %s (%d)", op, is_reply ? " reply" : "", client ? client->name : remote_peer, rc > 0 ? "" : pcmk_strerror(rc), rc); free(op); } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index e974642604..450814c04b 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1529 +1,1529 @@ /* * Copyright 2009-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include /* U32T ~ PRIu32, X32T ~ PRIx32 */ #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; static gboolean no_cib_connect = FALSE; static gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static xmlNode *local_cib = NULL; static pe_working_set_t *fenced_data_set = NULL; static cib_t *cib_api = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack"); return 0; } op = crm_element_value(request, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %" X32T "/%u for command %" U32T " from %s", flags, call_options, id, pcmk__client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ pcmk__client_t *client_obj = NULL; int local_rc = pcmk_rc_ok; crm_trace("Sending response"); client_obj = pcmk__find_client_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = EPROTO; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); } if ((local_rc != pcmk_rc_ok) && (client_obj != NULL)) { crm_warn("%s reply to %s failed: %s", (sync_reply? "Synchronous" : "Asynchronous"), (client_obj? client_obj->name : "unknown client"), pcmk_rc_str(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return st_callback_notify_fence; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return st_callback_device_add; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return st_callback_device_del; } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) { return st_callback_notify_history; } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event|crm_ipc_server_error); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, pcmk__client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void -fencing_topology_init() +fencing_topology_init(void) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static pe_node_t * our_node_allowed_for(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; pe_node_t *parent = NULL; gboolean remove = TRUE; /* If this is a complex resource, check children rather than this resource itself. * TODO: Mark each installed device and remove if untouched when this process finishes. */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(pe_rsc_is_clone(rsc)) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { return; } /* If this STONITH resource is disabled, just remove it. */ if (pe__resource_is_disabled(rsc)) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { /* Our node is allowed, so update the device information */ int rc; xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, agent, params, rsc_provides); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(data, NULL, TRUE); CRM_ASSERT(rc == pcmk_ok); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GListPtr gIter = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); CRM_ASSERT(fenced_data_set != NULL); fenced_data_set->input = local_cib; fenced_data_set->now = crm_time_new(NULL); fenced_data_set->flags |= pe_flag_quick_location; fenced_data_set->localhost = stonith_our_uname; cluster_status(fenced_data_set); pcmk__schedule_actions(fenced_data_set, NULL, NULL); for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, fenced_data_set); } fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(fenced_data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (safe_str_eq(op, "delete") && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; reason = strdup("(meta) attribute deleted from resource"); break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in cib"); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_NEVER); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } crm_peer_caches_refresh(local_cib); stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_NEVER); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { long timeout_ms = 0; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_NEVER); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if(value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_watchdog_timeout(); } if(timeout_ms != stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } else { stonith_watchdog_timeout_ms = 0; } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); crm_peer_caches_refresh(local_cib); fencing_topology_init(); cib_devices_update(); } static void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { stonith_cleanup(); crm_exit(CRM_EX_OK); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); free(stonith_our_uname); stonith_our_uname = NULL; free_xml(local_cib); local_cib = NULL; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "stand-alone", no_argument, 0, 's', NULL, pcmk__option_default }, { "stand-alone-w-cpg", no_argument, 0, 'c', NULL, pcmk__option_default }, { "logfile", required_argument, 0, 'l', NULL, pcmk__option_default }, { "verbose", no_argument, 0, 'V', NULL, pcmk__option_default }, { "version", no_argument, 0, '$', NULL, pcmk__option_default }, { "help", no_argument, 0, '?', NULL, pcmk__option_default }, { 0, 0, 0, 0 } }; static void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; crm_ipc_t *old_instance = NULL; crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for executing fencing devices in a " "Pacemaker cluster"); while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': crm_add_logfile(optarg); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" Instance attributes available for all \"stonith\"-class resources" " and used by Pacemaker's fence daemon, formerly known as stonithd\n"); printf(" Instance attributes available for all \"stonith\"-class resources\n"); printf(" \n"); #if 0 // priority is not implemented yet printf(" \n"); printf(" Devices that are not in a topology " "are tried in order of highest to lowest integer priority\n"); printf(" \n"); printf(" \n"); #endif printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list " "(query the device via the 'list' command), static-list " "(check the " STONITH_ATTR_HOSTLIST " attribute), status " "(query the device via the 'status' command), none (assume " "every device can fence every machine)\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_MAX); printf (" Enable a random delay for stonith actions and specify the maximum of random delay.\n"); printf (" This prevents double fencing when using slow devices such as sbd.\n" "Use this to enable a random delay for stonith actions.\n" "The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_BASE); printf (" Enable a base delay for stonith actions and specify base delay value.\n"); printf (" This prevents double fencing when different delays are configured on the nodes.\n" "Use this to enable a static delay for stonith actions.\n" "The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Cluster property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return CRM_EX_OK; } if (optind != argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); set_bit(fenced_data_set->flags, pe_flag_no_counts); set_bit(fenced_data_set->flags, pe_flag_no_compat); if (stand_alone == FALSE) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } init_device_list(); init_topology_list(); if(stonith_watchdog_timeout_ms > 0) { int rc; xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", st_namespace_internal, STONITH_WATCHDOG_AGENT, params, NULL); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(xml, NULL, FALSE); free_xml(xml); if (rc != pcmk_ok) { crm_crit("Cannot register watchdog pseudo fence agent"); crm_exit(CRM_EX_FATAL); } } pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); stonith_cleanup(); pe_free_working_set(fenced_data_set); crm_exit(CRM_EX_OK); } diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index d484b68cf8..612efcbda5 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -1,337 +1,337 @@ /* * Copyright 2004-2018 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_CLUSTER_INTERNAL__H # define CRM_CLUSTER_INTERNAL__H # include typedef struct crm_ais_host_s AIS_Host; typedef struct crm_ais_msg_s AIS_Message; struct crm_ais_host_s { uint32_t id; uint32_t pid; gboolean local; enum crm_ais_msg_types type; uint32_t size; char uname[MAX_NAME]; } __attribute__ ((packed)); struct crm_ais_msg_s { cs_ipc_header_response_t header __attribute__ ((aligned(8))); uint32_t id; gboolean is_compressed; AIS_Host host; AIS_Host sender; uint32_t size; uint32_t compressed_size; /* 584 bytes */ char data[0]; } __attribute__ ((packed)); /* *INDENT-OFF* */ enum crm_proc_flag { crm_proc_none = 0x00000001, // Cluster layers crm_proc_cpg = 0x04000000, // Daemons crm_proc_execd = 0x00000010, crm_proc_based = 0x00000100, crm_proc_controld = 0x00000200, crm_proc_attrd = 0x00001000, crm_proc_schedulerd = 0x00010000, crm_proc_fenced = 0x00100000, }; /* *INDENT-ON* */ /*! * \internal * \brief Return the process bit corresponding to the current cluster stack * * \return Process flag if detectable, otherwise 0 */ static inline uint32_t -crm_get_cluster_proc() +crm_get_cluster_proc(void) { switch (get_cluster_type()) { case pcmk_cluster_corosync: return crm_proc_cpg; default: break; } return crm_proc_none; } static inline const char * peer2text(enum crm_proc_flag proc) { const char *text = "unknown"; switch (proc) { case crm_proc_none: text = "none"; break; case crm_proc_based: text = "pacemaker-based"; break; case crm_proc_controld: text = "pacemaker-controld"; break; case crm_proc_schedulerd: text = "pacemaker-schedulerd"; break; case crm_proc_execd: text = "pacemaker-execd"; break; case crm_proc_attrd: text = "pacemaker-attrd"; break; case crm_proc_fenced: text = "pacemaker-fenced"; break; case crm_proc_cpg: text = "corosync-cpg"; break; } return text; } static inline const char * ais_dest(const AIS_Host *host) { if (host->local) { return "local"; } else if (host->size > 0) { return host->uname; } else { return ""; } } # define ais_data_len(msg) (msg->is_compressed?msg->compressed_size:msg->size) /* typedef enum { CS_OK = 1, CS_ERR_LIBRARY = 2, CS_ERR_VERSION = 3, CS_ERR_INIT = 4, CS_ERR_TIMEOUT = 5, CS_ERR_TRY_AGAIN = 6, CS_ERR_INVALID_PARAM = 7, CS_ERR_NO_MEMORY = 8, CS_ERR_BAD_HANDLE = 9, CS_ERR_BUSY = 10, CS_ERR_ACCESS = 11, CS_ERR_NOT_EXIST = 12, CS_ERR_NAME_TOO_LONG = 13, CS_ERR_EXIST = 14, CS_ERR_NO_SPACE = 15, CS_ERR_INTERRUPT = 16, CS_ERR_NAME_NOT_FOUND = 17, CS_ERR_NO_RESOURCES = 18, CS_ERR_NOT_SUPPORTED = 19, CS_ERR_BAD_OPERATION = 20, CS_ERR_FAILED_OPERATION = 21, CS_ERR_MESSAGE_ERROR = 22, CS_ERR_QUEUE_FULL = 23, CS_ERR_QUEUE_NOT_AVAILABLE = 24, CS_ERR_BAD_FLAGS = 25, CS_ERR_TOO_BIG = 26, CS_ERR_NO_SECTIONS = 27, CS_ERR_CONTEXT_NOT_FOUND = 28, CS_ERR_TOO_MANY_GROUPS = 30, CS_ERR_SECURITY = 100 } cs_error_t; */ static inline const char * ais_error2text(int error) { const char *text = "unknown"; # if SUPPORT_COROSYNC switch (error) { case CS_OK: text = "OK"; break; case CS_ERR_LIBRARY: text = "Library error"; break; case CS_ERR_VERSION: text = "Version error"; break; case CS_ERR_INIT: text = "Initialization error"; break; case CS_ERR_TIMEOUT: text = "Timeout"; break; case CS_ERR_TRY_AGAIN: text = "Try again"; break; case CS_ERR_INVALID_PARAM: text = "Invalid parameter"; break; case CS_ERR_NO_MEMORY: text = "No memory"; break; case CS_ERR_BAD_HANDLE: text = "Bad handle"; break; case CS_ERR_BUSY: text = "Busy"; break; case CS_ERR_ACCESS: text = "Access error"; break; case CS_ERR_NOT_EXIST: text = "Doesn't exist"; break; case CS_ERR_NAME_TOO_LONG: text = "Name too long"; break; case CS_ERR_EXIST: text = "Exists"; break; case CS_ERR_NO_SPACE: text = "No space"; break; case CS_ERR_INTERRUPT: text = "Interrupt"; break; case CS_ERR_NAME_NOT_FOUND: text = "Name not found"; break; case CS_ERR_NO_RESOURCES: text = "No resources"; break; case CS_ERR_NOT_SUPPORTED: text = "Not supported"; break; case CS_ERR_BAD_OPERATION: text = "Bad operation"; break; case CS_ERR_FAILED_OPERATION: text = "Failed operation"; break; case CS_ERR_MESSAGE_ERROR: text = "Message error"; break; case CS_ERR_QUEUE_FULL: text = "Queue full"; break; case CS_ERR_QUEUE_NOT_AVAILABLE: text = "Queue not available"; break; case CS_ERR_BAD_FLAGS: text = "Bad flags"; break; case CS_ERR_TOO_BIG: text = "Too big"; break; case CS_ERR_NO_SECTIONS: text = "No sections"; break; } # endif return text; } static inline const char * msg_type2text(enum crm_ais_msg_types type) { const char *text = "unknown"; switch (type) { case crm_msg_none: text = "unknown"; break; case crm_msg_ais: text = "ais"; break; case crm_msg_cib: text = "cib"; break; case crm_msg_crmd: text = "crmd"; break; case crm_msg_pe: text = "pengine"; break; case crm_msg_te: text = "tengine"; break; case crm_msg_lrmd: text = "lrmd"; break; case crm_msg_attrd: text = "attrd"; break; case crm_msg_stonithd: text = "stonithd"; break; case crm_msg_stonith_ng: text = "stonith-ng"; break; } return text; } gboolean check_message_sanity(const AIS_Message * msg, const char *data); # if SUPPORT_COROSYNC gboolean send_cpg_iov(struct iovec * iov); char *get_corosync_uuid(crm_node_t *peer); char *corosync_node_name(uint64_t /*cmap_handle_t */ cmap_handle, uint32_t nodeid); char *corosync_cluster_name(void); int corosync_cmap_has_config(const char *prefix); gboolean corosync_initialize_nodelist(void *cluster, gboolean force_member, xmlNode * xml_parent); gboolean send_cluster_message_cs(xmlNode * msg, gboolean local, crm_node_t * node, enum crm_ais_msg_types dest); enum cluster_type_e find_corosync_variant(void); void terminate_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection(crm_cluster_t * cluster); gboolean init_cs_connection_once(crm_cluster_t * cluster); # endif crm_node_t *crm_update_peer_proc(const char *source, crm_node_t * peer, uint32_t flag, const char *status); crm_node_t *crm_update_peer_state(const char *source, crm_node_t * node, const char *state, uint64_t membership); void crm_update_peer_uname(crm_node_t *node, const char *uname); void crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected); void crm_reap_unseen_nodes(uint64_t ring_id); gboolean cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer)); gboolean node_name_is_valid(const char *key, const char *name); crm_node_t * crm_find_peer_full(unsigned int id, const char *uname, int flags); crm_node_t * crm_find_peer(unsigned int id, const char *uname); void crm_peer_caches_refresh(xmlNode *cib); crm_node_t *crm_find_known_peer_full(unsigned int id, const char *uname, int flags); #endif diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index 7762e8650c..4f4c66fcfc 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,270 +1,270 @@ /* * Copyright 2015-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_INTERNAL__H #define CRM_COMMON_INTERNAL__H #include // getpid() #include // bool #include // strcmp() #include // uid_t, gid_t, pid_t #include // guint, GList, GHashTable #include // xmlNode #include // crm_strdup_printf() // Internal ACL-related utilities (from acl.c) char *pcmk__uid2username(uid_t uid); const char *pcmk__update_acl_user(xmlNode *request, const char *field, const char *peer_user); #if ENABLE_ACL # include static inline bool pcmk__is_privileged(const char *user) { return user && (!strcmp(user, CRM_DAEMON_USER) || !strcmp(user, "root")); } #endif #if SUPPORT_CIBSECRETS // Internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal digest-related utilities (from digest.c) */ bool pcmk__verify_digest(xmlNode *input, const char *expected); /* internal I/O utilities (from io.c) */ int pcmk__real_path(const char *path, char **resolved_path); char *pcmk__series_filename(const char *directory, const char *series, int sequence, bool bzip); int pcmk__read_series_sequence(const char *directory, const char *series, unsigned int *seq); void pcmk__write_series_sequence(const char *directory, const char *series, unsigned int sequence, int max); int pcmk__chown_series_sequence(const char *directory, const char *series, uid_t uid, gid_t gid); int pcmk__build_path(const char *path_c, mode_t mode); bool pcmk__daemon_can_write(const char *dir, const char *file); void pcmk__sync_directory(const char *name); int pcmk__file_contents(const char *filename, char **contents); int pcmk__write_sync(int fd, const char *contents); int pcmk__set_nonblocking(int fd); const char *pcmk__get_tmpdir(void); void pcmk__close_fds_in_child(bool); /*! * \internal * \brief Open /dev/null to consume next available file descriptor * * Open /dev/null, disregarding the result. This is intended when daemonizing to * be able to null stdin, stdout, and stderr. * * \param[in] flags O_RDONLY (stdin) or O_WRONLY (stdout and stderr) */ static inline void pcmk__open_devnull(int flags) { // Static analysis clutter // cppcheck-suppress leakReturnValNotUsed (void) open("/dev/null", flags); } /* internal logging utilities */ # define pcmk__config_err(fmt...) do { \ crm_config_error = TRUE; \ crm_err(fmt); \ } while (0) # define pcmk__config_warn(fmt...) do { \ crm_config_warning = TRUE; \ crm_warn(fmt); \ } while (0) /* internal procfs utilities (from procfs.c) */ pid_t pcmk__procfs_pid_of(const char *name); unsigned int pcmk__procfs_num_cores(void); /* internal XML schema functions (from xml.c) */ void crm_schema_init(void); void crm_schema_cleanup(void); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. */ int pcmk__pid_active(pid_t pid, const char *daemon); int pcmk__read_pidfile(const char *filename, pid_t *pid); int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, const char *expected_name, pid_t *pid); int pcmk__lock_pidfile(const char *filename, const char *name); /* interal functions related to resource operations (from operations.c) */ // printf-style format to create operation ID from resource, action, interval #define PCMK__OP_FMT "%s_%s_%u" char *pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms); char *pcmk__notify_key(const char *rsc_id, const char *notify_type, const char *op_type); char *pcmk__transition_key(int transition_id, int action_id, int target_rc, const char *node); void pcmk__filter_op_for_digest(xmlNode *param_set); // miscellaneous utilities (from utils.c) const char *pcmk_message_name(const char *name); extern int pcmk__score_red; extern int pcmk__score_green; extern int pcmk__score_yellow; /* internal generic string functions (from strings.c) */ int pcmk__guint_from_hash(GHashTable *table, const char *key, guint default_val, guint *result); bool pcmk__starts_with(const char *str, const char *prefix); bool pcmk__ends_with(const char *s, const char *match); bool pcmk__ends_with_ext(const char *s, const char *match); char *pcmk__add_word(char *list, const char *word); int pcmk__compress(const char *data, unsigned int length, unsigned int max, char **result, unsigned int *result_len); /* Correctly displaying singular or plural is complicated; consider "1 node has" * vs. "2 nodes have". A flexible solution is to pluralize entire strings, e.g. * * if (a == 1) { * crm_info("singular message"): * } else { * crm_info("plural message"); * } * * though even that's not sufficient for all languages besides English (if we * ever desire to do translations of output and log messages). But the following * convenience macros are "good enough" and more concise for many cases. */ /* Example: * crm_info("Found %d %s", nentries, * pcmk__plural_alt(nentries, "entry", "entries")); */ #define pcmk__plural_alt(i, s1, s2) (((i) == 1)? (s1) : (s2)) // Example: crm_info("Found %d node%s", nnodes, pcmk__plural_s(nnodes)); #define pcmk__plural_s(i) pcmk__plural_alt(i, "", "s") static inline int pcmk__str_empty(const char *s) { return (s == NULL) || (s[0] == '\0'); } static inline char * -pcmk__getpid_s() +pcmk__getpid_s(void) { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define PCMK__FAIL_COUNT_PREFIX "fail-count" #define PCMK__LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". */ static inline char * pcmk__fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * pcmk__failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * pcmk__lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } #endif /* CRM_COMMON_INTERNAL__H */ diff --git a/include/crm/common/util.h b/include/crm/common/util.h index d6a91508d1..22ac8eb65f 100644 --- a/include/crm/common/util.h +++ b/include/crm/common/util.h @@ -1,230 +1,230 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRM_COMMON_UTIL__H # define CRM_COMMON_UTIL__H #ifdef __cplusplus extern "C" { #endif /** * \file * \brief Utility functions * \ingroup core */ # include // gid_t, mode_t, size_t, time_t, uid_t # include # include # include // uint32_t # include # include # include # include # include # include # include # define ONLINESTATUS "online" // Status of an online client # define OFFLINESTATUS "offline" // Status of an offline client // public name/value pair functions (from nvpair.c) int pcmk_scan_nvpair(const char *input, char **name, char **value); char *pcmk_format_nvpair(const char *name, const char *value, const char *units); char *pcmk_format_named_time(const char *name, time_t epoch_time); /* public Pacemaker Remote functions (from remote.c) */ int crm_default_remote_port(void); /* public string functions (from strings.c) */ char *crm_itoa_stack(int an_int, char *buf, size_t len); gboolean crm_is_true(const char *s); int crm_str_to_boolean(const char *s, int *ret); long long crm_parse_ll(const char *text, const char *default_text); int crm_parse_int(const char *text, const char *default_text); long long crm_get_msec(const char *input); char * crm_strip_trailing_newline(char *str); gboolean crm_str_eq(const char *a, const char *b, gboolean use_case); gboolean safe_str_neq(const char *a, const char *b); gboolean crm_strcase_equal(gconstpointer a, gconstpointer b); guint crm_strcase_hash(gconstpointer v); guint g_str_hash_traditional(gconstpointer v); char *crm_strdup_printf(char const *format, ...) __attribute__ ((__format__ (__printf__, 1, 2))); int pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end); gboolean pcmk__str_in_list(GList *lst, const gchar *s); # define safe_str_eq(a, b) crm_str_eq(a, b, FALSE) # define crm_str_hash g_str_hash_traditional static inline char * crm_itoa(int an_int) { return crm_strdup_printf("%d", an_int); } static inline char * crm_ftoa(double a_float) { return crm_strdup_printf("%f", a_float); } static inline char * crm_ttoa(time_t epoch_time) { return crm_strdup_printf("%lld", (long long) epoch_time); } /*! * \brief Create hash table with dynamically allocated string keys/values * * \return Newly allocated hash table * \note It is the caller's responsibility to free the result, using * g_hash_table_destroy(). */ static inline GHashTable * -crm_str_table_new() +crm_str_table_new(void) { return g_hash_table_new_full(crm_str_hash, g_str_equal, free, free); } /*! * \brief Create hash table with case-insensitive dynamically allocated string keys/values * * \return Newly allocated hash table * \note It is the caller's responsibility to free the result, using * g_hash_table_destroy(). */ static inline GHashTable * -crm_strcase_table_new() +crm_strcase_table_new(void) { return g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, free); } GHashTable *crm_str_table_dup(GHashTable *old_table); # define crm_atoi(text, default_text) crm_parse_int(text, default_text) /* public I/O functions (from io.c) */ void crm_build_path(const char *path_c, mode_t mode); guint crm_parse_interval_spec(const char *input); int char2score(const char *score); char *score2char(int score); char *score2char_stack(int score, char *buf, size_t len); /* public operation functions (from operations.c) */ gboolean parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms); gboolean decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id, int *target_rc); gboolean decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id, int *op_status, int *op_rc, int *target_rc); int rsc_op_expected_rc(lrmd_event_data_t *event); gboolean did_rsc_op_fail(lrmd_event_data_t *event, int target_rc); bool crm_op_needs_metadata(const char *rsc_class, const char *op); xmlNode *crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task, const char *interval_spec, const char *timeout); #define CRM_DEFAULT_OP_TIMEOUT_S "20s" // Public resource agent functions (from agents.c) // Capabilities supported by a resource agent standard enum pcmk_ra_caps { pcmk_ra_cap_none = 0, pcmk_ra_cap_provider = (1 << 0), // Requires provider pcmk_ra_cap_status = (1 << 1), // Supports status instead of monitor pcmk_ra_cap_params = (1 << 2), // Supports parameters pcmk_ra_cap_unique = (1 << 3), // Supports unique clones pcmk_ra_cap_promotable = (1 << 4), // Supports promotable clones pcmk_ra_cap_stdin = (1 << 5), // Reads from standard input }; uint32_t pcmk_get_ra_caps(const char *standard); char *crm_generate_ra_key(const char *standard, const char *provider, const char *type); int crm_parse_agent_spec(const char *spec, char **standard, char **provider, char **type); int compare_version(const char *version1, const char *version2); /* coverity[+kill] */ void crm_abort(const char *file, const char *function, int line, const char *condition, gboolean do_core, gboolean do_fork); static inline gboolean is_not_set(long long word, long long bit) { return ((word & bit) == 0); } static inline gboolean is_set(long long word, long long bit) { return ((word & bit) == bit); } static inline gboolean is_set_any(long long word, long long bit) { return ((word & bit) != 0); } static inline guint crm_hash_table_size(GHashTable * hashtable) { if (hashtable == NULL) { return 0; } return g_hash_table_size(hashtable); } char *crm_meta_name(const char *field); const char *crm_meta_value(GHashTable * hash, const char *field); char *crm_md5sum(const char *buffer); char *crm_generate_uuid(void); bool crm_is_daemon_name(const char *name); int crm_user_lookup(const char *name, uid_t * uid, gid_t * gid); int pcmk_daemon_user(uid_t *uid, gid_t *gid); #ifdef HAVE_GNUTLS_GNUTLS_H void crm_gnutls_global_init(void); #endif char *pcmk_hostname(void); bool pcmk_str_is_infinity(const char *s); bool pcmk_str_is_minus_infinity(const char *s); #ifndef PCMK__NO_COMPAT /* Everything here is deprecated and kept only for public API backward * compatibility. It will be moved to compatibility.h when 2.1.0 is released. */ //! \deprecated Use crm_parse_interval_spec() instead #define crm_get_interval crm_parse_interval_spec //! \deprecated Use pcmk_get_ra_caps() instead bool crm_provider_required(const char *standard); #endif // PCMK__NO_COMPAT #ifdef __cplusplus } #endif #endif diff --git a/lib/cib/cib_client.c b/lib/cib/cib_client.c index 227adcb048..4b6107809b 100644 --- a/lib/cib/cib_client.c +++ b/lib/cib/cib_client.c @@ -1,679 +1,679 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *cib_op_callback_table = NULL; int cib_client_set_op_callback(cib_t * cib, void (*callback) (const xmlNode * msg, int call_id, int rc, xmlNode * output)); int cib_client_add_notify_callback(cib_t * cib, const char *event, void (*callback) (const char *event, xmlNode * msg)); int cib_client_del_notify_callback(cib_t * cib, const char *event, void (*callback) (const char *event, xmlNode * msg)); gint ciblib_GCompareFunc(gconstpointer a, gconstpointer b); #define op_common(cib) do { \ if(cib == NULL) { \ return -EINVAL; \ } else if(cib->delegate_fn == NULL) { \ return -EPROTONOSUPPORT; \ } \ } while(0) static int cib_client_noop(cib_t * cib, int call_options) { op_common(cib); return cib_internal_op(cib, CRM_OP_NOOP, NULL, NULL, NULL, NULL, call_options, NULL); } static int cib_client_ping(cib_t * cib, xmlNode ** output_data, int call_options) { op_common(cib); return cib_internal_op(cib, CRM_OP_PING, NULL, NULL, NULL, output_data, call_options, NULL); } static int cib_client_query(cib_t * cib, const char *section, xmlNode ** output_data, int call_options) { return cib->cmds->query_from(cib, NULL, section, output_data, call_options); } static int cib_client_query_from(cib_t * cib, const char *host, const char *section, xmlNode ** output_data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_QUERY, host, section, NULL, output_data, call_options, NULL); } static int cib_client_is_master(cib_t * cib) { op_common(cib); return cib_internal_op(cib, CIB_OP_ISMASTER, NULL, NULL, NULL, NULL, cib_scope_local | cib_sync_call, NULL); } static int cib_client_set_slave(cib_t * cib, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_SLAVE, NULL, NULL, NULL, NULL, call_options, NULL); } static int cib_client_set_slave_all(cib_t * cib, int call_options) { return -EPROTONOSUPPORT; } static int cib_client_set_master(cib_t * cib, int call_options) { op_common(cib); crm_trace("Adding cib_scope_local to options"); return cib_internal_op(cib, CIB_OP_MASTER, NULL, NULL, NULL, NULL, call_options | cib_scope_local, NULL); } static int cib_client_bump_epoch(cib_t * cib, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_BUMP, NULL, NULL, NULL, NULL, call_options, NULL); } static int cib_client_upgrade(cib_t * cib, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_UPGRADE, NULL, NULL, NULL, NULL, call_options, NULL); } static int cib_client_sync(cib_t * cib, const char *section, int call_options) { return cib->cmds->sync_from(cib, NULL, section, call_options); } static int cib_client_sync_from(cib_t * cib, const char *host, const char *section, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_SYNC, host, section, NULL, NULL, call_options, NULL); } static int cib_client_create(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_CREATE, NULL, section, data, NULL, call_options, NULL); } static int cib_client_modify(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_MODIFY, NULL, section, data, NULL, call_options, NULL); } static int cib_client_update(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_MODIFY, NULL, section, data, NULL, call_options, NULL); } static int cib_client_replace(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_REPLACE, NULL, section, data, NULL, call_options, NULL); } static int cib_client_delete(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_DELETE, NULL, section, data, NULL, call_options, NULL); } static int cib_client_delete_absolute(cib_t * cib, const char *section, xmlNode * data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_DELETE_ALT, NULL, section, data, NULL, call_options, NULL); } static int cib_client_erase(cib_t * cib, xmlNode ** output_data, int call_options) { op_common(cib); return cib_internal_op(cib, CIB_OP_ERASE, NULL, NULL, NULL, output_data, call_options, NULL); } static void cib_destroy_op_callback(gpointer data) { cib_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); if (blob->user_data && blob->free_func) { blob->free_func(blob->user_data); } free(blob); } static void -destroy_op_callback_table() +destroy_op_callback_table(void) { if (cib_op_callback_table != NULL) { g_hash_table_destroy(cib_op_callback_table); cib_op_callback_table = NULL; } } char * get_shadow_file(const char *suffix) { char *cib_home = NULL; char *fullname = NULL; char *name = crm_strdup_printf("shadow.%s", suffix); const char *dir = getenv("CIB_shadow_dir"); if (dir == NULL) { uid_t uid = geteuid(); struct passwd *pwent = getpwuid(uid); const char *user = NULL; if (pwent) { user = pwent->pw_name; } else { user = getenv("USER"); crm_perror(LOG_ERR, "Assuming %s because cannot get user details for user ID %d", (user? user : "unprivileged user"), uid); } if (safe_str_eq(user, "root") || safe_str_eq(user, CRM_DAEMON_USER)) { dir = CRM_CONFIG_DIR; } else { const char *home = NULL; if ((home = getenv("HOME")) == NULL) { if (pwent) { home = pwent->pw_dir; } } dir = pcmk__get_tmpdir(); if (home && home[0] == '/') { int rc = 0; cib_home = crm_strdup_printf("%s/.cib", home); rc = mkdir(cib_home, 0700); if (rc < 0 && errno != EEXIST) { crm_perror(LOG_ERR, "Couldn't create user-specific shadow directory: %s", cib_home); errno = 0; } else { dir = cib_home; } } } } fullname = crm_strdup_printf("%s/%s", dir, name); free(cib_home); free(name); return fullname; } cib_t * cib_shadow_new(const char *shadow) { cib_t *new_cib = NULL; char *shadow_file = NULL; CRM_CHECK(shadow != NULL, return NULL); shadow_file = get_shadow_file(shadow); new_cib = cib_file_new(shadow_file); free(shadow_file); return new_cib; } cib_t * cib_new_no_shadow(void) { unsetenv("CIB_shadow"); return cib_new(); } cib_t * cib_new(void) { const char *value = getenv("CIB_shadow"); if (value && value[0] != 0) { return cib_shadow_new(value); } value = getenv("CIB_file"); if (value) { return cib_file_new(value); } value = getenv("CIB_port"); if (value) { gboolean encrypted = TRUE; int port = crm_parse_int(value, NULL); const char *server = getenv("CIB_server"); const char *user = getenv("CIB_user"); const char *pass = getenv("CIB_passwd"); value = getenv("CIB_encrypted"); if (value && crm_is_true(value) == FALSE) { crm_info("Disabling TLS"); encrypted = FALSE; } if (user == NULL) { user = CRM_DAEMON_USER; crm_info("Defaulting to user: %s", user); } if (server == NULL) { server = "localhost"; crm_info("Defaulting to localhost"); } return cib_remote_new(server, user, pass, port, encrypted); } return cib_native_new(); } /*! * \internal * \brief Create a generic CIB connection instance * * \return Newly allocated and initialized cib_t instance * * \note This is called by each variant's cib_*_new() function before setting * variant-specific values. */ cib_t * cib_new_variant(void) { cib_t *new_cib = NULL; new_cib = calloc(1, sizeof(cib_t)); remove_cib_op_callback(0, TRUE); /* remove all */ new_cib->call_id = 1; new_cib->variant = cib_undefined; new_cib->type = cib_no_connection; new_cib->state = cib_disconnected; new_cib->op_callback = NULL; new_cib->variant_opaque = NULL; new_cib->notify_list = NULL; /* the rest will get filled in by the variant constructor */ new_cib->cmds = calloc(1, sizeof(cib_api_operations_t)); new_cib->cmds->set_op_callback = cib_client_set_op_callback; new_cib->cmds->add_notify_callback = cib_client_add_notify_callback; new_cib->cmds->del_notify_callback = cib_client_del_notify_callback; new_cib->cmds->register_callback = cib_client_register_callback; new_cib->cmds->register_callback_full = cib_client_register_callback_full; new_cib->cmds->noop = cib_client_noop; new_cib->cmds->ping = cib_client_ping; new_cib->cmds->query = cib_client_query; new_cib->cmds->sync = cib_client_sync; new_cib->cmds->query_from = cib_client_query_from; new_cib->cmds->sync_from = cib_client_sync_from; new_cib->cmds->is_master = cib_client_is_master; new_cib->cmds->set_master = cib_client_set_master; new_cib->cmds->set_slave = cib_client_set_slave; new_cib->cmds->set_slave_all = cib_client_set_slave_all; new_cib->cmds->upgrade = cib_client_upgrade; new_cib->cmds->bump_epoch = cib_client_bump_epoch; new_cib->cmds->create = cib_client_create; new_cib->cmds->modify = cib_client_modify; new_cib->cmds->update = cib_client_update; new_cib->cmds->replace = cib_client_replace; new_cib->cmds->remove = cib_client_delete; new_cib->cmds->erase = cib_client_erase; new_cib->cmds->delete_absolute = cib_client_delete_absolute; return new_cib; } /*! * \brief Free all callbacks for a CIB connection * * \param[in] cib CIB connection to clean up */ void cib_free_callbacks(cib_t *cib) { if (cib) { GList *list = cib->notify_list; while (list != NULL) { cib_notify_client_t *client = g_list_nth_data(list, 0); list = g_list_remove(list, client); free(client); } cib->notify_list = NULL; } destroy_op_callback_table(); } /*! * \brief Free all memory used by CIB connection * * \param[in] cib CIB connection to delete */ void cib_delete(cib_t *cib) { cib_free_callbacks(cib); if (cib) { cib->cmds->free(cib); } } int cib_client_set_op_callback(cib_t * cib, void (*callback) (const xmlNode * msg, int call_id, int rc, xmlNode * output)) { if (callback == NULL) { crm_info("Un-Setting operation callback"); } else { crm_trace("Setting operation callback"); } cib->op_callback = callback; return pcmk_ok; } int cib_client_add_notify_callback(cib_t * cib, const char *event, void (*callback) (const char *event, xmlNode * msg)) { GList *list_item = NULL; cib_notify_client_t *new_client = NULL; if (cib->variant != cib_native && cib->variant != cib_remote) { return -EPROTONOSUPPORT; } crm_trace("Adding callback for %s events (%d)", event, g_list_length(cib->notify_list)); new_client = calloc(1, sizeof(cib_notify_client_t)); new_client->event = event; new_client->callback = callback; list_item = g_list_find_custom(cib->notify_list, new_client, ciblib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -EINVAL; } else { cib->notify_list = g_list_append(cib->notify_list, new_client); cib->cmds->register_notification(cib, event, 1); crm_trace("Callback added (%d)", g_list_length(cib->notify_list)); } return pcmk_ok; } static int get_notify_list_event_count(cib_t * cib, const char *event) { GList *l = NULL; int count = 0; for (l = g_list_first(cib->notify_list); l; l = g_list_next(l)) { cib_notify_client_t *client = (cib_notify_client_t *)l->data; if (strcmp(client->event, event) == 0) { count++; } } crm_trace("event(%s) count : %d", event, count); return count; } int cib_client_del_notify_callback(cib_t * cib, const char *event, void (*callback) (const char *event, xmlNode * msg)) { GList *list_item = NULL; cib_notify_client_t *new_client = NULL; if (cib->variant != cib_native && cib->variant != cib_remote) { return -EPROTONOSUPPORT; } if (get_notify_list_event_count(cib, event) == 0) { crm_debug("The callback of the event does not exist(%s)", event); return pcmk_ok; } crm_debug("Removing callback for %s events", event); new_client = calloc(1, sizeof(cib_notify_client_t)); new_client->event = event; new_client->callback = callback; list_item = g_list_find_custom(cib->notify_list, new_client, ciblib_GCompareFunc); if (list_item != NULL) { cib_notify_client_t *list_client = list_item->data; cib->notify_list = g_list_remove(cib->notify_list, list_client); free(list_client); crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } if (get_notify_list_event_count(cib, event) == 0) { /* When there is not the registration of the event, the processing turns off a notice. */ cib->cmds->register_notification(cib, event, 0); } free(new_client); return pcmk_ok; } gint ciblib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const cib_notify_client_t *a_client = a; const cib_notify_client_t *b_client = b; CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->callback == b_client->callback) { return 0; } else if (((long)a_client->callback) < ((long)b_client->callback)) { crm_trace("callbacks for %s are not equal: %p < %p", a_client->event, a_client->callback, b_client->callback); return -1; } crm_trace("callbacks for %s are not equal: %p > %p", a_client->event, a_client->callback, b_client->callback); return 1; } return rc; } static gboolean cib_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_debug("Async call %d timed out after %ds", timer->call_id, timer->timeout); cib_native_callback(timer->cib, NULL, timer->call_id, -ETIME); /* Always return TRUE, never remove the handler * We do that in remove_cib_op_callback() */ return TRUE; } gboolean cib_client_register_callback(cib_t * cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback) (xmlNode *, int, int, xmlNode *, void *)) { return cib_client_register_callback_full(cib, call_id, timeout, only_success, user_data, callback_name, callback, NULL); } gboolean cib_client_register_callback_full(cib_t *cib, int call_id, int timeout, gboolean only_success, void *user_data, const char *callback_name, void (*callback)(xmlNode *, int, int, xmlNode *, void *), void (*free_func)(void *)) { cib_callback_client_t *blob = NULL; if (call_id < 0) { if (only_success == FALSE) { callback(NULL, call_id, call_id, NULL, user_data); } else { crm_warn("CIB call failed: %s", pcmk_strerror(call_id)); } if (user_data && free_func) { free_func(user_data); } return FALSE; } blob = calloc(1, sizeof(cib_callback_client_t)); blob->id = callback_name; blob->only_success = only_success; blob->user_data = user_data; blob->callback = callback; blob->free_func = free_func; if (timeout > 0) { struct timer_rec_s *async_timer = NULL; async_timer = calloc(1, sizeof(struct timer_rec_s)); blob->timer = async_timer; async_timer->cib = cib; async_timer->call_id = call_id; async_timer->timeout = timeout * 1000; async_timer->ref = g_timeout_add(async_timer->timeout, cib_async_timeout_handler, async_timer); } crm_trace("Adding callback %s for call %d", callback_name, call_id); g_hash_table_insert(cib_op_callback_table, GINT_TO_POINTER(call_id), blob); return TRUE; } void remove_cib_op_callback(int call_id, gboolean all_callbacks) { if (all_callbacks) { destroy_op_callback_table(); cib_op_callback_table = g_hash_table_new_full(g_direct_hash, g_direct_equal, NULL, cib_destroy_op_callback); } else { g_hash_table_remove(cib_op_callback_table, GINT_TO_POINTER(call_id)); } } int num_cib_op_callbacks(void) { if (cib_op_callback_table == NULL) { return 0; } return g_hash_table_size(cib_op_callback_table); } static void cib_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); cib_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void cib_dump_pending_callbacks(void) { if (cib_op_callback_table == NULL) { return; } return g_hash_table_foreach(cib_op_callback_table, cib_dump_pending_op, NULL); } diff --git a/lib/common/ipc.c b/lib/common/ipc.c index 6f4c8e0123..34bd594e95 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1,1848 +1,1848 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) # ifdef US_AUTH_PEERCRED_UCRED # ifndef _GNU_SOURCE # define _GNU_SOURCE # endif # endif # include #elif defined(US_AUTH_GETPEERUCRED) # include #endif #include #include #include #include #include #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include /* PCMK__SPECIAL_PID* */ #define PCMK_IPC_VERSION 1 /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 struct crm_ipc_response_header { struct qb_ipc_response_header qb; uint32_t size_uncompressed; uint32_t size_compressed; uint32_t flags; uint8_t version; /* Protect against version changes for anyone that might bother to statically link us */ }; static int hdr_offset = 0; static unsigned int ipc_buffer_max = 0; static unsigned int pick_ipc_buffer(unsigned int max); static inline void crm_ipc_init(void) { if (hdr_offset == 0) { hdr_offset = sizeof(struct crm_ipc_response_header); } if (ipc_buffer_max == 0) { ipc_buffer_max = pick_ipc_buffer(0); } } unsigned int crm_ipc_default_buffer_size(void) { return pick_ipc_buffer(0); } static char * generateReference(const char *custom1, const char *custom2) { static uint ref_counter = 0; return crm_strdup_printf("%s-%s-%lld-%u", (custom1? custom1 : "_empty_"), (custom2? custom2 : "_empty_"), (long long) time(NULL), ref_counter++); } xmlNode * create_request_adv(const char *task, xmlNode * msg_data, const char *host_to, const char *sys_to, const char *sys_from, const char *uuid_from, const char *origin) { char *true_from = NULL; xmlNode *request = NULL; char *reference = generateReference(task, sys_from); if (uuid_from != NULL) { true_from = generate_hash_key(sys_from, uuid_from); } else if (sys_from != NULL) { true_from = strdup(sys_from); } else { crm_err("No sys from specified"); } // host_from will get set for us if necessary by the controller when routed request = create_xml_node(NULL, __FUNCTION__); crm_xml_add(request, F_CRM_ORIGIN, origin); crm_xml_add(request, F_TYPE, T_CRM); crm_xml_add(request, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(request, F_CRM_MSG_TYPE, XML_ATTR_REQUEST); crm_xml_add(request, F_CRM_REFERENCE, reference); crm_xml_add(request, F_CRM_TASK, task); crm_xml_add(request, F_CRM_SYS_TO, sys_to); crm_xml_add(request, F_CRM_SYS_FROM, true_from); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_to != NULL && strlen(host_to) > 0) { crm_xml_add(request, F_CRM_HOST_TO, host_to); } if (msg_data != NULL) { add_message_xml(request, F_CRM_DATA, msg_data); } free(reference); free(true_from); return request; } /* * This method adds a copy of xml_response_data */ xmlNode * create_reply_adv(xmlNode * original_request, xmlNode * xml_response_data, const char *origin) { xmlNode *reply = NULL; const char *host_from = crm_element_value(original_request, F_CRM_HOST_FROM); const char *sys_from = crm_element_value(original_request, F_CRM_SYS_FROM); const char *sys_to = crm_element_value(original_request, F_CRM_SYS_TO); const char *type = crm_element_value(original_request, F_CRM_MSG_TYPE); const char *operation = crm_element_value(original_request, F_CRM_TASK); const char *crm_msg_reference = crm_element_value(original_request, F_CRM_REFERENCE); if (type == NULL) { crm_err("Cannot create new_message, no message type in original message"); CRM_ASSERT(type != NULL); return NULL; #if 0 } else if (strcasecmp(XML_ATTR_REQUEST, type) != 0) { crm_err("Cannot create new_message, original message was not a request"); return NULL; #endif } reply = create_xml_node(NULL, __FUNCTION__); if (reply == NULL) { crm_err("Cannot create new_message, malloc failed"); return NULL; } crm_xml_add(reply, F_CRM_ORIGIN, origin); crm_xml_add(reply, F_TYPE, T_CRM); crm_xml_add(reply, F_CRM_VERSION, CRM_FEATURE_SET); crm_xml_add(reply, F_CRM_MSG_TYPE, XML_ATTR_RESPONSE); crm_xml_add(reply, F_CRM_REFERENCE, crm_msg_reference); crm_xml_add(reply, F_CRM_TASK, operation); /* since this is a reply, we reverse the from and to */ crm_xml_add(reply, F_CRM_SYS_TO, sys_from); crm_xml_add(reply, F_CRM_SYS_FROM, sys_to); /* HOSTTO will be ignored if it is to the DC anyway. */ if (host_from != NULL && strlen(host_from) > 0) { crm_xml_add(reply, F_CRM_HOST_TO, host_from); } if (xml_response_data != NULL) { add_message_xml(reply, F_CRM_DATA, xml_response_data); } return reply; } /* Libqb based IPC */ /* Server... */ static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count() { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } /*! * \internal * \brief Remote IPC clients based on iterative function result * * \param[in] func Function to call for each active IPC client * \param[in] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach_remove(). */ void pcmk__foreach_ipc_client_remove(GHRFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach_remove(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { gpointer key; pcmk__client_t *client; GHashTableIter iter; if (client_connections && id) { g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id=%s", id); return NULL; } const char * pcmk__client_name(pcmk__client_t *c) { if (c == NULL) { return "null"; } else if (c->name == NULL && c->id == NULL) { return "unknown"; } else if (c->name == NULL) { return c->id; } else { return c->name; } } const char * pcmk__client_type_str(enum pcmk__client_type client_type) { switch (client_type) { case PCMK__CLIENT_IPC: return "IPC"; case PCMK__CLIENT_TCP: return "TCP"; #ifdef HAVE_GNUTLS_GNUTLS_H case PCMK__CLIENT_TLS: return "TLS"; #endif default: return "unknown"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active) { crm_err("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (or NULL to allocate generic client) * \param[in] key Connection table key (or NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (or NULL on error) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = calloc(1, sizeof(pcmk__client_t)); if (client == NULL) { crm_perror(LOG_ERR, "Allocating client"); return NULL; } if (c) { #if ENABLE_ACL client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = strdup("#unprivileged"); CRM_CHECK(client->user != NULL, free(client); return NULL); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } #endif client->ipcs = c; client->kind = PCMK__CLIENT_IPC; client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (client->id == NULL) { crm_err("Could not generate UUID for client"); free(client->user); free(client); return NULL; } if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { pcmk__client_t *client = client_from_connection(NULL, key, 0); CRM_ASSERT(client != NULL); return client; } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if (client == NULL) { return NULL; } if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ set_bit(client->flags, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * -pcmk__new_ipc_event() +pcmk__new_ipc_event(void) { struct iovec *iov = calloc(2, sizeof(struct iovec)); CRM_ASSERT(iov != NULL); return iov; } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold (as non-NULL string) * * \return TRUE if change was allowed, FALSE otherwise */ bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { if (is_set(client->flags, pcmk__client_privileged)) { long long qmax_int; errno = 0; qmax_int = crm_parse_ll(qmax, NULL); if ((errno == 0) && (qmax_int > 0)) { client->queue_max = (unsigned int) qmax_int; return TRUE; } } return FALSE; } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(struct crm_ipc_response_header); struct crm_ipc_response_header *header = data; if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ c->flags |= pcmk__client_proxied; } if(header->version > PCMK_IPC_VERSION) { crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", header->version, PCMK_IPC_VERSION); return NULL; } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = calloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return NULL; } } CRM_ASSERT(text[header->size_uncompressed - 1] == 0); xml = string2xml(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { struct crm_ipc_response_header *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", header->qb.id, c->ipcs, c->pid, (long long) qb_rc); } else { crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, (long long) qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), (long long) qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { static unsigned int biggest = 0; struct iovec *iov; unsigned int total = 0; char *compressed = NULL; char *buffer = NULL; struct crm_ipc_response_header *header = NULL; if ((message == NULL) || (result == NULL)) { return EINVAL; } header = calloc(1, sizeof(struct crm_ipc_response_header)); if (header == NULL) { return errno; } buffer = dump_xml_unformatted(message); crm_ipc_init(); if (max_send_size == 0) { max_send_size = ipc_buffer_max; } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = hdr_offset; iov[0].iov_base = header; header->version = PCMK_IPC_VERSION; header->size_uncompressed = 1 + strlen(buffer); total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = buffer; iov[1].iov_len = header->size_uncompressed; } else { unsigned int new_size = 0; if (pcmk__compress(buffer, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { header->flags |= crm_ipc_compressed; header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; free(buffer); biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); free(buffer); pcmk_free_ipc_event(iov); return EMSGSIZE; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; CRM_ASSERT(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } return pcmk_rc_ok; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; struct crm_ipc_response_header *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (is_not_set(flags, crm_ipc_server_event)) { flags |= crm_ipc_server_event; /* this flag lets us know this was originally meant to be a response. * even though we're sending it over the event channel. */ flags |= crm_ipc_proxied_relay_response; } } header->flags |= flags; if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %d to pid %d failed: %s " CRM_XS " bytes=%u rc=%lld ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, (long long) qb_rc, c->ipcs); } else { crm_trace("Response %d sent, %lld bytes to %p[%d]", header->qb.id, (long long) qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } crm_ipc_init(); rc = pcmk__ipc_prepare_iov(request, message, ipc_buffer_max, &iov, NULL); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_send_iov(c, iov, flags | crm_ipc_server_free); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " CRM_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } void pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag) { if (flags & crm_ipc_client_response) { xmlNode *ack = create_xml_node(NULL, tag); crm_trace("Ack'ing IPC message from %s", pcmk__client_name(c)); c->request_id = 0; crm_xml_add(ack, "function", function); crm_xml_add_int(ack, "line", line); pcmk__ipc_send_xml(c, request, ack, flags); free_xml(ack); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-based API * * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for pacemaker-based API * * \param[out] ipcs_ro IPC server for read-only pacemaker-based API * \param[out] ipcs_rw IPC server for read/write pacemaker-based API * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-controld API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-attrd API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-fenced API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /* Client... */ #define MIN_MSG_SIZE 12336 /* sizeof(struct qb_ipc_connection_response) */ #define MAX_MSG_SIZE 128*1024 /* 128k default */ struct crm_ipc_s { struct pollfd pfd; /* the max size we can send/receive over ipc */ unsigned int max_buf_size; /* Size of the allocated 'buffer' */ unsigned int buf_size; int msg_size; int need_reply; char *buffer; char *name; qb_ipcc_connection_t *ipc; }; static unsigned int pick_ipc_buffer(unsigned int max) { static unsigned int global_max = 0; if (global_max == 0) { const char *env = getenv("PCMK_ipc_buffer"); if (env) { int env_max = crm_parse_int(env, "0"); global_max = (env_max > 0)? QB_MAX(MIN_MSG_SIZE, env_max) : MAX_MSG_SIZE; } else { global_max = MAX_MSG_SIZE; } } return QB_MAX(max, global_max); } crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); client->name = strdup(name); client->buf_size = pick_ipc_buffer(max_size); client->buffer = malloc(client->buf_size); /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \brief Establish an IPC connection to a Pacemaker component * * \param[in] client Connection instance obtained from crm_ipc_new() * * \return TRUE on success, FALSE otherwise (in which case errno will be set; * specifically, in case of discovering the remote side is not * authentic, its value is set to ECONNABORTED). */ bool crm_ipc_connect(crm_ipc_t * client) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rv; client->need_reply = FALSE; client->ipc = qb_ipcc_connect(client->name, client->buf_size); if (client->ipc == NULL) { crm_debug("Could not establish %s connection: %s (%d)", client->name, pcmk_strerror(errno), errno); return FALSE; } client->pfd.fd = crm_ipc_get_fd(client); if (client->pfd.fd < 0) { rv = errno; /* message already omitted */ crm_ipc_close(client); errno = rv; return FALSE; } rv = pcmk_daemon_user(&cl_uid, &cl_gid); if (rv < 0) { /* message already omitted */ crm_ipc_close(client); errno = -rv; return FALSE; } if (!(rv = crm_ipc_is_authentic_process(client->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid))) { crm_err("Daemon (IPC %s) is not authentic:" " process %lld (uid: %lld, gid: %lld)", client->name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); crm_ipc_close(client); errno = ECONNABORTED; return FALSE; } else if (rv < 0) { errno = -rv; crm_perror(LOG_ERR, "Could not verify authenticity of daemon (IPC %s)", client->name); crm_ipc_close(client); errno = -rv; return FALSE; } qb_ipcc_context_set(client->ipc, client); #ifdef HAVE_IPCS_GET_BUFFER_SIZE client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); if (client->max_buf_size > client->buf_size) { free(client->buffer); client->buffer = calloc(1, client->max_buf_size); client->buf_size = client->max_buf_size; } #endif return TRUE; } void crm_ipc_close(crm_ipc_t * client) { if (client) { crm_trace("Disconnecting %s IPC connection %p (%p)", client->name, client, client->ipc); if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying an active IPC connection to %s", client->name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } crm_trace("Destroying IPC connection to %s: %p", client->name, client); free(client->buffer); free(client->name); free(client); } } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = 0; if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { return fd; } errno = EINVAL; crm_perror(LOG_ERR, "Could not obtain file IPC descriptor for %s", (client? client->name : "unspecified client")); return -errno; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; CRM_ASSERT(client != NULL); if (crm_ipc_connected(client) == FALSE) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } // \return Standard Pacemaker return code static int crm_ipc_decompress(crm_ipc_t * client) { struct crm_ipc_response_header *header = (struct crm_ipc_response_header *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((hdr_offset + size_u), client->max_buf_size); char *uncompressed = calloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + hdr_offset, &size_u, client->buffer + hdr_offset, header->size_compressed, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return EILSEQ; } /* * This assert no longer holds true. For an identical msg, some clients may * require compression, and others may not. If that same msg (event) is sent * to multiple clients, it could result in some clients receiving a compressed * msg even though compression was not explicitly required for them. * * CRM_ASSERT((header->size_uncompressed + hdr_offset) >= ipc_buffer_max); */ CRM_ASSERT(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, hdr_offset); /* Preserve the header */ header = (struct crm_ipc_response_header *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } CRM_ASSERT(client->buffer[hdr_offset + header->size_uncompressed - 1] == 0); return pcmk_rc_ok; } long crm_ipc_read(crm_ipc_t * client) { struct crm_ipc_response_header *header = NULL; CRM_ASSERT(client != NULL); CRM_ASSERT(client->ipc != NULL); CRM_ASSERT(client->buffer != NULL); crm_ipc_init(); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } header = (struct crm_ipc_response_header *)(void*)client->buffer; if(header->version > PCMK_IPC_VERSION) { crm_err("Filtering incompatible v%d IPC message, we only support versions <= %d", header->version, PCMK_IPC_VERSION); return -EBADMSG; } crm_trace("Received %s event %d, size=%u, rc=%d, text: %.100s", client->name, header->qb.id, header->qb.size, client->msg_size, client->buffer + hdr_offset); } else { crm_trace("No message from %s received: %s", client->name, pcmk_strerror(client->msg_size)); } if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { crm_err("Connection to %s failed", client->name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(struct crm_ipc_response_header); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { struct crm_ipc_response_header *header = NULL; CRM_ASSERT(client != NULL); if (client->buffer == NULL) { return 0; } header = (struct crm_ipc_response_header *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); int rc = pcmk_rc_ok; crm_ipc_init(); /* get the reply */ crm_trace("client %s waiting on reply to msg id %d", client->name, request_id); do { *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); if (*bytes > 0) { struct crm_ipc_response_header *hdr = NULL; rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return rc; } hdr = (struct crm_ipc_response_header *)(void*)client->buffer; if (hdr->qb.id == request_id) { /* Got it */ break; } else if (hdr->qb.id < request_id) { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "OldIpcReply"); } else { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "ImpossibleReply"); CRM_ASSERT(hdr->qb.id <= request_id); } } else if (crm_ipc_connected(client) == FALSE) { crm_err("Server disconnected client %s while waiting for msg id %d", client->name, request_id); break; } } while (time(NULL) < timeout); if (*bytes < 0) { rc = (int) -*bytes; // System errno } return rc; } /*! * \brief Send an IPC XML message * * \param[in] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply) { int rc = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; struct crm_ipc_response_header *header; crm_ipc_init(); if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (crm_ipc_connected(client) == FALSE) { /* Don't even bother */ crm_notice("Can't send IPC request to %s: Connection closed", client->name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending IPC to %s disabled until pending reply received", client->name); return -EALREADY; } else { crm_notice("Sending IPC to %s re-enabled after pending reply received", client->name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare IPC request to %s: %s " CRM_XS " rc=%d", client->name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; header->flags |= flags; if(is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ clear_bit(flags, crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { crm_notice("Compressed message exceeds %d0%% of configured IPC " "limit (%u bytes); consider setting PCMK_ipc_buffer to " "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->name, header->qb.id, header->qb.size, ms_timeout); if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return * an error if it's not connected? */ if (!crm_ipc_connected(client)) { goto send_cleanup; } qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } else if (is_not_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); if (rc != pcmk_rc_ok) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } rc = (int) bytes; // Negative system errno, or size of reply received } else { // No timeout, and client response needed do { qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1); } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); rc = (int) qb_rc; // Negative system errno, or size of reply received } if (rc > 0) { struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer; crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", rc, hdr->qb.id, client->name, header->qb.id, crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("No reply to %s IPC %d: rc=%d", client->name, header->qb.id, rc); } send_cleanup: if (crm_ipc_connected(client) == FALSE) { crm_notice("Couldn't send %s IPC request %d: Connection closed " CRM_XS " rc=%d", client->name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", client->name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", client->name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #if defined(US_AUTH_PEERCRED_UCRED) struct ucred ucred; socklen_t ucred_len = sizeof(ucred); if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) && ucred_len == sizeof(ucred)) { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; #elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) && sockpeercred_len == sizeof(sockpeercred_len)) { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; #elif defined(US_AUTH_GETPEEREID) if (!getpeereid(sock, &found_uid, &found_gid)) { found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ #elif defined(US_AUTH_GETPEERUCRED) ucred_t *ucred; if (!getpeerucred(sock, &ucred)) { errno = 0; found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ret = -errno; ucred_free(ucred); if (ret) { return (ret < 0) ? ret : -pcmk_err_generic; } #else # error "No way to authenticate a Unix socket peer" errno = 0; if (0) { #endif if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } ret = (found_uid == 0 || found_uid == refuid || found_gid == refgid); } else { ret = (errno > 0) ? -errno : -pcmk_err_generic; } return ret; } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; c = qb_ipcc_connect(name, 0); if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = crm_ipc_is_authentic_process(fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc < 0) { rc = pcmk_legacy2rc(auth_rc); crm_err("Could not get peer credentials from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } if (auth_rc == 0) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } /* Utils */ xmlNode * create_hello_message(const char *uuid, const char *client_name, const char *major_version, const char *minor_version) { xmlNode *hello_node = NULL; xmlNode *hello = NULL; if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name) || pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) { crm_err("Could not create IPC hello message from %s (UUID %s): " "missing information", client_name? client_name : "unknown client", uuid? uuid : "unknown"); return NULL; } hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); if (hello_node == NULL) { crm_err("Could not create IPC hello message from %s (UUID %s): " "Message data creation failed", client_name, uuid); return NULL; } crm_xml_add(hello_node, "major_version", major_version); crm_xml_add(hello_node, "minor_version", minor_version); crm_xml_add(hello_node, "client_name", client_name); crm_xml_add(hello_node, "client_uuid", uuid); hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); if (hello == NULL) { crm_err("Could not create IPC hello message from %s (UUID %s): " "Request creation failed", client_name, uuid); return NULL; } free_xml(hello_node); crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); return hello; } diff --git a/lib/services/dbus.c b/lib/services/dbus.c index 58df9270df..a66a82eb8b 100644 --- a/lib/services/dbus.c +++ b/lib/services/dbus.c @@ -1,665 +1,665 @@ /* * Copyright (C) 2014-2016 Andrew Beekhof * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #define BUS_PROPERTY_IFACE "org.freedesktop.DBus.Properties" static GList *conn_dispatches = NULL; struct db_getall_data { char *name; char *target; char *object; void *userdata; void (*callback)(const char *name, const char *value, void *userdata); }; static void free_db_getall_data(struct db_getall_data *data) { free(data->target); free(data->object); free(data->name); free(data); } DBusConnection * pcmk_dbus_connect(void) { DBusError err; DBusConnection *connection; dbus_error_init(&err); connection = dbus_bus_get(DBUS_BUS_SYSTEM, &err); if (dbus_error_is_set(&err)) { crm_err("Could not connect to System DBus: %s", err.message); dbus_error_free(&err); return NULL; } if(connection) { pcmk_dbus_connection_setup_with_select(connection); } return connection; } void pcmk_dbus_disconnect(DBusConnection *connection) { return; } /*! * \internal * \brief Check whether a DBus reply indicates an error occurred * * \param[in] pending If non-NULL, indicates that a DBus request was sent * \param[in] reply Reply received from DBus * \param[out] ret If non-NULL, will be set to DBus error, if any * * \return TRUE if an error was found, FALSE otherwise * * \note Following the DBus API convention, a TRUE return is exactly equivalent * to ret being set. If ret is provided and this function returns TRUE, * the caller is responsible for calling dbus_error_free() on ret when * done using it. */ bool pcmk_dbus_find_error(DBusPendingCall *pending, DBusMessage *reply, DBusError *ret) { DBusError error; dbus_error_init(&error); if(pending == NULL) { dbus_set_error_const(&error, "org.clusterlabs.pacemaker.NoRequest", "No request sent"); } else if(reply == NULL) { dbus_set_error_const(&error, "org.clusterlabs.pacemaker.NoReply", "No reply"); } else { DBusMessageIter args; int dtype = dbus_message_get_type(reply); char *sig; switch(dtype) { case DBUS_MESSAGE_TYPE_METHOD_RETURN: dbus_message_iter_init(reply, &args); sig = dbus_message_iter_get_signature(&args); crm_trace("DBus call returned output args '%s'", sig); dbus_free(sig); break; case DBUS_MESSAGE_TYPE_INVALID: dbus_set_error_const(&error, "org.clusterlabs.pacemaker.InvalidReply", "Invalid reply"); break; case DBUS_MESSAGE_TYPE_METHOD_CALL: dbus_set_error_const(&error, "org.clusterlabs.pacemaker.InvalidReply.Method", "Invalid reply (method call)"); break; case DBUS_MESSAGE_TYPE_SIGNAL: dbus_set_error_const(&error, "org.clusterlabs.pacemaker.InvalidReply.Signal", "Invalid reply (signal)"); break; case DBUS_MESSAGE_TYPE_ERROR: dbus_set_error_from_message(&error, reply); break; default: dbus_set_error(&error, "org.clusterlabs.pacemaker.InvalidReply.Type", "Unknown reply type %d", dtype); } } if (dbus_error_is_set(&error)) { crm_trace("DBus reply indicated error '%s' (%s)", error.name, error.message); if (ret) { dbus_error_init(ret); dbus_move_error(&error, ret); } else { dbus_error_free(&error); } return TRUE; } return FALSE; } /*! * \internal * \brief Send a DBus request and wait for the reply * * \param[in] msg DBus request to send * \param[in] connection DBus connection to use * \param[out] error If non-NULL, will be set to error, if any * \param[in] timeout Timeout to use for request * * \return DBus reply * * \note If error is non-NULL, it is initialized, so the caller may always use * dbus_error_is_set() to determine whether an error occurred; the caller * is responsible for calling dbus_error_free() in this case. */ DBusMessage * pcmk_dbus_send_recv(DBusMessage *msg, DBusConnection *connection, DBusError *error, int timeout) { const char *method = NULL; DBusMessage *reply = NULL; DBusPendingCall* pending = NULL; CRM_ASSERT(dbus_message_get_type (msg) == DBUS_MESSAGE_TYPE_METHOD_CALL); method = dbus_message_get_member (msg); /* Ensure caller can reliably check whether error is set */ if (error) { dbus_error_init(error); } if (timeout <= 0) { /* DBUS_TIMEOUT_USE_DEFAULT (-1) tells DBus to use a sane default */ timeout = DBUS_TIMEOUT_USE_DEFAULT; } // send message and get a handle for a reply if (!dbus_connection_send_with_reply(connection, msg, &pending, timeout)) { if(error) { dbus_set_error(error, "org.clusterlabs.pacemaker.SendFailed", "Could not queue DBus '%s' request", method); } return NULL; } dbus_connection_flush(connection); if(pending) { /* block until we receive a reply */ dbus_pending_call_block(pending); /* get the reply message */ reply = dbus_pending_call_steal_reply(pending); } (void)pcmk_dbus_find_error(pending, reply, error); if(pending) { /* free the pending message handle */ dbus_pending_call_unref(pending); } return reply; } /*! * \internal * \brief Send a DBus message with a callback for the reply * * \param[in] msg DBus message to send * \param[in,out] connection DBus connection to send on * \param[in] done Function to call when pending call completes * \param[in] user_data Data to pass to done callback * * \return Handle for reply on success, NULL on error * \note The caller can assume that the done callback is called always and * only when the return value is non-NULL. (This allows the caller to * know where it should free dynamically allocated user_data.) */ DBusPendingCall * pcmk_dbus_send(DBusMessage *msg, DBusConnection *connection, void(*done)(DBusPendingCall *pending, void *user_data), void *user_data, int timeout) { const char *method = NULL; DBusPendingCall* pending = NULL; CRM_ASSERT(done); CRM_ASSERT(dbus_message_get_type (msg) == DBUS_MESSAGE_TYPE_METHOD_CALL); method = dbus_message_get_member (msg); if (timeout <= 0) { /* DBUS_TIMEOUT_USE_DEFAULT (-1) tells DBus to use a sane default */ timeout = DBUS_TIMEOUT_USE_DEFAULT; } // send message and get a handle for a reply if (!dbus_connection_send_with_reply(connection, msg, &pending, timeout)) { crm_err("Send with reply failed for %s", method); return NULL; } else if (pending == NULL) { crm_err("No pending call found for %s: Connection to System DBus may be closed", method); return NULL; } crm_trace("DBus %s call sent", method); if (dbus_pending_call_get_completed(pending)) { crm_info("DBus %s call completed too soon", method); if(done) { #if 0 /* This sounds like a good idea, but allegedly it breaks things */ done(pending, user_data); pending = NULL; #else CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL)); #endif } } else if(done) { CRM_ASSERT(dbus_pending_call_set_notify(pending, done, user_data, NULL)); } return pending; } bool pcmk_dbus_type_check(DBusMessage *msg, DBusMessageIter *field, int expected, const char *function, int line) { int dtype = 0; DBusMessageIter lfield; if(field == NULL) { if(dbus_message_iter_init(msg, &lfield)) { field = &lfield; } } if(field == NULL) { do_crm_log_alias(LOG_ERR, __FILE__, function, line, "Empty parameter list in reply expecting '%c'", expected); return FALSE; } dtype = dbus_message_iter_get_arg_type(field); if(dtype != expected) { DBusMessageIter args; char *sig; dbus_message_iter_init(msg, &args); sig = dbus_message_iter_get_signature(&args); do_crm_log_alias(LOG_ERR, __FILE__, function, line, "Unexpected DBus type, expected %c in '%s' instead of %c", expected, sig, dtype); dbus_free(sig); return FALSE; } return TRUE; } static char * pcmk_dbus_lookup_result(DBusMessage *reply, struct db_getall_data *data) { DBusError error; char *output = NULL; DBusMessageIter dict; DBusMessageIter args; if (pcmk_dbus_find_error((void*)&error, reply, &error)) { crm_err("Cannot get properties from %s for %s: %s", data->target, data->object, error.message); dbus_error_free(&error); goto cleanup; } dbus_message_iter_init(reply, &args); if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __FUNCTION__, __LINE__)) { crm_err("Invalid reply from %s for %s", data->target, data->object); goto cleanup; } dbus_message_iter_recurse(&args, &dict); while (dbus_message_iter_get_arg_type (&dict) != DBUS_TYPE_INVALID) { DBusMessageIter sv; DBusMessageIter v; DBusBasicValue name; DBusBasicValue value; if(!pcmk_dbus_type_check(reply, &dict, DBUS_TYPE_DICT_ENTRY, __FUNCTION__, __LINE__)) { dbus_message_iter_next (&dict); continue; } dbus_message_iter_recurse(&dict, &sv); while (dbus_message_iter_get_arg_type (&sv) != DBUS_TYPE_INVALID) { int dtype = dbus_message_iter_get_arg_type(&sv); switch(dtype) { case DBUS_TYPE_STRING: dbus_message_iter_get_basic(&sv, &name); if(data->name && strcmp(name.str, data->name) != 0) { dbus_message_iter_next (&sv); /* Skip the value */ } break; case DBUS_TYPE_VARIANT: dbus_message_iter_recurse(&sv, &v); if(pcmk_dbus_type_check(reply, &v, DBUS_TYPE_STRING, __FUNCTION__, __LINE__)) { dbus_message_iter_get_basic(&v, &value); crm_trace("Property %s[%s] is '%s'", data->object, name.str, value.str); if(data->callback) { data->callback(name.str, value.str, data->userdata); } else { free(output); output = strdup(value.str); } if(data->name) { goto cleanup; } } break; default: pcmk_dbus_type_check(reply, &sv, DBUS_TYPE_STRING, __FUNCTION__, __LINE__); } dbus_message_iter_next (&sv); } dbus_message_iter_next (&dict); } if(data->name && data->callback) { crm_trace("No value for property %s[%s]", data->object, data->name); data->callback(data->name, NULL, data->userdata); } cleanup: free_db_getall_data(data); return output; } static void pcmk_dbus_lookup_cb(DBusPendingCall *pending, void *user_data) { DBusMessage *reply = NULL; char *value = NULL; if(pending) { reply = dbus_pending_call_steal_reply(pending); } value = pcmk_dbus_lookup_result(reply, user_data); free(value); if(reply) { dbus_message_unref(reply); } } char * pcmk_dbus_get_property(DBusConnection *connection, const char *target, const char *obj, const gchar * iface, const char *name, void (*callback)(const char *name, const char *value, void *userdata), void *userdata, DBusPendingCall **pending, int timeout) { DBusMessage *msg; const char *method = "GetAll"; char *output = NULL; struct db_getall_data *query_data = NULL; crm_debug("Calling: %s on %s", method, target); msg = dbus_message_new_method_call(target, // target for the method call obj, // object to call on BUS_PROPERTY_IFACE, // interface to call on method); // method name if (NULL == msg) { crm_err("Call to %s failed: No message", method); return NULL; } CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &iface, DBUS_TYPE_INVALID)); query_data = malloc(sizeof(struct db_getall_data)); if(query_data == NULL) { crm_err("Call to %s failed: malloc failed", method); return NULL; } query_data->target = strdup(target); query_data->object = strdup(obj); query_data->callback = callback; query_data->userdata = userdata; query_data->name = NULL; if(name) { query_data->name = strdup(name); } if (query_data->callback) { DBusPendingCall *local_pending; local_pending = pcmk_dbus_send(msg, connection, pcmk_dbus_lookup_cb, query_data, timeout); if (local_pending == NULL) { // pcmk_dbus_lookup_cb() was not called in this case free_db_getall_data(query_data); query_data = NULL; } if (pending) { *pending = local_pending; } } else { DBusMessage *reply = pcmk_dbus_send_recv(msg, connection, NULL, timeout); output = pcmk_dbus_lookup_result(reply, query_data); if(reply) { dbus_message_unref(reply); } } dbus_message_unref(msg); return output; } static void pcmk_dbus_connection_dispatch_status(DBusConnection *connection, DBusDispatchStatus new_status, void *data) { crm_trace("New status %d for connection %p", new_status, connection); if (new_status == DBUS_DISPATCH_DATA_REMAINS){ conn_dispatches = g_list_prepend(conn_dispatches, connection); } } static void -pcmk_dbus_connections_dispatch() +pcmk_dbus_connections_dispatch(void) { GList *gIter = NULL; for (gIter = conn_dispatches; gIter != NULL; gIter = gIter->next) { DBusConnection *connection = gIter->data; while (dbus_connection_get_dispatch_status(connection) == DBUS_DISPATCH_DATA_REMAINS) { crm_trace("Dispatching for connection %p", connection); dbus_connection_dispatch(connection); } } g_list_free(conn_dispatches); conn_dispatches = NULL; } /* Copied from dbus-watch.c */ static const char* dbus_watch_flags_to_string(int flags) { const char *watch_type; if ((flags & DBUS_WATCH_READABLE) && (flags & DBUS_WATCH_WRITABLE)) { watch_type = "readwrite"; } else if (flags & DBUS_WATCH_READABLE) { watch_type = "read"; } else if (flags & DBUS_WATCH_WRITABLE) { watch_type = "write"; } else { watch_type = "not read or write"; } return watch_type; } static int pcmk_dbus_watch_dispatch(gpointer userdata) { bool oom = FALSE; DBusWatch *watch = userdata; int flags = dbus_watch_get_flags(watch); bool enabled = dbus_watch_get_enabled (watch); mainloop_io_t *client = dbus_watch_get_data(watch); crm_trace("Dispatching client %p: %s", client, dbus_watch_flags_to_string(flags)); if (enabled && (flags & (DBUS_WATCH_READABLE|DBUS_WATCH_WRITABLE))) { oom = !dbus_watch_handle(watch, flags); } else if(enabled) { oom = !dbus_watch_handle(watch, DBUS_WATCH_ERROR); } if(flags != dbus_watch_get_flags(watch)) { flags = dbus_watch_get_flags(watch); crm_trace("Dispatched client %p: %s (%d)", client, dbus_watch_flags_to_string(flags), flags); } if(oom) { crm_err("DBus encountered OOM while attempting to dispatch %p (%s)", client, dbus_watch_flags_to_string(flags)); } else { pcmk_dbus_connections_dispatch(); } return 0; } static void pcmk_dbus_watch_destroy(gpointer userdata) { mainloop_io_t *client = dbus_watch_get_data(userdata); crm_trace("Destroyed %p", client); } struct mainloop_fd_callbacks pcmk_dbus_cb = { .dispatch = pcmk_dbus_watch_dispatch, .destroy = pcmk_dbus_watch_destroy, }; static dbus_bool_t pcmk_dbus_watch_add(DBusWatch *watch, void *data) { int fd = dbus_watch_get_unix_fd(watch); mainloop_io_t *client = mainloop_add_fd( "dbus", G_PRIORITY_DEFAULT, fd, watch, &pcmk_dbus_cb); crm_trace("Added watch %p with fd=%d to client %p", watch, fd, client); dbus_watch_set_data(watch, client, NULL); return TRUE; } static void pcmk_dbus_watch_toggle(DBusWatch *watch, void *data) { mainloop_io_t *client = dbus_watch_get_data(watch); crm_notice("DBus client %p is now %s", client, (dbus_watch_get_enabled(watch)? "enabled" : "disabled")); } static void pcmk_dbus_watch_remove(DBusWatch *watch, void *data) { mainloop_io_t *client = dbus_watch_get_data(watch); crm_trace("Removed client %p (%p)", client, data); mainloop_del_fd(client); } static gboolean pcmk_dbus_timeout_dispatch(gpointer data) { crm_info("Timeout %p expired", data); dbus_timeout_handle(data); return FALSE; } static dbus_bool_t pcmk_dbus_timeout_add(DBusTimeout *timeout, void *data) { guint id = g_timeout_add(dbus_timeout_get_interval(timeout), pcmk_dbus_timeout_dispatch, timeout); crm_trace("Adding timeout %p (%d)", timeout, dbus_timeout_get_interval(timeout)); if(id) { dbus_timeout_set_data(timeout, GUINT_TO_POINTER(id), NULL); } return TRUE; } static void pcmk_dbus_timeout_remove(DBusTimeout *timeout, void *data) { void *vid = dbus_timeout_get_data(timeout); guint id = GPOINTER_TO_UINT(vid); crm_trace("Removing timeout %p (%p)", timeout, data); if(id) { g_source_remove(id); dbus_timeout_set_data(timeout, 0, NULL); } } static void pcmk_dbus_timeout_toggle(DBusTimeout *timeout, void *data) { bool enabled = dbus_timeout_get_enabled(timeout); crm_trace("Toggling timeout for %p to %s", timeout, enabled?"off":"on"); if(enabled) { pcmk_dbus_timeout_add(timeout, data); } else { pcmk_dbus_timeout_remove(timeout, data); } } /* Inspired by http://www.kolej.mff.cuni.cz/~vesej3am/devel/dbus-select.c */ void pcmk_dbus_connection_setup_with_select(DBusConnection *c) { dbus_connection_set_exit_on_disconnect(c, FALSE); dbus_connection_set_timeout_functions(c, pcmk_dbus_timeout_add, pcmk_dbus_timeout_remove, pcmk_dbus_timeout_toggle, NULL, NULL); dbus_connection_set_watch_functions(c, pcmk_dbus_watch_add, pcmk_dbus_watch_remove, pcmk_dbus_watch_toggle, NULL, NULL); dbus_connection_set_dispatch_status_function(c, pcmk_dbus_connection_dispatch_status, NULL, NULL); pcmk_dbus_connection_dispatch_status(c, dbus_connection_get_dispatch_status(c), NULL); } diff --git a/tools/cibadmin.c b/tools/cibadmin.c index f7413fcb57..675b877545 100644 --- a/tools/cibadmin.c +++ b/tools/cibadmin.c @@ -1,772 +1,772 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include static int message_timeout_ms = 30; static int command_options = 0; static int request_id = 0; static int bump_log_num = 0; static const char *host = NULL; static const char *cib_user = NULL; static const char *cib_action = NULL; static const char *obj_type = NULL; static cib_t *the_cib = NULL; static GMainLoop *mainloop = NULL; static gboolean force_flag = FALSE; static crm_exit_t exit_code = CRM_EX_OK; int do_init(void); int do_work(xmlNode *input, int command_options, xmlNode **output); void cibadmin_op_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "help", no_argument, NULL, '?', "\tThis text", pcmk__option_default }, { "version", no_argument, NULL, '$', "\tVersion information", pcmk__option_default }, { "verbose", no_argument, NULL, 'V', "\tIncrease debug output\n", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "Commands:", pcmk__option_default }, { "upgrade", no_argument, NULL, 'u', "\tUpgrade the configuration to the latest syntax", pcmk__option_default }, { "query", no_argument, NULL, 'Q', "\tQuery the contents of the CIB", pcmk__option_default }, { "erase", no_argument, NULL, 'E', "\tErase the contents of the whole CIB", pcmk__option_default }, { "bump", no_argument, NULL, 'B', "\tIncrease the CIB's epoch value by 1", pcmk__option_default }, { "create", no_argument, NULL, 'C', "\tCreate an object in the CIB (will fail if object already exists)", pcmk__option_default }, { "modify", no_argument, NULL, 'M', "\tFind object somewhere in CIB's XML tree and update it " "(fails if object does not exist unless -c is also specified)", pcmk__option_default }, { "patch", no_argument, NULL, 'P', "\tSupply an update in the form of an XML diff (see crm_diff(8))", pcmk__option_default }, { "replace", no_argument, NULL, 'R', "\tRecursively replace an object in the CIB", pcmk__option_default }, { "delete", no_argument, NULL, 'D', "\tDelete first object matching supplied criteria " "(for example, )", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\n\tThe XML element name and all attributes must match " "in order for the element to be deleted.\n", pcmk__option_default }, { "delete-all", no_argument, NULL, 'd', "When used with --xpath, remove all matching objects in the " "configuration instead of just the first one", pcmk__option_default }, { "empty", no_argument, NULL, 'a', "\tOutput an empty CIB", pcmk__option_default }, { "md5-sum", no_argument, NULL, '5', "\tCalculate the on-disk CIB digest", pcmk__option_default }, { "md5-sum-versioned", no_argument, NULL, '6', "Calculate an on-the-wire versioned CIB digest", pcmk__option_default }, { "blank", no_argument, NULL, '-', NULL, pcmk__option_hidden }, { "-spacer-", required_argument, NULL, '-', "\nAdditional options:", pcmk__option_default }, { "force", no_argument, NULL, 'f', NULL, pcmk__option_default }, { "timeout", required_argument, NULL, 't', "Time (in seconds) to wait before declaring the operation failed", pcmk__option_default }, { "user", required_argument, NULL, 'U', "Run the command with permissions of the named user (valid only for " "the root and " CRM_DAEMON_USER " accounts)", pcmk__option_default }, { "sync-call", no_argument, NULL, 's', "Wait for call to complete before returning", pcmk__option_default }, { "local", no_argument, NULL, 'l', "\tCommand takes effect locally (should be used only for queries)", pcmk__option_default }, { "allow-create", no_argument, NULL, 'c', "(Advanced) Allow target of --modify/-M to be created " "if it does not exist", pcmk__option_default }, { "no-children", no_argument, NULL, 'n', "(Advanced) When querying an object, do not include its children " "in the result", pcmk__option_default }, { "no-bcast", no_argument, NULL, 'b', NULL, pcmk__option_hidden }, { "-spacer-", no_argument, NULL, '-', "\nData:", pcmk__option_default }, { "xml-text", required_argument, NULL, 'X', "Retrieve XML from the supplied string", pcmk__option_default }, { "xml-file", required_argument, NULL, 'x', "Retrieve XML from the named file", pcmk__option_default }, { "xml-pipe", no_argument, NULL, 'p', "Retrieve XML from stdin\n", pcmk__option_default }, { "scope", required_argument, NULL, 'o', "Limit scope of operation to specific section of CIB", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "\tValid values: nodes, resources, constraints, crm_config, " "rsc_defaults, op_defaults, status", pcmk__option_default }, { "xpath", required_argument, NULL, 'A', "A valid XPath to use instead of --scope/-o", pcmk__option_default }, { "node-path", no_argument, NULL, 'e', "When performing XPath queries, return path of any matches found", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "(for example, \"/cib/configuration/resources/clone[@id='ms_RH1_SCS']" "/primitive[@id='prm_RH1_SCS']\")", pcmk__option_paragraph }, { "node", required_argument, NULL, 'N', "(Advanced) Send command to the specified host", pcmk__option_default }, { "-spacer-", no_argument, NULL, '!', NULL, pcmk__option_hidden }, { "-spacer-", no_argument, NULL, '-', "\n\nExamples:\n", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', "Query the configuration from the local node:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --query --local", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Query just the cluster options configuration:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --query --scope crm_config", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Query all 'target-role' settings:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --query --xpath \"//nvpair[@name='target-role']\"", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Remove all 'is-managed' settings:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --delete-all --xpath \"//nvpair[@name='is-managed']\"", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Remove the resource named 'old':", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --delete --xml-text ''", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Remove all resources from the configuration:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --replace --scope resources --xml-text ''", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Replace complete configuration with contents of $HOME/pacemaker.xml:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --replace --xml-file $HOME/pacemaker.xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Replace constraints section of configuration with contents of " "$HOME/constraints.xml:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --replace --scope constraints --xml-file " "$HOME/constraints.xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Increase configuration version to prevent old configurations from " "being loaded accidentally:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --modify --xml-text ''", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "Edit the configuration with your favorite $EDITOR:", pcmk__option_paragraph }, { "-spacer-", no_argument, NULL, '-', " cibadmin --query > $HOME/local.xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', " $EDITOR $HOME/local.xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', " cibadmin --replace --xml-file $HOME/local.xml", pcmk__option_example }, { "-spacer-", no_argument, NULL, '-', "SEE ALSO:", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', " crm(8), pcs(8), crm_shadow(8), crm_diff(8)", pcmk__option_default }, { "host", required_argument, NULL, 'h', "deprecated", pcmk__option_hidden }, { 0, 0, 0, 0 } }; static void print_xml_output(xmlNode * xml) { char *buffer; if (!xml) { return; } else if (xml->type != XML_ELEMENT_NODE) { return; } if (command_options & cib_xpath_address) { const char *id = crm_element_value(xml, XML_ATTR_ID); if (safe_str_eq((const char *)xml->name, "xpath-query")) { xmlNode *child = NULL; for (child = xml->children; child; child = child->next) { print_xml_output(child); } } else if (id) { printf("%s\n", id); } } else { buffer = dump_xml_formatted(xml); fprintf(stdout, "%s", crm_str(buffer)); free(buffer); } } // Upgrade requested but already at latest schema static void -report_schema_unchanged() +report_schema_unchanged(void) { const char *err = pcmk_strerror(pcmk_err_schema_unchanged); crm_info("Upgrade unnecessary: %s\n", err); printf("Upgrade unnecessary: %s\n", err); exit_code = CRM_EX_OK; } int main(int argc, char **argv) { int argerr = 0; int rc = pcmk_ok; int flag; const char *source = NULL; const char *admin_input_xml = NULL; const char *admin_input_file = NULL; gboolean dangerous_cmd = FALSE; gboolean admin_input_stdin = FALSE; xmlNode *output = NULL; xmlNode *input = NULL; int option_index = 0; crm_log_cli_init("cibadmin"); set_crm_log_level(LOG_CRIT); pcmk__set_cli_options(NULL, " [options]", long_options, "query and edit the Pacemaker configuration"); if (argc < 2) { pcmk__cli_help('?', CRM_EX_USAGE); } while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) break; switch (flag) { case 't': message_timeout_ms = atoi(optarg); if (message_timeout_ms < 1) { message_timeout_ms = 30; } break; case 'A': obj_type = optarg; command_options |= cib_xpath; break; case 'e': command_options |= cib_xpath_address; break; case 'u': cib_action = CIB_OP_UPGRADE; dangerous_cmd = TRUE; break; case 'E': cib_action = CIB_OP_ERASE; dangerous_cmd = TRUE; break; case 'Q': cib_action = CIB_OP_QUERY; break; case 'P': cib_action = CIB_OP_APPLY_DIFF; break; case 'U': cib_user = optarg; break; case 'M': cib_action = CIB_OP_MODIFY; break; case 'R': cib_action = CIB_OP_REPLACE; break; case 'C': cib_action = CIB_OP_CREATE; break; case 'D': cib_action = CIB_OP_DELETE; break; case '5': cib_action = "md5-sum"; break; case '6': cib_action = "md5-sum-versioned"; break; case 'c': command_options |= cib_can_create; break; case 'n': command_options |= cib_no_children; break; case 'B': cib_action = CIB_OP_BUMP; crm_log_args(argc, argv); break; case 'V': command_options = command_options | cib_verbose; bump_log_num++; break; case '?': case '$': case '!': pcmk__cli_help(flag, CRM_EX_OK); break; case 'o': crm_trace("Option %c => %s", flag, optarg); obj_type = optarg; break; case 'X': crm_trace("Option %c => %s", flag, optarg); admin_input_xml = optarg; crm_log_args(argc, argv); break; case 'x': crm_trace("Option %c => %s", flag, optarg); admin_input_file = optarg; crm_log_args(argc, argv); break; case 'p': admin_input_stdin = TRUE; crm_log_args(argc, argv); break; case 'N': case 'h': host = strdup(optarg); break; case 'l': command_options |= cib_scope_local; break; case 'd': cib_action = CIB_OP_DELETE; command_options |= cib_multiple; dangerous_cmd = TRUE; break; case 'b': dangerous_cmd = TRUE; command_options |= cib_inhibit_bcast; command_options |= cib_scope_local; break; case 's': command_options |= cib_sync_call; break; case 'f': force_flag = TRUE; command_options |= cib_quorum_override; crm_log_args(argc, argv); break; case 'a': output = createEmptyCib(1); if (optind < argc) { crm_xml_add(output, XML_ATTR_VALIDATION, argv[optind]); } admin_input_xml = dump_xml_formatted(output); fprintf(stdout, "%s\n", crm_str(admin_input_xml)); crm_exit(CRM_EX_OK); break; default: printf("Argument code 0%o (%c)" " is not (?yet?) supported\n", flag, flag); ++argerr; break; } } while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); pcmk__cli_help('?', CRM_EX_USAGE); } if (optind > argc || cib_action == NULL) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } if (dangerous_cmd && force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); fflush(stderr); crm_exit(CRM_EX_UNSAFE); } if (admin_input_file != NULL) { input = filename2xml(admin_input_file); source = admin_input_file; } else if (admin_input_xml != NULL) { source = "input string"; input = string2xml(admin_input_xml); } else if (admin_input_stdin) { source = "STDIN"; input = stdin2xml(); } if (input != NULL) { crm_log_xml_debug(input, "[admin input]"); } else if (source) { fprintf(stderr, "Couldn't parse input from %s.\n", source); crm_exit(CRM_EX_CONFIG); } if (safe_str_eq(cib_action, "md5-sum")) { char *digest = NULL; if (input == NULL) { fprintf(stderr, "Please supply XML to process with -X, -x or -p\n"); crm_exit(CRM_EX_USAGE); } digest = calculate_on_disk_digest(input); fprintf(stderr, "Digest: "); fprintf(stdout, "%s\n", crm_str(digest)); free(digest); free_xml(input); crm_exit(CRM_EX_OK); } else if (safe_str_eq(cib_action, "md5-sum-versioned")) { char *digest = NULL; const char *version = NULL; if (input == NULL) { fprintf(stderr, "Please supply XML to process with -X, -x or -p\n"); crm_exit(CRM_EX_USAGE); } version = crm_element_value(input, XML_ATTR_CRM_VERSION); digest = calculate_xml_versioned_digest(input, FALSE, TRUE, version); fprintf(stderr, "Versioned (%s) digest: ", version); fprintf(stdout, "%s\n", crm_str(digest)); free(digest); free_xml(input); crm_exit(CRM_EX_OK); } rc = do_init(); if (rc != pcmk_ok) { crm_err("Init failed, could not perform requested operations"); fprintf(stderr, "Init failed, could not perform requested operations\n"); free_xml(input); crm_exit(crm_errno2exit(rc)); } rc = do_work(input, command_options, &output); if (rc > 0) { /* wait for the reply by creating a mainloop and running it until * the callbacks are invoked... */ request_id = rc; the_cib->cmds->register_callback(the_cib, request_id, message_timeout_ms, FALSE, NULL, "cibadmin_op_callback", cibadmin_op_callback); mainloop = g_main_loop_new(NULL, FALSE); crm_trace("%s waiting for reply from the local CIB", crm_system_name); crm_info("Starting mainloop"); g_main_loop_run(mainloop); } else if ((rc == -pcmk_err_schema_unchanged) && crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { report_schema_unchanged(); } else if (rc < 0) { crm_err("Call failed: %s", pcmk_strerror(rc)); fprintf(stderr, "Call failed: %s\n", pcmk_strerror(rc)); if (rc == -pcmk_err_schema_validation) { if (crm_str_eq(cib_action, CIB_OP_UPGRADE, TRUE)) { xmlNode *obj = NULL; int version = 0, rc = 0; rc = the_cib->cmds->query(the_cib, NULL, &obj, command_options); if (rc == pcmk_ok) { update_validation(&obj, &version, 0, TRUE, FALSE); } } else if (output) { validate_xml_verbose(output); } } exit_code = crm_errno2exit(rc); } if (output != NULL) { print_xml_output(output); free_xml(output); } crm_trace("%s exiting normally", crm_system_name); free_xml(input); rc = the_cib->cmds->signoff(the_cib); if (exit_code == CRM_EX_OK) { exit_code = crm_errno2exit(rc); } cib_delete(the_cib); crm_exit(exit_code); } int do_work(xmlNode * input, int call_options, xmlNode ** output) { /* construct the request */ the_cib->call_timeout = message_timeout_ms; if (strcasecmp(CIB_OP_REPLACE, cib_action) == 0 && safe_str_eq(crm_element_name(input), XML_TAG_CIB)) { xmlNode *status = get_object_root(XML_CIB_TAG_STATUS, input); if (status == NULL) { create_xml_node(input, XML_CIB_TAG_STATUS); } } if (cib_action != NULL) { crm_trace("Passing \"%s\" to variant_op...", cib_action); return cib_internal_op(the_cib, cib_action, host, obj_type, input, output, call_options, cib_user); } else { crm_err("You must specify an operation"); } return -EINVAL; } int do_init(void) { int rc = pcmk_ok; the_cib = cib_new(); rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB: %s", pcmk_strerror(rc)); fprintf(stderr, "Could not connect to the CIB: %s\n", pcmk_strerror(rc)); } return rc; } void cibadmin_op_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { exit_code = crm_errno2exit(rc); if (rc == -pcmk_err_schema_unchanged) { report_schema_unchanged(); } else if (rc != pcmk_ok) { crm_warn("Call %s failed (%d): %s", cib_action, rc, pcmk_strerror(rc)); fprintf(stderr, "Call %s failed (%d): %s\n", cib_action, rc, pcmk_strerror(rc)); print_xml_output(output); } else if (safe_str_eq(cib_action, CIB_OP_QUERY) && output == NULL) { crm_err("Query returned no output"); crm_log_xml_err(msg, "no output"); } else if (output == NULL) { crm_info("Call passed"); } else { crm_info("Call passed"); print_xml_output(output); } if (call_id == request_id) { g_main_loop_quit(mainloop); } else { crm_info("Message was not the response we were looking for (%d vs. %d)", call_id, request_id); } } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 108e1109cf..c4b76b4064 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2199 +1,2199 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static unsigned int show; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint timer_id = 0; static mainloop_timer_t *refresh_timer = NULL; static pe_working_set_t *mon_data_set = NULL; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; crm_trigger_t *refresh_trigger = NULL; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, CRM_MON_SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* Define exit codes for monitoring-compatible output * For nagios plugins, the possibilities are * OK=0, WARN=1, CRIT=2, and UNKNOWN=3 */ #define MON_STATUS_WARN CRM_EX_ERROR #define MON_STATUS_CRIT CRM_EX_INVALID_PARAM #define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE #define RECONNECT_MSECS 5000 struct { int reconnect_msec; gboolean daemonize; gboolean show_bans; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; unsigned int mon_ops; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_msec = RECONNECT_MSECS, .mon_ops = mon_op_default }; static void clean_up_connections(void); static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static gboolean mon_refresh_display(gpointer user_data); static int cib_connect(gboolean full); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void kick_refresh(gboolean data_updated); static unsigned int all_includes(mon_output_format_t fmt) { if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) { return ~mon_show_options; } else { return mon_show_all; } } static unsigned int default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_monitor: case mon_output_plain: case mon_output_console: return mon_show_stack | mon_show_dc | mon_show_times | mon_show_counts | mon_show_nodes | mon_show_resources | mon_show_failures; break; case mon_output_xml: case mon_output_legacy_xml: return all_includes(fmt); break; case mon_output_html: case mon_output_cgi: return mon_show_summary | mon_show_nodes | mon_show_resources | mon_show_failures; break; default: return 0; break; } } struct { const char *name; unsigned int bit; } sections[] = { { "attributes", mon_show_attributes }, { "bans", mon_show_bans }, { "counts", mon_show_counts }, { "dc", mon_show_dc }, { "failcounts", mon_show_failcounts }, { "failures", mon_show_failures }, { "fencing", mon_show_fencing_all }, { "fencing-failed", mon_show_fence_failed }, { "fencing-pending", mon_show_fence_pending }, { "fencing-succeeded", mon_show_fence_worked }, { "nodes", mon_show_nodes }, { "operations", mon_show_operations }, { "options", mon_show_options }, { "resources", mon_show_resources }, { "stack", mon_show_stack }, { "summary", mon_show_summary }, { "tickets", mon_show_tickets }, { "times", mon_show_times }, { NULL } }; static unsigned int find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (crm_str_eq(sections[i].name, name, FALSE)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (crm_str_eq(*s, "all", TRUE)) { show = 0; } else if (crm_str_eq(*s, "none", TRUE)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, G_OPTION_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, " "operations, options, resources, stack, summary, " "tickets, times"); return FALSE; } } g_strfreev(parts); return TRUE; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (crm_str_eq(*s, "all", TRUE)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= mon_show_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (crm_str_eq(*s, "default", TRUE) || crm_str_eq(*s, "defaults", TRUE)) { show |= default_includes(output_format); } else if (crm_str_eq(*s, "none", TRUE)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, G_OPTION_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " "default, failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, nodes, none, operations, " "options, resources, stack, summary, tickets, times"); return FALSE; } } g_strfreev(parts); return TRUE; } static gboolean apply_include_exclude(GSList *lst, mon_output_format_t fmt, GError **error) { gboolean rc = TRUE; GSList *node = lst; /* Set the default of what to display here. Note that we OR everything to * show instead of set show directly because it could have already had some * settings applied to it in main. */ show |= default_includes(fmt); while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("html"); output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } if (args->output_dest != NULL) { free(args->output_dest); } if (optarg != NULL) { args->output_dest = strdup(optarg); } args->output_ty = strdup("html"); output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); return TRUE; } static gboolean as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_monitor; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_legacy_xml; options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_atoi(optarg, "2"); switch (rc) { case 3: options.mon_ops |= mon_op_fence_full_history | mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 2: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing", data, err); case 1: options.mon_ops |= mon_op_fence_history | mon_op_fence_connect; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.mon_ops &= ~(mon_op_fence_history | mon_op_fence_connect); return include_exclude_cb("--exclude", "fencing", data, err); default: g_set_error(err, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_group_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_inactive_resources; return TRUE; } static gboolean no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { output_format = mon_output_plain; return TRUE; } static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_brief; return TRUE; } static gboolean print_clone_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_clone_detail; return TRUE; } static gboolean print_pending_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_pending; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_print_timing; return include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { options.reconnect_msec = crm_parse_interval_spec(optarg); } return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.mon_ops |= mon_op_one_shot; return TRUE; } static gboolean watch_fencing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.mon_ops |= mon_op_watch_fencing; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once on the console and exit", NULL }, { "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, watch_fencing_cb, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_clone_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_pending_cb, "Display pending state if 'record-pending' is enabled", NULL }, { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb, "Display the cluster status once as a simple one line output (suitable for nagios)", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb, "Write cluster status to the named HTML file.\n" INDENT "Use --output-as=html --output-to=FILE instead.", "FILE" }, { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb, "Disable the use of ncurses.\n" INDENT "Use --output-as=text instead.", NULL }, { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb, "Web mode with output suitable for CGI (preselected when run as *.cgi).\n" INDENT "Use --output-as=html --html-cgi instead.", NULL }, { NULL } }; /* *INDENT-ON* */ static gboolean mon_timer_popped(gpointer data) { int rc = pcmk_ok; #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif if (timer_id > 0) { g_source_remove(timer_id); timer_id = 0; } print_as(output_format, "Reconnecting...\n"); rc = cib_connect(TRUE); if (rc != pcmk_ok) { timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); } return FALSE; } static void do_mon_cib_connection_destroy(gpointer user_data, bool is_error) { if (is_error) { out->err(out, "Connection to the cluster-daemons terminated"); } else { out->info(out, "Connection to the cluster-daemons terminated"); } if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (timer_id) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(timer_id); timer_id = 0; } if (st) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); if (st->state != stonith_disconnected) { st->cmds->disconnect(st); } } if (cib) { cib->cmds->signoff(cib); timer_id = g_timeout_add(options.reconnect_msec, mon_timer_popped, NULL); } return; } static void mon_cib_connection_destroy_regular(gpointer user_data) { do_mon_cib_connection_destroy(user_data, false); } static void mon_cib_connection_destroy_error(gpointer user_data) { do_mon_cib_connection_destroy(user_data, true); } /* * Mainloop signal handler. */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static sighandler_t ncurses_winch_handler; static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); mainloop_set_trigger(refresh_trigger); } not_done--; } #endif static int cib_connect(gboolean full) { int rc = pcmk_ok; static gboolean need_pass = TRUE; CRM_CHECK(cib != NULL, return -EINVAL); if (getenv("CIB_passwd") != NULL) { need_pass = FALSE; } if (is_set(options.mon_ops, mon_op_fence_connect) && st == NULL) { st = stonith_api_new(); } if (is_set(options.mon_ops, mon_op_fence_connect) && st != NULL && st->state == stonith_disconnected) { rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (is_set(options.mon_ops, mon_op_watch_fencing)) { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } } } if (cib->state != cib_connected_query && cib->state != cib_connected_command) { crm_trace("Connecting to the CIB"); /* Hack: the CIB signon will print the prompt for a password if needed, * but to stderr. If we're in curses, show it on the screen instead. * * @TODO Add a password prompt (maybe including input) function to * pcmk__output_t and use it in libcib. */ if ((output_format == mon_output_console) && need_pass && (cib->variant == cib_remote)) { need_pass = FALSE; print_as(output_format, "Password:"); } rc = cib->cmds->signon(cib, crm_system_name, cib_query); if (rc != pcmk_ok) { out->err(out, "Could not connect to the CIB: %s", pcmk_strerror(rc)); return rc; } rc = cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); if (rc == pcmk_ok) { mon_refresh_display(&output_format); } if (rc == pcmk_ok && full) { if (rc == pcmk_ok) { rc = cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy_regular); if (rc == -EPROTONOSUPPORT) { print_as (output_format, "Notification setup not supported, won't be able to reconnect after failure"); if (output_format == mon_output_console) { sleep(2); } rc = pcmk_ok; } } if (rc == pcmk_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); } if (rc != pcmk_ok) { out->err(out, "Notification setup failed, could not monitor CIB actions"); clean_up_connections(); } } } return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(output_format, option, condition) \ out->info(out, "%c %c: \t%s", ((condition)? '*': ' '), option, get_option_desc(option)); static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': if (is_not_set(show, mon_show_fencing_all)) { options.mon_ops |= mon_op_fence_history; options.mon_ops |= mon_op_fence_connect; if (st == NULL) { mon_cib_connection_destroy_regular(NULL); } } if (is_set(show, mon_show_fence_failed) || is_set(show, mon_show_fence_pending) || is_set(show, mon_show_fence_worked)) { show &= ~mon_show_fencing_all; } else { show |= mon_show_fencing_all; } break; case 'c': show ^= mon_show_tickets; break; case 'f': show ^= mon_show_failcounts; break; case 'n': options.mon_ops ^= mon_op_group_by_node; break; case 'o': show ^= mon_show_operations; if (is_not_set(show, mon_show_operations)) { options.mon_ops &= ~mon_op_print_timing; } break; case 'r': options.mon_ops ^= mon_op_inactive_resources; break; case 'R': options.mon_ops ^= mon_op_print_clone_detail; break; case 't': options.mon_ops ^= mon_op_print_timing; if (is_set(options.mon_ops, mon_op_print_timing)) { show |= mon_show_operations; } break; case 'A': show ^= mon_show_attributes; break; case 'L': show ^= mon_show_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (is_set(show, mon_show_stack) || is_set(show, mon_show_dc) || is_set(show, mon_show_times) || is_set(show, mon_show_counts)) { show &= ~mon_show_summary; } else { show |= mon_show_summary; } /* Regardless, we don't show options in console mode. */ show &= ~mon_show_options; break; case 'b': options.mon_ops ^= mon_op_print_brief; break; case 'j': options.mon_ops ^= mon_op_print_pending; break; case '?': config_mode = TRUE; break; default: goto refresh; } if (!config_mode) goto refresh; blank_screen(); out->info(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', is_set(show, mon_show_tickets)); print_option_help(out, 'f', is_set(show, mon_show_failcounts)); print_option_help(out, 'n', is_set(options.mon_ops, mon_op_group_by_node)); print_option_help(out, 'o', is_set(show, mon_show_operations)); print_option_help(out, 'r', is_set(options.mon_ops, mon_op_inactive_resources)); print_option_help(out, 't', is_set(options.mon_ops, mon_op_print_timing)); print_option_help(out, 'A', is_set(show, mon_show_attributes)); print_option_help(out, 'L', is_set(show,mon_show_bans)); print_option_help(out, 'D', is_not_set(show, mon_show_summary)); print_option_help(out, 'R', is_set(options.mon_ops, mon_op_print_clone_detail)); print_option_help(out, 'b', is_set(options.mon_ops, mon_op_print_brief)); print_option_help(out, 'j', is_set(options.mon_ops, mon_op_print_pending)); print_option_help(out, 'm', is_set(show, mon_show_fencing_all)); out->info(out, "%s", "\nToggle fields via field letter, type any other key to return"); } refresh: mon_refresh_display(NULL); return TRUE; } #endif // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void -avoid_zombies() +avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; const char *description = "Notes:\n\n" "If this program is called as crm_mon.cgi, --output-as=html --html-cgi will\n" "automatically be added to the command line arguments.\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many different\n" "formats. It can be just an integer number of seconds, a number plus units\n" "(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n" "Output Control:\n\n" "By default, a certain list of sections are written to the output destination.\n" "The default varies based on the output format - XML includes everything, while\n" "other output formats will display less. This list can be modified with the\n" "--include and --exclude command line options. Each option may be given multiple\n" "times on the command line, and each can give a comma-separated list of sections.\n" "The options are applied to the default set, from left to right as seen on the\n" "command line. For a list of valid sections, pass --include=list or --exclude=list.\n\n" "Examples:\n\n" "Display the cluster status on the console with updates as they occur:\n\n" "\tcrm_mon\n\n" "Display the cluster status on the console just once then exit:\n\n" "\tcrm_mon -1\n\n" "Display your cluster status, group resources by node, and include inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html --output-to /path/to/docroot/filename.html\n\n" "Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, "console (default), html, text, xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /* If certain format options were specified, we want to set some extra * options. We can just process these like they were given on the * command line. */ static void -add_output_args() { +add_output_args(void) { GError *err = NULL; if (output_format == mon_output_plain) { if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_cgi) { if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_xml) { if (!pcmk__force_args(context, &err, "%s --xml-simple-list", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; if (!pcmk__force_args(context, &err, "%s --xml-legacy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } } /* Which output format to use could come from two places: The --as-xml * style arguments we gave in deprecated_entries above, or the formatted output * arguments added by pcmk__register_formats. If the latter were used, * output_format will be mon_output_unset. * * Call the callbacks as if those older style arguments were provided so * the various things they do get done. */ static void reconcile_output_format(pcmk__common_args_t *args) { gboolean retval = TRUE; GError *err = NULL; if (output_format != mon_output_unset) { return; } if (safe_str_eq(args->output_ty, "html")) { char *dest = NULL; if (args->output_dest != NULL) { dest = strdup(args->output_dest); } retval = as_html_cb("h", dest, NULL, &err); free(dest); } else if (safe_str_eq(args->output_ty, "text")) { retval = no_curses_cb("N", NULL, NULL, &err); } else if (safe_str_eq(args->output_ty, "xml")) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_xml; options.mon_ops |= mon_op_one_shot; } else if (is_set(options.mon_ops, mon_op_one_shot)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_plain; } else { /* Neither old nor new arguments were given, so set the default. */ if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("console"); output_format = mon_output_console; } if (!retval) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } int main(int argc, char **argv) { int rc = pcmk_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); crm_log_cli_init("crm_mon"); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.mon_ops |= mon_op_one_shot; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (is_set(options.mon_ops, mon_op_watch_fencing)) { fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_fence_connect; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } else { switch (cib->variant) { case cib_native: /* cib & fencing - everything available */ break; case cib_file: /* Don't try to connect to fencing as we * either don't have a running cluster or * the fencing-information would possibly * not match the cib data from a file. * As we don't expect cib-updates coming * in enforce one-shot. */ fence_history_cb("--fence-history", "0", NULL, NULL); options.mon_ops |= mon_op_one_shot; break; case cib_remote: /* updates coming in but no fencing */ fence_history_cb("--fence-history", "0", NULL, NULL); break; case cib_undefined: case cib_database: default: /* something is odd */ rc = -EINVAL; break; } } if (is_set(options.mon_ops, mon_op_one_shot)) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (options.daemonize) { if ((output_format == mon_output_console) || (output_format == mon_output_plain)) { output_format = mon_output_none; } crm_enable_stderr(FALSE); if ((args->output_dest == NULL || safe_str_eq(args->output_dest, "-")) && !options.external_agent) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to and --external-agent"); return clean_up(CRM_EX_USAGE); } if (cib) { /* to be on the safe side don't have cib-object around * when we are forking */ cib_delete(cib); cib = NULL; crm_make_daemon(crm_system_name, TRUE, options.pid_file); cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } /* otherwise assume we've got the same cib-object we've just destroyed * in our parent */ } } else if (output_format == mon_output_console) { #if CURSES_ENABLED crm_enable_stderr(FALSE); #else options.mon_ops |= mon_op_one_shot; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } } if (rc != pcmk_ok) { // Shouldn't really be possible g_set_error(&error, G_OPTION_ERROR, CRM_EX_ERROR, "Invalid CIB source"); return clean_up(CRM_EX_ERROR); } reconcile_output_format(args); add_output_args(); if (args->version && output_format == mon_output_console) { /* Use the text output format here if we are in curses mode but were given * --version. Displaying version information uses printf, and then we * immediately exit. We don't want to initialize curses for that. */ rc = pcmk__output_new(&out, "text", args->output_dest, argv); } else { rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); } if (rc != pcmk_rc_ok) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, output_format, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, output_format, &error)) { return clean_up(CRM_EX_USAGE); } crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib && cib->variant == cib_file) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.daemonize == TRUE) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml || output_format == mon_output_legacy_xml) { options.mon_ops |= mon_op_print_timing; } crm_info("Starting %s", crm_system_name); if (cib) { do { if (is_not_set(options.mon_ops, mon_op_one_shot)) { print_as(output_format ,"Waiting until cluster is available on this node ...\n"); } rc = cib_connect(is_not_set(options.mon_ops, mon_op_one_shot)); if (is_set(options.mon_ops, mon_op_one_shot)) { break; } else if (rc != pcmk_ok) { sleep(options.reconnect_msec / 1000); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } else { if (output_format == mon_output_html && out->dest != stdout) { printf("Writing html to %s ...\n", args->output_dest); } } } while (rc == -ENOTCONN); } if (rc != pcmk_ok) { if (output_format == mon_output_monitor) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", pcmk_strerror(rc)); return clean_up(MON_STATUS_CRIT); } else { if (rc == -ENOTCONN) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } else { g_set_error(&error, G_OPTION_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_strerror(rc)); } } return clean_up(crm_errno2exit(rc)); } if (is_set(options.mon_ops, mon_op_one_shot)) { return clean_up(CRM_EX_OK); } mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } /*! * \internal * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state * \param[in] history List of stonith actions * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void print_simple_status(pcmk__output_t *out, pe_working_set_t * data_set, stonith_history_t *history, unsigned int mon_ops) { GListPtr gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; char *offline_nodes = NULL; gboolean no_dc = FALSE; gboolean offline = FALSE; if (data_set->dc_node == NULL) { mon_ops |= mon_op_has_warnings; no_dc = TRUE; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { char *s = crm_strdup_printf("offline node: %s", node->details->uname); offline_nodes = pcmk__add_word(offline_nodes, s); free(s); mon_ops |= mon_op_has_warnings; offline = TRUE; } } if (is_set(mon_ops, mon_op_has_warnings)) { out->info(out, "CLUSTER WARN:%s%s%s", no_dc ? " No DC" : "", no_dc && offline ? "," : "", offline ? offline_nodes : ""); free(offline_nodes); } else { char *nodes_standby_s = NULL; char *nodes_maint_s = NULL; if (nodes_standby > 0) { nodes_standby_s = crm_strdup_printf(", %d standby node%s", nodes_standby, pcmk__plural_s(nodes_standby)); } if (nodes_maintenance > 0) { nodes_maint_s = crm_strdup_printf(", %d maintenance node%s", nodes_maintenance, pcmk__plural_s(nodes_maintenance)); } out->info(out, "CLUSTER OK: %d node%s online%s%s, " "%d resource instance%s configured", nodes_online, pcmk__plural_s(nodes_online), nodes_standby_s != NULL ? nodes_standby_s : "", nodes_maint_s != NULL ? nodes_maint_s : "", data_set->ninstances, pcmk__plural_s(data_set->ninstances)); free(nodes_standby_s); free(nodes_maint_s); } } /*! * \internal * \brief Reduce the stonith-history * for successful actions we keep the last of every action-type & target * for failed actions we record as well who had failed * for actions in progress we keep full track * * \param[in] history List of stonith actions * */ static stonith_history_t * reduce_stonith_history(stonith_history_t *history) { stonith_history_t *new = history, *hp, *np; if (new) { hp = new->next; new->next = NULL; while (hp) { stonith_history_t *hp_next = hp->next; hp->next = NULL; for (np = new; ; np = np->next) { if ((hp->state == st_done) || (hp->state == st_failed)) { /* action not in progress */ if (safe_str_eq(hp->target, np->target) && safe_str_eq(hp->action, np->action) && (hp->state == np->state) && ((hp->state == st_done) || safe_str_eq(hp->delegate, np->delegate))) { /* purge older hp */ stonith_history_free(hp); break; } } if (!np->next) { np->next = hp; break; } } hp = hp_next; } } return new; } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = crm_itoa(rc); char *status_s = crm_itoa(status); char *target_rc_s = crm_itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static void handle_rsc_op(xmlNode * xml, const char *node_id) { int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { xmlNode *cIter; for(cIter = xml->children; cIter; cIter = cIter->next) { handle_rsc_op(cIter, node_id); } return; } id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); if (id == NULL) { /* Compatibility with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(n))) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if (status == PCMK_LRM_OP_DONE && target_rc == rc) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_LRM_OP_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = services_lrm_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); } static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger(refresh_trigger); return FALSE; } #define NODE_PATT "/lrm[@id=" static char * get_node_from_xpath(const char *xpath) { char *nodeid = NULL; char *tmp = strstr(xpath, NODE_PATT); if(tmp) { tmp += strlen(NODE_PATT); tmp += 1; nodeid = strdup(tmp); tmp = strstr(nodeid, "\'"); CRM_ASSERT(tmp); tmp[0] = 0; } return nodeid; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); for (state = __xml_first_child_element(status); state != NULL; state = __xml_next_element(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = __xml_first_child_element(match); state != NULL; state = __xml_next_element(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = get_node_from_xpath(xpath); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); print_dot(output_format); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { print_as(output_format, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; kick_refresh(cib_updated); } static gboolean mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); stonith_history_t *stonith_history = NULL; int history_rc = 0; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { if (cib) { cib->cmds->signoff(cib); } out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); clean_up(CRM_EX_CONFIG); return FALSE; } /* get the stonith-history if there is evidence we need it */ while (is_set(options.mon_ops, mon_op_fence_history)) { if (st != NULL) { history_rc = st->cmds->history(st, st_opt_sync_call, NULL, &stonith_history, 120); if (history_rc != 0) { out->err(out, "Critical: Unable to get stonith-history"); mon_cib_connection_destroy_error(NULL); } else { stonith_history = stonith__sort_history(stonith_history); if (is_not_set(options.mon_ops, mon_op_fence_full_history) && output_format != mon_output_xml) { stonith_history = reduce_stonith_history(stonith_history); } break; /* all other cases are errors */ } } else { out->err(out, "Critical: No stonith-API"); } free_xml(cib_copy); out->err(out, "Reading stonith-history failed"); return FALSE; } if (mon_data_set == NULL) { mon_data_set = pe_new_working_set(); CRM_ASSERT(mon_data_set != NULL); } set_bit(mon_data_set->flags, pe_flag_no_compat); mon_data_set->input = cib_copy; cluster_status(mon_data_set); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (is_set(show, mon_show_bans) || is_set(show, mon_show_tickets)) { xmlNode *cib_constraints = get_object_root(XML_CIB_TAG_CONSTRAINTS, mon_data_set->input); unpack_constraints(cib_constraints, mon_data_set); } switch (output_format) { case mon_output_html: case mon_output_cgi: if (print_html_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node) != 0) { g_set_error(&error, G_OPTION_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file"); clean_up(CRM_EX_CANTCREAT); return FALSE; } break; case mon_output_legacy_xml: case mon_output_xml: print_xml_status(out, mon_data_set, crm_errno2exit(history_rc), stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node); break; case mon_output_monitor: print_simple_status(out, mon_data_set, stonith_history, options.mon_ops); if (is_set(options.mon_ops, mon_op_has_warnings)) { clean_up(MON_STATUS_WARN); return FALSE; } break; case mon_output_console: /* If curses is not enabled, this will just fall through to the plain * text case. */ #if CURSES_ENABLED blank_screen(); print_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node); refresh(); break; #endif case mon_output_plain: print_status(out, mon_data_set, stonith_history, options.mon_ops, show, options.neg_location_prefix, options.only_node); break; case mon_output_unset: case mon_output_none: break; } stonith_history_free(stonith_history); stonith_history = NULL; pe_reset_working_set(mon_data_set); return TRUE; } static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy_regular(NULL); } else if (options.external_agent) { char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", e->operation, e->origin, e->target, pcmk_strerror(e->result), e->id); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } static void kick_refresh(gboolean data_updated) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } /* Refresh * - immediately if the last update was more than 5s ago * - every 10 cib-updates * - at most 2s after the last update */ if ((now - last_refresh) > (options.reconnect_msec / 1000)) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else if(updates >= 10) { mainloop_set_trigger(refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy_regular(NULL); } else { print_dot(output_format); kick_refresh(TRUE); } } static void clean_up_connections(void) { if (cib != NULL) { cib->cmds->signoff(cib); cib_delete(cib); cib = NULL; } if (st != NULL) { if (st->state != stonith_disconnected) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); st->cmds->disconnect(st); } stonith_api_delete(st); st = NULL; } } static void handle_html_output(crm_exit_t exit_code) { xmlNodePtr html = NULL; pcmk__html_add_header(html, "meta", "http-equiv", "refresh", "content", crm_itoa(options.reconnect_msec/1000), NULL); out->finish(out, exit_code, true, (void **) &html); } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ clean_up_connections(); free(options.pid_file); free(options.neg_location_prefix); g_slist_free_full(options.includes_excludes, free); pe_free_working_set(mon_data_set); mon_data_set = NULL; g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if ((error != NULL || exit_code == CRM_EX_USAGE) && output_format == mon_output_console) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { switch (output_format) { case mon_output_cgi: case mon_output_html: handle_html_output(exit_code); break; default: out->finish(out, exit_code, true, NULL); break; } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/crm_node.c b/tools/crm_node.c index d4461c5e58..34511f302d 100644 --- a/tools/crm_node.c +++ b/tools/crm_node.c @@ -1,668 +1,668 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_node - Tool for displaying low-level node information" struct { gboolean corosync; gboolean dangerous_cmd; gboolean force_flag; char command; int nodeid; char *target_uname; } options = { .command = '\0', .force_flag = FALSE }; gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); static char *pid_s = NULL; static GMainLoop *mainloop = NULL; static crm_exit_t exit_code = CRM_EX_OK; #define INDENT " " static GOptionEntry command_entries[] = { { "cluster-id", 'i', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display this node's cluster id", NULL }, { "list", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display all known members (past and present) of this cluster", NULL }, { "name", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the name used by the cluster for this node", NULL }, { "partition", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the members of this partition", NULL }, { "quorum", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display a 1 if our partition has quorum, 0 if not", NULL }, { "name-for-id", 'N', 0, G_OPTION_ARG_CALLBACK, name_cb, "Display the name used by the cluster for the node with the specified ID", "ID" }, { "remove", 'R', 0, G_OPTION_ARG_CALLBACK, remove_cb, "(Advanced) Remove the (stopped) node with the specified name from Pacemaker's\n" INDENT "configuration and caches (the node must already have been removed from\n" INDENT "the underlying cluster stack configuration", "NAME" }, { NULL } }; static GOptionEntry addl_entries[] = { { "force", 'f', 0, G_OPTION_ARG_NONE, &options.force_flag, NULL, NULL }, #if SUPPORT_COROSYNC /* Unused and deprecated */ { "corosync", 'C', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.corosync, NULL, NULL }, #endif // @TODO add timeout option for when IPC replies are needed { NULL } }; gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (safe_str_eq("-i", option_name) || safe_str_eq("--cluster-id", option_name)) { options.command = 'i'; } else if (safe_str_eq("-l", option_name) || safe_str_eq("--list", option_name)) { options.command = 'l'; } else if (safe_str_eq("-n", option_name) || safe_str_eq("--name", option_name)) { options.command = 'n'; } else if (safe_str_eq("-p", option_name) || safe_str_eq("--partition", option_name)) { options.command = 'p'; } else if (safe_str_eq("-q", option_name) || safe_str_eq("--quorum", option_name)) { options.command = 'q'; } else { g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "Unknown param passed to command_cb: %s\n", option_name); return FALSE; } return TRUE; } gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.command = 'N'; options.nodeid = crm_parse_int(optarg, NULL); return TRUE; } gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (optarg == NULL) { crm_err("-R option requires an argument"); g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, "-R option requires an argument"); return FALSE; } options.command = 'R'; options.dangerous_cmd = TRUE; options.target_uname = strdup(optarg); return TRUE; } /*! * \internal * \brief Exit crm_node * Clean up memory, and either quit mainloop (if running) or exit * * \param[in] value Exit status */ static void crm_node_exit(crm_exit_t value) { if (pid_s) { free(pid_s); pid_s = NULL; } exit_code = value; if (mainloop && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { crm_exit(exit_code); } } static void exit_disconnect(gpointer user_data) { fprintf(stderr, "error: Lost connection to cluster\n"); crm_node_exit(CRM_EX_DISCONNECT); } typedef int (*ipc_dispatch_fn) (const char *buffer, ssize_t length, gpointer userdata); static crm_ipc_t * new_mainloop_for_ipc(const char *system, ipc_dispatch_fn dispatch) { mainloop_io_t *source = NULL; crm_ipc_t *ipc = NULL; struct ipc_client_callbacks ipc_callbacks = { .dispatch = dispatch, .destroy = exit_disconnect }; mainloop = g_main_loop_new(NULL, FALSE); source = mainloop_add_ipc_client(system, G_PRIORITY_DEFAULT, 0, NULL, &ipc_callbacks); ipc = mainloop_get_ipc_client(source); if (ipc == NULL) { fprintf(stderr, "error: Could not connect to cluster (is it running?)\n"); crm_node_exit(CRM_EX_DISCONNECT); } return ipc; } static void -run_mainloop_and_exit() +run_mainloop_and_exit(void) { g_main_loop_run(mainloop); g_main_loop_unref(mainloop); mainloop = NULL; crm_node_exit(exit_code); } static int send_controller_hello(crm_ipc_t *controller) { xmlNode *hello = NULL; int rc; pid_s = pcmk__getpid_s(); hello = create_hello_message(pid_s, crm_system_name, "1", "0"); rc = crm_ipc_send(controller, hello, 0, 0, NULL); free_xml(hello); return (rc < 0)? rc : 0; } static int send_node_info_request(crm_ipc_t *controller, uint32_t nodeid) { xmlNode *ping = NULL; int rc; ping = create_request(CRM_OP_NODE_INFO, NULL, NULL, CRM_SYSTEM_CRMD, crm_system_name, pid_s); if (nodeid > 0) { crm_xml_add_int(ping, XML_ATTR_ID, nodeid); } rc = crm_ipc_send(controller, ping, 0, 0, NULL); free_xml(ping); return (rc < 0)? rc : 0; } static int dispatch_controller(const char *buffer, ssize_t length, gpointer userdata) { xmlNode *message = string2xml(buffer); xmlNode *data = NULL; const char *value = NULL; if (message == NULL) { fprintf(stderr, "error: Could not understand reply from controller\n"); crm_node_exit(CRM_EX_PROTOCOL); return 0; } crm_log_xml_trace(message, "controller reply"); exit_code = CRM_EX_PROTOCOL; // Validate reply value = crm_element_value(message, F_CRM_MSG_TYPE); if (safe_str_neq(value, XML_ATTR_RESPONSE)) { fprintf(stderr, "error: Message from controller was not a reply\n"); goto done; } value = crm_element_value(message, XML_ATTR_REFERENCE); if (value == NULL) { fprintf(stderr, "error: Controller reply did not specify original message\n"); goto done; } data = get_message_xml(message, F_CRM_DATA); if (data == NULL) { fprintf(stderr, "error: Controller reply did not contain any data\n"); goto done; } switch (options.command) { case 'i': value = crm_element_value(data, XML_ATTR_ID); if (value == NULL) { fprintf(stderr, "error: Controller reply did not contain node ID\n"); } else { printf("%s\n", value); exit_code = CRM_EX_OK; } break; case 'n': case 'N': value = crm_element_value(data, XML_ATTR_UNAME); if (value == NULL) { fprintf(stderr, "Node is not known to cluster\n"); exit_code = CRM_EX_NOHOST; } else { printf("%s\n", value); exit_code = CRM_EX_OK; } break; case 'q': value = crm_element_value(data, XML_ATTR_HAVE_QUORUM); if (value == NULL) { fprintf(stderr, "error: Controller reply did not contain quorum status\n"); } else { bool quorum = crm_is_true(value); printf("%d\n", quorum); exit_code = quorum? CRM_EX_OK : CRM_EX_QUORUM; } break; default: fprintf(stderr, "internal error: Controller reply not expected\n"); exit_code = CRM_EX_SOFTWARE; break; } done: free_xml(message); crm_node_exit(exit_code); return 0; } static void run_controller_mainloop(uint32_t nodeid) { crm_ipc_t *controller = NULL; int rc; controller = new_mainloop_for_ipc(CRM_SYSTEM_CRMD, dispatch_controller); rc = send_controller_hello(controller); if (rc < 0) { fprintf(stderr, "error: Could not register with controller: %s\n", pcmk_strerror(rc)); crm_node_exit(crm_errno2exit(rc)); } rc = send_node_info_request(controller, nodeid); if (rc < 0) { fprintf(stderr, "error: Could not ping controller: %s\n", pcmk_strerror(rc)); crm_node_exit(crm_errno2exit(rc)); } // Run main loop to get controller reply via dispatch_controller() run_mainloop_and_exit(); } static void -print_node_name() +print_node_name(void) { // Check environment first (i.e. when called by resource agent) const char *name = getenv("OCF_RESKEY_" CRM_META "_" XML_LRM_ATTR_TARGET); if (name != NULL) { printf("%s\n", name); crm_node_exit(CRM_EX_OK); } else { // Otherwise ask the controller run_controller_mainloop(0); } } static int cib_remove_node(long id, const char *name) { int rc; cib_t *cib = NULL; xmlNode *node = NULL; xmlNode *node_state = NULL; crm_trace("Removing %s from the CIB", name); if(name == NULL && id == 0) { return -ENOTUNIQ; } node = create_xml_node(NULL, XML_CIB_TAG_NODE); node_state = create_xml_node(NULL, XML_CIB_TAG_STATE); crm_xml_add(node, XML_ATTR_UNAME, name); crm_xml_add(node_state, XML_ATTR_UNAME, name); if (id > 0) { crm_xml_set_id(node, "%ld", id); crm_xml_add(node_state, XML_ATTR_ID, ID(node)); } cib = cib_new(); cib->cmds->signon(cib, crm_system_name, cib_command); rc = cib->cmds->remove(cib, XML_CIB_TAG_NODES, node, cib_sync_call); if (rc != pcmk_ok) { printf("Could not remove %s[%ld] from " XML_CIB_TAG_NODES ": %s", name, id, pcmk_strerror(rc)); } rc = cib->cmds->remove(cib, XML_CIB_TAG_STATUS, node_state, cib_sync_call); if (rc != pcmk_ok) { printf("Could not remove %s[%ld] from " XML_CIB_TAG_STATUS ": %s", name, id, pcmk_strerror(rc)); } cib->cmds->signoff(cib); cib_delete(cib); return rc; } static int tools_remove_node_cache(const char *node_name, long nodeid, const char *target) { int rc = -1; crm_ipc_t *conn = crm_ipc_new(target, 0); xmlNode *cmd = NULL; if (!conn) { return -ENOTCONN; } if (!crm_ipc_connect(conn)) { crm_perror(LOG_ERR, "Connection to %s failed", target); crm_ipc_destroy(conn); return -ENOTCONN; } if(safe_str_eq(target, CRM_SYSTEM_CRMD)) { // The controller requires a hello message before sending a request rc = send_controller_hello(conn); if (rc < 0) { fprintf(stderr, "error: Could not register with controller: %s\n", pcmk_strerror(rc)); return rc; } } crm_trace("Removing %s[%ld] from the %s membership cache", node_name, nodeid, target); if(safe_str_eq(target, T_ATTRD)) { cmd = create_xml_node(NULL, __FUNCTION__); crm_xml_add(cmd, F_TYPE, T_ATTRD); crm_xml_add(cmd, F_ORIG, crm_system_name); crm_xml_add(cmd, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); crm_xml_add(cmd, PCMK__XA_ATTR_NODE_NAME, node_name); if (nodeid > 0) { crm_xml_add_int(cmd, PCMK__XA_ATTR_NODE_ID, (int) nodeid); } } else { cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, target, crm_system_name, pid_s); if (nodeid > 0) { crm_xml_set_id(cmd, "%ld", nodeid); } crm_xml_add(cmd, XML_ATTR_UNAME, node_name); } rc = crm_ipc_send(conn, cmd, 0, 0, NULL); crm_debug("%s peer cache cleanup for %s (%ld): %d", target, node_name, nodeid, rc); if (rc > 0) { rc = cib_remove_node(nodeid, node_name); } if (conn) { crm_ipc_close(conn); crm_ipc_destroy(conn); } free_xml(cmd); return rc > 0 ? 0 : rc; } static void remove_node(const char *target_uname) { int d = 0; long nodeid = 0; const char *node_name = NULL; char *endptr = NULL; const char *daemons[] = { CRM_SYSTEM_CRMD, "stonith-ng", T_ATTRD, CRM_SYSTEM_MCP, }; // Check whether node was specified by name or numeric ID errno = 0; nodeid = strtol(target_uname, &endptr, 10); if ((errno != 0) || (endptr == target_uname) || (*endptr != '\0') || (nodeid <= 0)) { // It's not a positive integer, so assume it's a node name nodeid = 0; node_name = target_uname; } for (d = 0; d < DIMOF(daemons); d++) { if (tools_remove_node_cache(node_name, nodeid, daemons[d])) { crm_err("Failed to connect to %s to remove node '%s'", daemons[d], target_uname); crm_node_exit(CRM_EX_ERROR); return; } } crm_node_exit(CRM_EX_OK); } static gint compare_node_xml(gconstpointer a, gconstpointer b) { const char *a_name = crm_element_value((xmlNode*) a, "uname"); const char *b_name = crm_element_value((xmlNode*) b, "uname"); return strcmp((a_name? a_name : ""), (b_name? b_name : "")); } static int node_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata) { GList *nodes = NULL; xmlNode *node = NULL; xmlNode *msg = string2xml(buffer); const char *uname; const char *state; if (msg == NULL) { fprintf(stderr, "error: Could not understand pacemakerd response\n"); crm_node_exit(CRM_EX_PROTOCOL); return 0; } crm_log_xml_trace(msg, "message"); for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) { nodes = g_list_insert_sorted(nodes, node, compare_node_xml); } for (GList *iter = nodes; iter; iter = iter->next) { node = (xmlNode*) iter->data; uname = crm_element_value(node, "uname"); state = crm_element_value(node, "state"); if (options.command == 'l') { int id = 0; crm_element_value_int(node, "id", &id); printf("%d %s %s\n", id, (uname? uname : ""), (state? state : "")); // This is CRM_NODE_MEMBER but we don't want to include cluster header } else if ((options.command == 'p') && safe_str_eq(state, "member")) { printf("%s ", (uname? uname : "")); } } if (options.command == 'p') { fprintf(stdout, "\n"); } free_xml(msg); crm_node_exit(CRM_EX_OK); return 0; } static void -run_pacemakerd_mainloop() +run_pacemakerd_mainloop(void) { crm_ipc_t *ipc = NULL; xmlNode *poke = NULL; ipc = new_mainloop_for_ipc(CRM_SYSTEM_MCP, node_mcp_dispatch); // Sending anything will get us a list of nodes poke = create_xml_node(NULL, "poke"); crm_ipc_send(ipc, poke, 0, 0, NULL); free_xml(poke); // Handle reply via node_mcp_dispatch() run_mainloop_and_exit(); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup *group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; context = pcmk__build_arg_context(args, NULL, &group, NULL); /* Add the -q option, which cannot be part of the globally supported options * because some tools use that flag for something else. */ pcmk__add_main_args(context, extra_prog_entries); pcmk__add_arg_group(context, "commands", "Commands:", "Show command help", command_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); return context; } int main(int argc, char **argv) { pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); GError *error = NULL; GOptionContext *context = NULL; GOptionGroup *output_group = NULL; gchar **processed_args = NULL; context = build_arg_context(args, output_group); crm_log_cli_init("crm_node"); processed_args = pcmk__cmdline_preproc(argv, "NR"); if (!g_option_context_parse_strv(context, &processed_args, &error)) { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); exit_code = CRM_EX_USAGE; goto done; } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (args->version) { /* FIXME: When crm_node is converted to use formatted output, this can go. */ pcmk__cli_help('v', CRM_EX_USAGE); } if (optind > argc || options.command == 0) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); exit_code = CRM_EX_USAGE; goto done; } if (options.dangerous_cmd && options.force_flag == FALSE) { fprintf(stderr, "The supplied command is considered dangerous." " To prevent accidental destruction of the cluster," " the --force flag is required in order to proceed.\n"); exit_code = CRM_EX_USAGE; goto done; } switch (options.command) { case 'n': print_node_name(); break; case 'R': remove_node(options.target_uname); break; case 'i': case 'q': case 'N': run_controller_mainloop(options.nodeid); break; case 'l': case 'p': run_pacemakerd_mainloop(); break; default: break; } done: g_strfreev(processed_args); g_clear_error(&error); pcmk__free_arg_context(context); crm_node_exit(exit_code); return exit_code; }