diff --git a/cts/patterns.py b/cts/patterns.py index 745eaaf4b5..1bdace0ead 100644 --- a/cts/patterns.py +++ b/cts/patterns.py @@ -1,391 +1,391 @@ """ Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ # Pacemaker targets compatibility with Python 2.7 and 3.2+ from __future__ import print_function, unicode_literals, absolute_import, division -__copyright__ = "Copyright 2008-2018 the Pacemaker project contributors" +__copyright__ = "Copyright 2008-2019 the Pacemaker project contributors" __license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" import sys, os from cts.CTSvars import * patternvariants = {} class BasePatterns(object): def __init__(self, name): self.name = name patternvariants[name] = self self.ignore = [ "avoid confusing Valgrind", # Logging bug in some versions of libvirtd r"libvirtd.*: internal error: Failed to parse PCI config address", ] self.BadNews = [] self.components = {} self.commands = { "StatusCmd" : "crmadmin -t 60000 -S %s 2>/dev/null", "CibQuery" : "cibadmin -Ql", "CibAddXml" : "cibadmin --modify -c --xml-text %s", "CibDelXpath" : "cibadmin --delete --xpath %s", # 300,000 == 5 minutes "RscRunning" : CTSvars.CRM_DAEMON_DIR + "/cts-exec-helper -R -r %s", "CIBfile" : "%s:"+CTSvars.CRM_CONFIG_DIR+"/cib.xml", "TmpDir" : "/tmp", "BreakCommCmd" : "iptables -A INPUT -s %s -j DROP >/dev/null 2>&1", "FixCommCmd" : "iptables -D INPUT -s %s -j DROP >/dev/null 2>&1", # tc qdisc add dev lo root handle 1: cbq avpkt 1000 bandwidth 1000mbit # tc class add dev lo parent 1: classid 1:1 cbq rate "$RATE"kbps allot 17000 prio 5 bounded isolated # tc filter add dev lo parent 1: protocol ip prio 16 u32 match ip dst 127.0.0.1 match ip sport $PORT 0xFFFF flowid 1:1 # tc qdisc add dev lo parent 1: netem delay "$LATENCY"msec "$(($LATENCY/4))"msec 10% 2> /dev/null > /dev/null "ReduceCommCmd" : "", "RestoreCommCmd" : "tc qdisc del dev lo root", "SetCheckInterval" : "cibadmin --modify -c --xml-text ''", "ClearCheckInterval" : "cibadmin --delete --xpath \"//nvpair[@name='cluster-recheck-interval']\"", "MaintenanceModeOn" : "cibadmin --modify -c --xml-text ''", "MaintenanceModeOff" : "cibadmin --delete --xpath \"//nvpair[@name='maintenance-mode']\"", "StandbyCmd" : "crm_attribute -Vq -U %s -n standby -l forever -v %s 2>/dev/null", "StandbyQueryCmd" : "crm_attribute -qG -U %s -n standby -l forever -d off 2>/dev/null", } self.search = { "Pat:DC_IDLE" : "pacemaker-controld.*State transition.*-> S_IDLE", # This won't work if we have multiple partitions - "Pat:Local_started" : "%s\W.*The local CRM is operational", + "Pat:Local_started" : "%s\W.*controller successfully started", "Pat:NonDC_started" : r"%s\W.*State transition.*-> S_NOT_DC", "Pat:DC_started" : r"%s\W.*State transition.*-> S_IDLE", "Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN", "Pat:They_stopped" : "%s\W.*LOST:.* %s ", "Pat:They_dead" : "node %s.*: is dead", "Pat:TransitionComplete" : "Transition status: Complete: complete", "Pat:Fencing_start" : "(Initiating remote operation|Requesting peer fencing ).* (for|of) %s", "Pat:Fencing_ok" : r"pacemaker-fenced.*:\s*Operation .* of %s by .* for .*@.*: OK", "Pat:Fencing_recover" : r"schedulerd.*: Recover %s", "Pat:RscOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s.*: (0 \()?ok", "Pat:RscRemoteOpOK" : r"pacemaker-controld.*:\s+Result of %s operation for %s on %s: (0 \()?ok", "Pat:NodeFenced" : r"pacemaker-controld.*:\s* Peer %s was terminated \(.*\) by .* on behalf of .*: OK", "Pat:FenceOpOK" : "Operation .* for host '%s' with device .* returned: 0", } def get_component(self, key): if key in self.components: return self.components[key] print("Unknown component '%s' for %s" % (key, self.name)) return [] def get_patterns(self, key): if key == "BadNews": return self.BadNews elif key == "BadNewsIgnore": return self.ignore elif key == "Commands": return self.commands elif key == "Search": return self.search elif key == "Components": return self.components def __getitem__(self, key): if key == "Name": return self.name elif key in self.commands: return self.commands[key] elif key in self.search: return self.search[key] else: print("Unknown template '%s' for %s" % (key, self.name)) return None class crm_corosync(BasePatterns): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): BasePatterns.__init__(self, name) self.commands.update({ "StartCmd" : "service corosync start && service pacemaker start", "StopCmd" : "service pacemaker stop; [ ! -e /usr/sbin/pacemaker-remoted ] || service pacemaker_remote stop; service corosync stop", "EpochCmd" : "crm_node -e", "QuorumCmd" : "crm_node -q", "PartitionCmd" : "crm_node -p", }) self.search.update({ # Close enough ... "Corosync Cluster Engine exiting normally" isn't # printed reliably. "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", "Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost", "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", "Pat:ChildKilled" : r"%s\W.*pacemakerd.*%s\[[0-9]+\] terminated with signal 9", "Pat:ChildRespawn" : "%s\W.*pacemakerd.*Respawning failed child process: %s", "Pat:InfraUp" : "%s\W.*corosync.*Initializing transport", "Pat:PacemakerUp" : "%s\W.*pacemakerd.*Starting Pacemaker", }) self.ignore = self.ignore + [ r"crm_mon:", r"crmadmin:", r"update_trace_data", r"async_notify:.*strange, client not found", r"Parse error: Ignoring unknown option .*nodename", r"error.*: Operation 'reboot' .* with device 'FencingFail' returned:", r"getinfo response error: 1$", r"sbd.* error: inquisitor_child: DEBUG MODE IS ACTIVE", r"sbd.* pcmk:\s*error:.*Connection to cib_ro.* (failed|closed)", ] self.BadNews = [ r"error:", r"crit:", r"ERROR:", r"CRIT:", r"Shutting down...NOW", r"Timer I_TERMINATE just popped", r"input=I_ERROR", r"input=I_FAIL", r"input=I_INTEGRATED cause=C_TIMER_POPPED", r"input=I_FINALIZED cause=C_TIMER_POPPED", r"input=I_ERROR", r"(pacemakerd|pacemaker-execd|pacemaker-controld):.*, exiting", r"schedulerd.*Attempting recovery of resource", r"is taking more than 2x its timeout", r"Confirm not received from", r"Welcome reply not received from", r"Attempting to schedule .* after a stop", r"Resource .* was active at shutdown", r"duplicate entries for call_id", r"Search terminated:", r":global_timer_callback", r"Faking parameter digest creation", r"Parameters to .* action changed:", r"Parameters to .* changed", r"\[[0-9]+\] terminated with signal [0-9]+ \(", r"schedulerd:.*Recover .*\(.* -\> .*\)", r"rsyslogd.* imuxsock lost .* messages from pid .* due to rate-limiting", r"Peer is not part of our cluster", r"We appear to be in an election loop", r"Unknown node -> we will not deliver message", r"(Blackbox dump requested|Problem detected)", r"pacemakerd.*Could not connect to Cluster Configuration Database API", r"Receiving messages from a node we think is dead", r"share the same cluster nodeid", r"share the same name", #r"crm_ipc_send:.*Request .* failed", #r"crm_ipc_send:.*Sending to .* is disabled until pending reply is received", # Not inherently bad, but worth tracking #r"No need to invoke the TE", #r"ping.*: DEBUG: Updated connected = 0", #r"Digest mis-match:", r"pacemaker-controld:.*Transition failed: terminated", r"Local CIB .* differs from .*:", r"warn.*:\s*Continuing but .* will NOT be used", r"warn.*:\s*Cluster configuration file .* is corrupt", #r"Executing .* fencing operation", r"Election storm", r"stalled the FSA with pending inputs", ] self.components["common-ignore"] = [ r"Pending action:", r"resource( was|s were) active at shutdown", r"pending LRM operations at shutdown", r"Lost connection to the CIB manager", r"pacemaker-controld.*:\s*Action A_RECOVER .* not supported", r"pacemaker-controld.*:\s*Performing A_EXIT_1 - forcefully exiting ", r".*:\s*Executing .* fencing operation \(.*\) on ", r".*:\s*Requesting fencing \([^)]+\) of node ", r"(Blackbox dump requested|Problem detected)", # "Resource .*stonith::.* is active on 2 nodes attempting recovery", # "Transition .* ERRORs found during PE processing", ] self.components["corosync-ignore"] = [ r"error:.*Connection to the CPG API failed: Library error", r"\[[0-9]+\] exited with status [0-9]+ \(", r"pacemaker-based.*error:.*Corosync connection lost", r"pacemaker-fenced.*error:.*Corosync connection terminated", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*error:.*Input (I_ERROR|I_TERMINATE ) .*received in state", r"pacemaker-controld.*error:.*Could not recover from internal error", r"error:.*Connection to cib_(shm|rw).* (failed|closed)", r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"crit: Fencing daemon connection failed", ] self.components["corosync"] = [ # We expect each daemon to lose its cluster connection. # However, if the CIB manager loses its connection first, # it's possible for another daemon to lose that connection and # exit before losing the cluster connection. r"pacemakerd.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-attrd.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-based.*:\s*(crit|error):.*Lost connection to cluster layer", r"pacemaker-controld.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"pacemaker-fenced.*:\s*(crit|error):.*Lost connection to (cluster layer|the CIB manager)", r"schedulerd.*Scheduling Node .* for STONITH", r"pacemaker-controld.*:\s*Peer .* was terminated \(.*\) by .* on behalf of .*:\s*OK", ] self.components["pacemaker-based"] = [ r"pacemakerd.* pacemaker-attrd\[[0-9]+\] exited with status 102", r"pacemakerd.* pacemaker-controld\[[0-9]+\] exited with status 1", r"pacemakerd.* Respawning failed child process: pacemaker-attrd", r"pacemakerd.* Respawning failed child process: pacemaker-based", r"pacemakerd.* Respawning failed child process: pacemaker-controld", r"pacemakerd.* Respawning failed child process: pacemaker-fenced", r"pacemaker-.* Connection to cib_.* (failed|closed)", r"pacemaker-attrd.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*:.*Lost connection to the CIB manager", r"pacemaker-controld.*I_ERROR.*crmd_cib_connection_destroy", r"pacemaker-controld.* State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", ] self.components["pacemaker-based-ignore"] = [ r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", ] self.components["pacemaker-execd"] = [ r"pacemaker-controld.*Connection to (pacemaker-execd|lrmd|executor) (failed|closed)", r"pacemaker-controld.*I_ERROR.*lrm_connection_destroy", r"pacemaker-controld.*State transition .* S_RECOVERY", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", r"pacemaker-controld.*Could not recover from internal error", r"pacemakerd.*pacemaker-execd.* terminated with signal 9", r"pacemakerd.*pacemaker-controld\[[0-9]+\] exited with status 1", r"pacemakerd.*Respawning failed child process: pacemaker-execd", r"pacemakerd.*Respawning failed child process: pacemaker-controld", ] self.components["pacemaker-execd-ignore"] = [] self.components["pacemaker-controld"] = [ # "WARN: determine_online_status: Node .* is unclean", # "Scheduling Node .* for STONITH", # "Executing .* fencing operation", # Only if the node wasn't the DC: "State transition S_IDLE", "State transition .* -> S_IDLE", ] self.components["pacemaker-controld-ignore"] = [] self.components["pacemaker-attrd"] = [] self.components["pacemaker-attrd-ignore"] = [] self.components["pacemaker-schedulerd"] = [ "State transition .* S_RECOVERY", r"Respawning failed child process: pacemaker-controld", r"pacemaker-controld\[[0-9]+\] exited with status 1 \(", "Connection to pengine failed", "Connection to pengine.* closed", r"Connection to the scheduler failed", "pacemaker-controld.*I_ERROR.*save_cib_contents", r"pacemaker-controld.*: Input I_TERMINATE .*from do_recover", "pacemaker-controld.*Could not recover from internal error", ] self.components["pacemaker-schedulerd-ignore"] = [] self.components["pacemaker-fenced"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"Fencing daemon connection failed", r"pacemaker-controld.*:\s*warn.*:\s*Callback already present", ] self.components["pacemaker-fenced-ignore"] = [ r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", r"crit:.*Fencing daemon connection failed", r"error:.*Sign-in failed: triggered a retry", r"Connection to (fencer|stonith-ng) failed, finalizing .* pending operations", r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error", ] self.components["pacemaker-fenced-ignore"].extend(self.components["common-ignore"]) class crm_corosync_docker(crm_corosync): ''' Patterns for Corosync version 2 cluster manager class ''' def __init__(self, name): crm_corosync.__init__(self, name) self.commands.update({ "StartCmd" : "pcmk_start", "StopCmd" : "pcmk_stop", }) class PatternSelector(object): def __init__(self, name=None): self.name = name self.base = BasePatterns("crm-base") if not name: crm_corosync("crm-corosync") elif name == "crm-corosync": crm_corosync(name) elif name == "crm-corosync-docker": crm_corosync_docker(name) def get_variant(self, variant): if variant in patternvariants: return patternvariants[variant] print("defaulting to crm-base for %s" % variant) return self.base def get_patterns(self, variant, kind): return self.get_variant(variant).get_patterns(kind) def get_template(self, variant, key): v = self.get_variant(variant) return v[key] def get_component(self, variant, kind): return self.get_variant(variant).get_component(kind) def __getitem__(self, key): return self.get_template(self.name, key) # python cts/CTSpatt.py -k crm-corosync -t StartCmd if __name__ == '__main__': pdir=os.path.dirname(sys.path[0]) sys.path.insert(0, pdir) # So that things work from the source directory kind=None template=None skipthis=None args=sys.argv[1:] for i in range(0, len(args)): if skipthis: skipthis=None continue elif args[i] == "-k" or args[i] == "--kind": skipthis=1 kind = args[i+1] elif args[i] == "-t" or args[i] == "--template": skipthis=1 template = args[i+1] else: print("Illegal argument " + args[i]) print(PatternSelector(kind)[template]) diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index 9ffe20115c..15c6a8075e 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -1,441 +1,441 @@ /* * Copyright 2013-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-attrd.h" lrmd_t *the_lrmd = NULL; crm_cluster_t *attrd_cluster = NULL; crm_trigger_t *attrd_config_read = NULL; static crm_exit_t attrd_exit_status = CRM_EX_OK; static void attrd_cpg_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); } if (xml == NULL) { crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); } else { crm_node_t *peer = crm_get_peer(nodeid, from); attrd_peer_message(peer, xml); } free_xml(xml); free(data); } static void attrd_cpg_destroy(gpointer unused) { if (attrd_shutting_down()) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } } static void attrd_cib_replaced_cb(const char *event, xmlNode * msg) { if (attrd_shutting_down()) { return; } if (attrd_election_won()) { crm_notice("Updating all attributes after %s event", event); write_attributes(TRUE, FALSE); } // Check for changes in alerts mainloop_set_trigger(attrd_config_read); } static void attrd_cib_destroy_cb(gpointer user_data) { cib_t *conn = user_data; conn->cmds->signoff(conn); /* Ensure IPC is cleaned up */ if (attrd_shutting_down()) { crm_info("Connection disconnection complete"); } else { /* eventually this should trigger a reconnect, not a shutdown */ crm_crit("Lost connection to the CIB manager, shutting down"); attrd_exit_status = CRM_EX_DISCONNECT; attrd_shutdown(0); } return; } static void attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { do_crm_log_unlikely((rc? LOG_NOTICE : LOG_DEBUG), "Cleared transient attributes: %s " CRM_XS " xpath=%s rc=%d", pcmk_strerror(rc), (char *) user_data, rc); } #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS /*! * \internal * \brief Wipe all transient attributes for this node from the CIB * * Clear any previous transient node attributes from the CIB. This is * normally done by the DC's controller when this node leaves the cluster, but * this handles the case where the node restarted so quickly that the * cluster layer didn't notice. * * \todo If pacemaker-attrd respawns after crashing (see PCMK_respawned), * ideally we'd skip this and sync our attributes from the writer. * However, currently we reject any values for us that the writer has, in * attrd_peer_update(). */ static void attrd_erase_attrs() { int call_id; char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", xpath); call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_quorum_override | cib_xpath); the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, "attrd_erase_cb", attrd_erase_cb, free); } static int attrd_cib_connect(int max_retry) { static int attempts = 0; int rc = -ENOTCONN; the_cib = cib_new(); if (the_cib == NULL) { return -ENOTCONN; } do { if(attempts > 0) { sleep(attempts); } attempts++; crm_debug("Connection attempt %d to the CIB manager", attempts); rc = the_cib->cmds->signon(the_cib, T_ATTRD, cib_command); } while(rc != pcmk_ok && attempts < max_retry); if (rc != pcmk_ok) { crm_err("Connection to the CIB manager failed: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); goto cleanup; } crm_debug("Connected to the CIB manager after %d attempts", attempts); rc = the_cib->cmds->set_connection_dnotify(the_cib, attrd_cib_destroy_cb); if (rc != pcmk_ok) { crm_err("Could not set disconnection callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_REPLACE_NOTIFY, attrd_cib_replaced_cb); if(rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); goto cleanup; } rc = the_cib->cmds->add_notify_callback(the_cib, T_CIB_DIFF_NOTIFY, attrd_cib_updated_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback (update)"); goto cleanup; } return pcmk_ok; cleanup: the_cib->cmds->signoff(the_cib); cib_delete(the_cib); the_cib = NULL; return -ENOTCONN; } /*! * \internal * \brief Prepare the CIB after cluster is connected */ static void attrd_cib_init() { // We have no attribute values in memory, wipe the CIB to match attrd_erase_attrs(); // Set a trigger for reading the CIB (for the alerts section) attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); // Always read the CIB at start-up mainloop_set_trigger(attrd_config_read); } static qb_ipcs_service_t *ipcs = NULL; static int32_t attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *xml = crm_ipcs_recv(client, data, size, &id, &flags); const char *op; if (xml == NULL) { crm_debug("No msg from %d (%p)", crm_ipcs_client_pid(c), c); return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(xml, F_ATTRD_USER, client->user); #endif crm_trace("Processing msg from %d (%p)", crm_ipcs_client_pid(c), c); crm_log_xml_trace(xml, __FUNCTION__); op = crm_element_value(xml, F_ATTRD_TASK); if (client->name == NULL) { const char *value = crm_element_value(xml, F_ORIG); client->name = crm_strdup_printf("%s.%d", value?value:"unknown", client->pid); } if (safe_str_eq(op, ATTRD_OP_PEER_REMOVE)) { attrd_send_ack(client, id, flags); attrd_client_peer_remove(client->name, xml); } else if (safe_str_eq(op, ATTRD_OP_CLEAR_FAILURE)) { attrd_send_ack(client, id, flags); attrd_client_clear_failure(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_BOTH)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_UPDATE_DELAY)) { attrd_send_ack(client, id, flags); attrd_client_update(xml); } else if (safe_str_eq(op, ATTRD_OP_REFRESH)) { attrd_send_ack(client, id, flags); attrd_client_refresh(); } else if (safe_str_eq(op, ATTRD_OP_QUERY)) { /* queries will get reply, so no ack is necessary */ attrd_client_query(client, id, flags, xml); } else { crm_info("Ignoring request from client %s with unknown operation %s", client->name, op); } free_xml(xml); return 0; } void attrd_ipc_fini() { if (ipcs != NULL) { crm_client_disconnect_all(ipcs); qb_ipcs_destroy(ipcs); ipcs = NULL; } } static int attrd_cluster_connect() { attrd_cluster = calloc(1, sizeof(crm_cluster_t)); attrd_cluster->destroy = attrd_cpg_destroy; attrd_cluster->cpg.cpg_deliver_fn = attrd_cpg_dispatch; attrd_cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; crm_set_status_callback(&attrd_peer_change_cb); if (crm_cluster_connect(attrd_cluster) == FALSE) { crm_err("Cluster connection failed"); return -ENOTCONN; } return pcmk_ok; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag = 0; int index = 0; int argerr = 0; crm_ipc_t *old_instance = NULL; attrd_init_mainloop(); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating and atomically storing node attribute updates into the CIB"); mainloop_add_signal(SIGTERM, attrd_shutdown); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } crm_log_init(T_ATTRD, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_info("Starting up"); + crm_notice("Starting Pacemaker node attribute manager"); old_instance = crm_ipc_new(T_ATTRD, 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); - crm_err("pacemaker-attrdd is already active, aborting startup"); + crm_err("pacemaker-attrd is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } attributes = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_attribute); /* Connect to the CIB before connecting to the cluster or listening for IPC. * This allows us to assume the CIB is connected whenever we process a * cluster or IPC message (which also avoids start-up race conditions). */ if (attrd_cib_connect(10) != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("CIB connection active"); if (attrd_cluster_connect() != pcmk_ok) { attrd_exit_status = CRM_EX_FATAL; goto done; } crm_info("Cluster connection active"); // Initialization that requires the cluster to be connected attrd_election_init(); attrd_cib_init(); /* Set a private attribute for ourselves with the protocol version we * support. This lets all nodes determine the minimum supported version * across all nodes. It also ensures that the writer learns our node name, * so it can send our attributes to the CIB. */ attrd_broadcast_protocol(); attrd_init_ipc(&ipcs, attrd_ipc_dispatch); - crm_info("Accepting attribute updates"); + crm_notice("Pacemaker node attribute manager successfully started and accepting connections"); attrd_run_mainloop(); done: crm_info("Shutting down attribute manager"); attrd_election_fini(); attrd_ipc_fini(); attrd_lrmd_disconnect(); attrd_cib_disconnect(); g_hash_table_destroy(attributes); crm_exit(attrd_exit_status); } diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c index f004f671f5..f959976279 100644 --- a/daemons/based/pacemaker-based.c +++ b/daemons/based/pacemaker-based.c @@ -1,386 +1,369 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if HAVE_BZLIB_H # include #endif extern int init_remote_listener(int port, gboolean encrypted); gboolean cib_shutdown_flag = FALSE; int cib_status = pcmk_ok; crm_cluster_t crm_cluster; GMainLoop *mainloop = NULL; const char *cib_root = NULL; char *cib_our_uname = NULL; static gboolean preserve_status = FALSE; gboolean cib_writes_enabled = TRUE; int remote_fd = 0; int remote_tls_fd = 0; GHashTable *config_hash = NULL; GHashTable *local_notify_queue = NULL; -static int cib_init(void); +static void cib_init(void); void cib_shutdown(int nsig); static bool startCib(const char *filename); extern int write_cib_contents(gpointer p); void cib_cleanup(void); static void cib_enable_writes(int nsig) { crm_info("(Re)enabling disk writes"); cib_writes_enabled = TRUE; } -static void -log_cib_client(gpointer key, gpointer value, gpointer user_data) -{ - crm_info("Client %s", crm_client_name(value)); -} - /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"stand-alone", 0, 0, 's', "\tAdvanced use only"}, {"disk-writes", 0, 0, 'w', "\tAdvanced use only"}, {"cib-root", 1, 0, 'r', "\tAdvanced use only"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int rc = 0; int index = 0; int argerr = 0; struct passwd *pwentry = NULL; crm_ipc_t *old_instance = NULL; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for storing and replicating the cluster configuration"); mainloop_add_signal(SIGTERM, cib_shutdown); mainloop_add_signal(SIGPIPE, cib_enable_writes); cib_writer = mainloop_add_trigger(G_PRIORITY_LOW, write_cib_contents, NULL); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 's': stand_alone = TRUE; preserve_status = TRUE; cib_writes_enabled = FALSE; pwentry = getpwnam(CRM_DAEMON_USER); CRM_CHECK(pwentry != NULL, crm_perror(LOG_ERR, "Invalid uid (%s) specified", CRM_DAEMON_USER); return CRM_EX_FATAL); rc = setgid(pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set group to %d", pwentry->pw_gid); return CRM_EX_FATAL; } rc = initgroups(CRM_DAEMON_USER, pwentry->pw_gid); if (rc < 0) { crm_perror(LOG_ERR, "Could not setup groups for user %d", pwentry->pw_uid); return CRM_EX_FATAL; } rc = setuid(pwentry->pw_uid); if (rc < 0) { crm_perror(LOG_ERR, "Could not set user to %d", pwentry->pw_uid); return CRM_EX_FATAL; } break; case '?': /* Help message */ crm_help(flag, CRM_EX_OK); break; case 'w': cib_writes_enabled = TRUE; break; case 'r': cib_root = optarg; break; case 'm': cib_metadata(); return CRM_EX_OK; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { cib_metadata(); return CRM_EX_OK; } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker CIB manager"); + old_instance = crm_ipc_new(CIB_CHANNEL_RO, 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-based is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (cib_root == NULL) { cib_root = CRM_CONFIG_DIR; } else { crm_notice("Using custom config location: %s", cib_root); } if (pcmk__daemon_can_write(cib_root, NULL) == FALSE) { crm_err("Terminating due to bad permissions on %s", cib_root); fprintf(stderr, "ERROR: Bad permissions on %s (see logs for details)\n", cib_root); fflush(stderr); return CRM_EX_FATAL; } crm_peer_init(); - /* read local config file */ + // Read initial CIB, connect to cluster, and start IPC servers cib_init(); - // This should not be reachable - CRM_CHECK(crm_hash_table_size(client_connections) == 0, - crm_warn("Not all clients gone at exit")); - g_hash_table_foreach(client_connections, log_cib_client, NULL); - cib_cleanup(); + // Run the main loop + mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker CIB manager successfully started and accepting connections"); + g_main_loop_run(mainloop); - crm_info("Done"); + /* If main loop returned, clean up and exit. We disconnect in case + * terminate_cib() was called with fast=-1. + */ + crm_cluster_disconnect(&crm_cluster); + cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); return CRM_EX_OK; } void cib_cleanup(void) { crm_peer_destroy(); if (local_notify_queue) { g_hash_table_destroy(local_notify_queue); } crm_client_cleanup(); g_hash_table_destroy(config_hash); free(cib_our_uname); } #if SUPPORT_COROSYNC static void cib_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ cib_peer_callback(xml, NULL); } free_xml(xml); free(data); } static void cib_cs_destroy(gpointer user_data) { if (cib_shutdown_flag) { crm_info("Corosync disconnection complete"); } else { crm_crit("Lost connection to cluster layer, shutting down"); terminate_cib(__FUNCTION__, CRM_EX_DISCONNECT); } } #endif static void cib_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { switch (type) { case crm_status_processes: if (cib_legacy_mode() && is_not_set(node->processes, crm_get_cluster_proc())) { uint32_t old = data? *(const uint32_t *)data : 0; if ((node->processes ^ old) & crm_proc_cpg) { crm_info("Attempting to disable legacy mode after %s left the cluster", node->uname); legacy_mode = FALSE; } } break; case crm_status_uname: case crm_status_nstate: if (cib_shutdown_flag && (crm_active_peers() < 2) && crm_hash_table_size(client_connections) == 0) { crm_info("No more peers"); terminate_cib(__FUNCTION__, -1); } break; } } -static int +static void cib_init(void) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC crm_cluster.destroy = cib_cs_destroy; crm_cluster.cpg.cpg_deliver_fn = cib_cs_dispatch; crm_cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } config_hash = crm_str_table_new(); if (startCib("cib.xml") == FALSE) { crm_crit("Cannot start CIB... terminating"); crm_exit(CRM_EX_NOINPUT); } if (stand_alone == FALSE) { if (is_corosync_cluster()) { crm_set_status_callback(&cib_peer_update_callback); } if (crm_cluster_connect(&crm_cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } cib_our_uname = crm_cluster.uname; } else { cib_our_uname = strdup("localhost"); } cib_ipc_servers_init(&ipcs_ro, &ipcs_rw, &ipcs_shm, &ipc_ro_callbacks, &ipc_rw_callbacks); if (stand_alone) { cib_is_master = TRUE; } - - /* Create the mainloop and run it... */ - mainloop = g_main_loop_new(NULL, FALSE); - crm_info("Starting %s mainloop", crm_system_name); - g_main_loop_run(mainloop); - - /* If main loop returned, clean up and exit. We disconnect in case - * terminate_cib() was called with fast=-1. - */ - crm_cluster_disconnect(&crm_cluster); - cib_ipc_servers_destroy(ipcs_ro, ipcs_rw, ipcs_shm); - - crm_exit(CRM_EX_OK); } static bool startCib(const char *filename) { gboolean active = FALSE; xmlNode *cib = readCibXmlFile(cib_root, filename, !preserve_status); if (activateCibXml(cib, TRUE, "start") == 0) { int port = 0; const char *port_s = NULL; active = TRUE; cib_read_config(config_hash, cib); port_s = crm_element_value(cib, "remote-tls-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_tls_fd = init_remote_listener(port, TRUE); } port_s = crm_element_value(cib, "remote-clear-port"); if (port_s) { port = crm_parse_int(port_s, "0"); remote_fd = init_remote_listener(port, FALSE); } - - crm_info("CIB Initialization completed successfully"); } - return active; } diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 0ac358cbeb..03a4fb7b63 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,899 +1,899 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(crm_cluster_t * cluster); #endif void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); set_bit(fsa_input_register, R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_corosync_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } if (registered == TRUE) { controld_election_init(cluster->uname); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (registered == FALSE) { set_bit(fsa_input_register, R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __FUNCTION__); clear_bit(fsa_input_register, R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __FUNCTION__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ set_bit(fsa_input_register, R_SHUTDOWN); if (stonith_api) { /* Prevent it from coming up again */ clear_bit(fsa_input_register, R_ST_REQUIRED); crm_info("Disconnecting from fencer"); stonith_api->cmds->disconnect(stonith_api); } } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; set_bit(fsa_input_register, R_SHUTDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", (fsa_our_dc? fsa_our_dc : "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); /* set_bit(fsa_input_register, R_STAYDOWN); */ if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; extern char *te_client_id; crm_exit_t crmd_fast_exit(crm_exit_t exit_code) { if (is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GListPtr gIter = NULL; GMainLoop *mloop = crmd_mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ set_bit(fsa_input_register, R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); pe_subsystem_free(); if(stonith_api) { crm_trace("Disconnecting fencing API"); clear_bit(fsa_input_register, R_ST_REQUIRED); stonith_api->cmds->free(stonith_api); stonith_api = NULL; } if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); return crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } clear_bit(fsa_input_register, R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; metadata_cache_fini(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. */ cib_free_callbacks(fsa_cib_conn); fsa_cib_conn->cmds->signoff(fsa_cib_conn); verify_stopped(fsa_state, LOG_WARNING); clear_bit(fsa_input_register, R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(stonith_reconnect); stonith_reconnect = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; crm_client_cleanup(); crm_peer_destroy(); crm_timer_stop(transition_timer); crm_timer_stop(integration_timer); crm_timer_stop(finalization_timer); crm_timer_stop(election_trigger); crm_timer_stop(shutdown_escalation_timer); crm_timer_stop(wait_timer); crm_timer_stop(recheck_timer); free(transition_timer); transition_timer = NULL; free(integration_timer); integration_timer = NULL; free(finalization_timer); finalization_timer = NULL; free(election_trigger); election_trigger = NULL; free(shutdown_escalation_timer); shutdown_escalation_timer = NULL; free(wait_timer); wait_timer = NULL; free(recheck_timer); recheck_timer = NULL; free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(te_client_id); te_client_id = NULL; free(fsa_pe_ref); fsa_pe_ref = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { log_level = LOG_ERR; exit_type = "forcefully"; exit_code = CRM_EX_ERROR; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the controller", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { int was_error = 0; crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB manager and executor objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); /* set up the timers */ transition_timer = calloc(1, sizeof(fsa_timer_t)); integration_timer = calloc(1, sizeof(fsa_timer_t)); finalization_timer = calloc(1, sizeof(fsa_timer_t)); election_trigger = calloc(1, sizeof(fsa_timer_t)); shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t)); wait_timer = calloc(1, sizeof(fsa_timer_t)); recheck_timer = calloc(1, sizeof(fsa_timer_t)); if (election_trigger != NULL) { election_trigger->source_id = 0; election_trigger->period_ms = -1; election_trigger->fsa_input = I_DC_TIMEOUT; election_trigger->callback = crm_timer_popped; election_trigger->repeat = FALSE; } else { was_error = TRUE; } if (transition_timer != NULL) { transition_timer->source_id = 0; transition_timer->period_ms = -1; transition_timer->fsa_input = I_PE_CALC; transition_timer->callback = crm_timer_popped; transition_timer->repeat = FALSE; } else { was_error = TRUE; } if (integration_timer != NULL) { integration_timer->source_id = 0; integration_timer->period_ms = -1; integration_timer->fsa_input = I_INTEGRATED; integration_timer->callback = crm_timer_popped; integration_timer->repeat = FALSE; } else { was_error = TRUE; } if (finalization_timer != NULL) { finalization_timer->source_id = 0; finalization_timer->period_ms = -1; finalization_timer->fsa_input = I_FINALIZED; finalization_timer->callback = crm_timer_popped; finalization_timer->repeat = FALSE; /* for possible enabling... a bug in the join protocol left * a slave in S_PENDING while we think it's in S_NOT_DC * * raising I_FINALIZED put us into a transition loop which is * never resolved. * in this loop we continually send probes which the node * NACK's because it's in S_PENDING * * if we have nodes where the cluster layer is active but the * CRM is not... then this will be handled in the * integration phase */ finalization_timer->fsa_input = I_ELECTION; } else { was_error = TRUE; } if (shutdown_escalation_timer != NULL) { shutdown_escalation_timer->source_id = 0; shutdown_escalation_timer->period_ms = -1; shutdown_escalation_timer->fsa_input = I_STOP; shutdown_escalation_timer->callback = crm_timer_popped; shutdown_escalation_timer->repeat = FALSE; } else { was_error = TRUE; } if (wait_timer != NULL) { wait_timer->source_id = 0; wait_timer->period_ms = 2000; wait_timer->fsa_input = I_NULL; wait_timer->callback = crm_timer_popped; wait_timer->repeat = FALSE; } else { was_error = TRUE; } if (recheck_timer != NULL) { recheck_timer->source_id = 0; recheck_timer->period_ms = -1; recheck_timer->fsa_input = I_PE_CALC; recheck_timer->callback = crm_timer_popped; recheck_timer->repeat = FALSE; } else { was_error = TRUE; } if (was_error) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } static int32_t crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void crmd_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *msg = crm_ipcs_recv(client, data, size, &id, &flags); crm_trace("Invoked: %s", crm_client_name(client)); crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } #if ENABLE_ACL CRM_ASSERT(client->user != NULL); crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif crm_trace("Processing msg from %s", crm_client_name(client)); crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (crmd_authorize_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(fsa_source); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), crm_client_name(client), c, client); free(client->userdata); crm_client_destroy(client); trigger_fsa(fsa_source); } return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, .connection_created = crmd_ipc_created, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (is_set(fsa_input_register, R_MEMBERSHIP) == FALSE) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_LRM_CONNECTED) == FALSE) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_CIB_CONNECTED) == FALSE) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_READ_CONFIG) == FALSE) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (is_set(fsa_input_register, R_PEER_DATA) == FALSE) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = crmd_ipc_server_init(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } if (stonith_reconnect == NULL) { int dummy; stonith_reconnect = mainloop_add_trigger(G_PRIORITY_LOW, te_connect_stonith, &dummy); } set_bit(fsa_input_register, R_ST_REQUIRED); mainloop_set_trigger(stonith_reconnect); - crm_notice("The local CRM is operational"); + crm_notice("Pacemaker controller successfully started and accepting connections"); clear_bit(fsa_input_register, R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { set_bit(fsa_input_register, R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* *INDENT-OFF* */ static pe_cluster_option crmd_opts[] = { /* name, old-name, validate, values, default, short description, long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Version of Pacemaker on the cluster's DC.", "Includes the hash which identifies the exact changeset it was built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, "The messaging stack on which Pacemaker is currently running.", "Used for informational and diagnostic purposes." }, { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", &check_time, "How long to wait for a response from other nodes during startup.", "The \"correct\" value will depend on the speed/load of your network and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling. Positive values are an interval in seconds (unless other SI units are specified. eg. 5min)", "15min", &check_timer, "Polling interval for time based changes to options, resource parameters and constraints.", "The Cluster is primarily event driven, however the configuration can have elements that change based on time." " To ensure these changes take effect, we can optionally poll the cluster's status for changes." }, { "load-threshold", NULL, "percentage", NULL, "80%", &check_utilization, "The maximum amount of system resources that should be used by nodes in the cluster", "The cluster will slow down its recovery process when the amount of system resources used" " (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", &check_number, "The maximum number of jobs that can be scheduled per node. Defaults to 2x cores"}, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." }, { "join-integration-timeout", "crmd-integration-timeout", "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***", "If need to adjust this value, it probably indicates the presence of a bug" }, { "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***", "If you need to adjust this value, it probably indicates the presence of a bug" }, { "transition-delay", "crmd-transition-delay", "time", NULL, "0s", &check_timer, "*** Advanced Use Only *** Enabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\n" "Useful if your configuration is sensitive to the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_sbd_timeout, "How long to wait before we can assume nodes are safely down", NULL }, { "stonith-max-attempts",NULL,"integer",NULL,"10",&check_positive_number, "How many times stonith can fail before it will no longer be attempted on a target" }, { "no-quorum-policy", NULL, "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL }, }; /* *INDENT-ON* */ void crmd_metadata(void) { config_metadata("pacemaker-controld", "1.0", "controller properties", "Cluster properties used by Pacemaker's controller," " formerly known as crmd", crmd_opts, DIMOF(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { verify_all_options(options, crmd_opts, DIMOF(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return get_cluster_pref(options, crmd_opts, DIMOF(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); set_bit(fsa_input_register, R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = crm_str_table_new(); unpack_instance_attributes(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = crmd_pref(config_hash, "no-quorum-policy"); if (safe_str_eq(value, "suicide") && pcmk_locate_sbd()) { no_quorum_suicide_escalation = TRUE; } value = crmd_pref(config_hash,"stonith-max-attempts"); update_stonith_max_attempts(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_get_msec(value); /* How long to declare an election over - even if not everyone voted */ crm_debug("Shutdown escalation occurs after: %dms", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); controld_set_election_period(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_timer->period_ms = crm_get_msec(value); crm_debug("Checking for expired actions every %dms", recheck_timer->period_ms); value = crmd_pref(config_hash, "transition-delay"); transition_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "join-integration-timeout"); integration_timer->period_ms = crm_get_msec(value); value = crmd_pref(config_hash, "join-finalization-timeout"); finalization_timer->period_ms = crm_get_msec(value); free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = first_named_child(output, XML_CIB_TAG_ALERTS); crmd_unpack_alerts(alerts); set_bit(fsa_input_register, R_READ_CONFIG); crm_trace("Triggering FSA: %s", __FUNCTION__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if (crmd_mainloop != NULL && g_main_loop_is_running(crmd_mainloop)) { if (is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating the shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); } else { set_bit(fsa_input_register, R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms < 1) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); int msec = crm_get_msec(value); crm_debug("Using default shutdown escalation: %dms", msec); shutdown_escalation_timer->period_ms = msec; } /* can't rely on this... */ crm_notice("Shutting down cluster resource manager " CRM_XS " limit=%dms", shutdown_escalation_timer->period_ms); crm_timer_start(shutdown_escalation_timer); } } else { crm_info("exit from shutdown"); crmd_exit(CRM_EX_OK); } } diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c index 38842fbcda..148737f05a 100644 --- a/daemons/controld/pacemaker-controld.c +++ b/daemons/controld/pacemaker-controld.c @@ -1,173 +1,173 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hV" void usage(const char *cmd, int exit_status); int crmd_init(void); void crmd_hamsg_callback(const xmlNode * msg, void *private_data); extern void init_dotfile(void); GMainLoop *crmd_mainloop = NULL; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int index = 0; int argerr = 0; crm_ipc_t *old_instance = NULL; crmd_mainloop = g_main_loop_new(NULL, FALSE); crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for aggregating resource and node failures as well as co-ordinating the cluster's response"); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { crmd_metadata(); return CRM_EX_OK; } else if (argc - optind == 1 && safe_str_eq("version", argv[optind])) { fprintf(stdout, "CRM Version: %s (%s)\n", PACEMAKER_VERSION, BUILD_VERSION); return CRM_EX_OK; } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_info("CRM Git Version: %s (%s)", PACEMAKER_VERSION, BUILD_VERSION); if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } + crm_notice("Starting Pacemaker controller"); + old_instance = crm_ipc_new(CRM_SYSTEM_CRMD, 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-controld is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " PE_STATE_DIR); fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); fflush(stderr); return CRM_EX_FATAL; } else if (pcmk__daemon_can_write(CRM_CONFIG_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " CRM_CONFIG_DIR); fprintf(stderr, "ERROR: Bad permissions on " CRM_CONFIG_DIR " (see logs for details)\n"); fflush(stderr); return CRM_EX_FATAL; } return crmd_init(); } static void log_deprecation_warnings() { // Add deprecations here as needed } int crmd_init(void) { crm_exit_t exit_code = CRM_EX_OK; enum crmd_fsa_state state; log_deprecation_warnings(); fsa_state = S_STARTING; fsa_input_register = 0; /* zero out the regester */ init_dotfile(); - crm_debug("Starting %s", crm_system_name); register_fsa_input(C_STARTUP, I_STARTUP, NULL); crm_peer_init(); state = s_crmd_fsa(C_STARTUP); if (state == S_PENDING || state == S_STARTING) { /* Create the mainloop and run it... */ crm_trace("Starting %s's mainloop", crm_system_name); g_main_loop_run(crmd_mainloop); if (is_set(fsa_input_register, R_STAYDOWN)) { crm_info("Inhibiting automated respawn"); exit_code = CRM_EX_FATAL; } } else { crm_err("Startup of %s failed. Current state: %s", crm_system_name, fsa_state2string(state)); exit_code = CRM_EX_ERROR; } crm_info("%s[%lu] exiting with status %d (%s)", crm_system_name, (unsigned long) getpid(), exit_code, crm_exit_str(exit_code)); return crmd_fast_exit(exit_code); } diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 43fa371aff..9b45c09c55 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1,1841 +1,1858 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef HAVE_SYS_TIMEB_H # include #endif #define EXIT_REASON_MAX_LEN 128 GHashTable *rsc_list = NULL; typedef struct lrmd_cmd_s { int timeout; guint interval_ms; int start_delay; int timeout_orig; int call_id; int exec_rc; int lrmd_op_status; int call_opts; /* Timer ids, must be removed on cmd destruction. */ int delay_id; int stonith_recurring_id; int rsc_deleted; int service_flags; char *client_id; char *origin; char *rsc_id; char *action; char *real_action; char *exit_reason; char *output; char *userdata_str; #ifdef HAVE_SYS_TIMEB_H /* Recurring and systemd operations may involve more than one executor * command per operation, so they need info about the original and the most * recent. */ struct timeb t_first_run; /* Timestamp of when op first ran */ struct timeb t_run; /* Timestamp of when op most recently ran */ struct timeb t_first_queue; /* Timestamp of when op first was queued */ struct timeb t_queue; /* Timestamp of when op most recently was queued */ struct timeb t_rcchange; /* Timestamp of last rc change */ #endif int first_notify_sent; int last_notify_rc; int last_notify_op_status; int last_pid; GHashTable *params; } lrmd_cmd_t; static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc); static gboolean lrmd_rsc_dispatch(gpointer user_data); static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id); static void log_finished(lrmd_cmd_t * cmd, int exec_time, int queue_time) { char pid_str[32] = { 0, }; int log_level = LOG_INFO; if (cmd->last_pid) { snprintf(pid_str, 32, "%d", cmd->last_pid); } if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } #ifdef HAVE_SYS_TIMEB_H do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d exec-time:%dms queue-time:%dms", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc, exec_time, queue_time); #else do_crm_log(log_level, "finished - rsc:%s action:%s call_id:%d %s%s exit-code:%d", cmd->rsc_id, cmd->action, cmd->call_id, cmd->last_pid ? "pid:" : "", pid_str, cmd->exec_rc); #endif } static void log_execute(lrmd_cmd_t * cmd) { int log_level = LOG_INFO; if (safe_str_eq(cmd->action, "monitor")) { log_level = LOG_DEBUG; } do_crm_log(log_level, "executing - rsc:%s action:%s call_id:%d", cmd->rsc_id, cmd->action, cmd->call_id); } static const char * normalize_action_name(lrmd_rsc_t * rsc, const char *action) { if (safe_str_eq(action, "monitor") && is_set(pcmk_get_ra_caps(rsc->class), pcmk_ra_cap_status)) { return "status"; } return action; } static lrmd_rsc_t * build_rsc_from_xml(xmlNode * msg) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_rsc_t *rsc = NULL; rsc = calloc(1, sizeof(lrmd_rsc_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &rsc->call_opts); rsc->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); rsc->class = crm_element_value_copy(rsc_xml, F_LRMD_CLASS); rsc->provider = crm_element_value_copy(rsc_xml, F_LRMD_PROVIDER); rsc->type = crm_element_value_copy(rsc_xml, F_LRMD_TYPE); rsc->work = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_rsc_dispatch, rsc); return rsc; } static lrmd_cmd_t * create_lrmd_cmd(xmlNode * msg, crm_client_t * client) { int call_options = 0; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, msg, LOG_ERR); lrmd_cmd_t *cmd = NULL; cmd = calloc(1, sizeof(lrmd_cmd_t)); crm_element_value_int(msg, F_LRMD_CALLOPTS, &call_options); cmd->call_opts = call_options; cmd->client_id = strdup(client->id); crm_element_value_int(msg, F_LRMD_CALLID, &cmd->call_id); crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &cmd->interval_ms); crm_element_value_int(rsc_xml, F_LRMD_TIMEOUT, &cmd->timeout); crm_element_value_int(rsc_xml, F_LRMD_RSC_START_DELAY, &cmd->start_delay); cmd->timeout_orig = cmd->timeout; cmd->origin = crm_element_value_copy(rsc_xml, F_LRMD_ORIGIN); cmd->action = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ACTION); cmd->userdata_str = crm_element_value_copy(rsc_xml, F_LRMD_RSC_USERDATA_STR); cmd->rsc_id = crm_element_value_copy(rsc_xml, F_LRMD_RSC_ID); cmd->params = xml2list(rsc_xml); if (safe_str_eq(g_hash_table_lookup(cmd->params, "CRM_meta_on_fail"), "block")) { crm_debug("Setting flag to leave pid group on timeout and only kill action pid for " CRM_OP_FMT, cmd->rsc_id, cmd->action, cmd->interval_ms); cmd->service_flags |= SVC_ACTION_LEAVE_GROUP; } return cmd; } static void stop_recurring_timer(lrmd_cmd_t *cmd) { if (cmd) { if (cmd->stonith_recurring_id) { g_source_remove(cmd->stonith_recurring_id); } cmd->stonith_recurring_id = 0; } } static void free_lrmd_cmd(lrmd_cmd_t * cmd) { stop_recurring_timer(cmd); if (cmd->delay_id) { g_source_remove(cmd->delay_id); } if (cmd->params) { g_hash_table_destroy(cmd->params); } free(cmd->origin); free(cmd->action); free(cmd->real_action); free(cmd->userdata_str); free(cmd->rsc_id); free(cmd->output); free(cmd->exit_reason); free(cmd->client_id); free(cmd); } static gboolean stonith_recurring_op_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc; cmd->stonith_recurring_id = 0; if (!cmd->rsc_id) { return FALSE; } rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); CRM_ASSERT(rsc != NULL); /* take it out of recurring_ops list, and put it in the pending ops * to be executed */ rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); return FALSE; } static inline void start_recurring_timer(lrmd_cmd_t *cmd) { if (cmd && (cmd->interval_ms > 0)) { cmd->stonith_recurring_id = g_timeout_add(cmd->interval_ms, stonith_recurring_op_helper, cmd); } } static gboolean start_delay_helper(gpointer data) { lrmd_cmd_t *cmd = data; lrmd_rsc_t *rsc = NULL; cmd->delay_id = 0; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc) { mainloop_set_trigger(rsc->work); } return FALSE; } static gboolean merge_recurring_duplicate(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { GListPtr gIter = NULL; lrmd_cmd_t * dup = NULL; gboolean dup_pending = FALSE; if (cmd->interval_ms == 0) { return 0; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && (cmd->interval_ms == dup->interval_ms)) { dup_pending = TRUE; goto merge_dup; } } /* if dup is in recurring_ops list, that means it has already executed * and is in the interval loop. we can't just remove it in this case. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { dup = gIter->data; if (safe_str_eq(cmd->action, dup->action) && (cmd->interval_ms == dup->interval_ms)) { goto merge_dup; } } return FALSE; merge_dup: /* This should not occur. If it does, we need to investigate how something * like this is possible in the controller. */ crm_warn("Duplicate recurring op entry detected (" CRM_OP_FMT "), merging with previous op entry", rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); /* merge */ dup->first_notify_sent = 0; free(dup->userdata_str); dup->userdata_str = cmd->userdata_str; cmd->userdata_str = NULL; dup->call_id = cmd->call_id; if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* if we are waiting for the next interval, kick it off now */ if (dup_pending == TRUE) { stop_recurring_timer(cmd); stonith_recurring_op_helper(cmd); } } else if (dup_pending == FALSE) { /* if we've already handed this to the service lib, kick off an early execution */ services_action_kick(rsc->rsc_id, normalize_action_name(rsc, dup->action), dup->interval_ms); } free_lrmd_cmd(cmd); return TRUE; } static void schedule_lrmd_cmd(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { gboolean dup_processed = FALSE; CRM_CHECK(cmd != NULL, return); CRM_CHECK(rsc != NULL, return); crm_trace("Scheduling %s on %s", cmd->action, rsc->rsc_id); dup_processed = merge_recurring_duplicate(rsc, cmd); if (dup_processed) { /* duplicate recurring cmd found, cmds merged */ return; } /* The controller expects the executor to automatically cancel * recurring operations before a resource stops. */ if (safe_str_eq(cmd->action, "stop")) { cancel_all_recurring(rsc, NULL); } rsc->pending_ops = g_list_append(rsc->pending_ops, cmd); #ifdef HAVE_SYS_TIMEB_H ftime(&cmd->t_queue); if (cmd->t_first_queue.time == 0) { cmd->t_first_queue = cmd->t_queue; } #endif mainloop_set_trigger(rsc->work); if (cmd->start_delay) { cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static xmlNode * create_lrmd_reply(const char *origin, int rc, int call_id) { xmlNode *reply = create_xml_node(NULL, T_LRMD_REPLY); crm_xml_add(reply, F_LRMD_ORIGIN, origin); crm_xml_add_int(reply, F_LRMD_RC, rc); crm_xml_add_int(reply, F_LRMD_CALLID, call_id); return reply; } static void send_client_notify(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; int rc; + int log_level = LOG_WARNING; + const char *msg = NULL; - if (client == NULL) { - crm_err("Asked to send event to NULL client"); - return; - } else if (client->name == NULL) { - crm_trace("Asked to send event to client with no name"); + CRM_CHECK(client != NULL, return); + if (client->name == NULL) { + crm_trace("Skipping notification to client without name"); return; } rc = lrmd_server_send_notify(client, update_msg); - if ((rc <= 0) && (rc != -ENOTCONN)) { - crm_warn("Could not notify client %s/%s: %s " CRM_XS " rc=%d", - client->name, client->id, - (rc? pcmk_strerror(rc) : "no data sent"), rc); + if (rc > 0) { + return; // success + } + + switch (rc) { + case 0: + msg = "no data sent"; + break; + + case -ENOTCONN: + case -EPIPE: // Client exited without waiting for notification + log_level = LOG_INFO; + msg = "Disconnected"; + break; + + default: + msg = pcmk_strerror(rc); + break; } + do_crm_log(log_level, + "Could not notify client %s/%s: %s " CRM_XS " rc=%d", + client->name, client->id, msg, rc); } #ifdef HAVE_SYS_TIMEB_H /*! * \internal * \brief Return difference between two times in milliseconds * * \param[in] now More recent time (or NULL to use current time) * \param[in] old Earlier time * * \return milliseconds difference (or 0 if old is NULL or has time zero) */ static int time_diff_ms(struct timeb *now, struct timeb *old) { struct timeb local_now = { 0, }; if (now == NULL) { ftime(&local_now); now = &local_now; } if ((old == NULL) || (old->time == 0)) { return 0; } return difftime(now->time, old->time) * 1000 + now->millitm - old->millitm; } /*! * \internal * \brief Reset a command's operation times to their original values. * * Reset a command's run and queued timestamps to the timestamps of the original * command, so we report the entire time since then and not just the time since * the most recent command (for recurring and systemd operations). * * /param[in] cmd Executor command object to reset * * /note It's not obvious what the queued time should be for a systemd * start/stop operation, which might go like this: * initial command queued 5ms, runs 3s * monitor command queued 10ms, runs 10s * monitor command queued 10ms, runs 10s * Is the queued time for that operation 5ms, 10ms or 25ms? The current * implementation will report 5ms. If it's 25ms, then we need to * subtract 20ms from the total exec time so as not to count it twice. * We can implement that later if it matters to anyone ... */ static void cmd_original_times(lrmd_cmd_t * cmd) { cmd->t_run = cmd->t_first_run; cmd->t_queue = cmd->t_first_queue; } #endif static void send_cmd_complete_notify(lrmd_cmd_t * cmd) { int exec_time = 0; int queue_time = 0; xmlNode *notify = NULL; #ifdef HAVE_SYS_TIMEB_H exec_time = time_diff_ms(NULL, &cmd->t_run); queue_time = time_diff_ms(&cmd->t_run, &cmd->t_queue); #endif log_finished(cmd, exec_time, queue_time); /* if the first notify result for a cmd has already been sent earlier, and the * the option to only send notifies on result changes is set. Check to see * if the last result is the same as the new one. If so, suppress this update */ if (cmd->first_notify_sent && (cmd->call_opts & lrmd_opt_notify_changes_only)) { if (cmd->last_notify_rc == cmd->exec_rc && cmd->last_notify_op_status == cmd->lrmd_op_status) { /* only send changes */ return; } } cmd->first_notify_sent = 1; cmd->last_notify_rc = cmd->exec_rc; cmd->last_notify_op_status = cmd->lrmd_op_status; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_TIMEOUT, cmd->timeout); crm_xml_add_ms(notify, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(notify, F_LRMD_RSC_START_DELAY, cmd->start_delay); crm_xml_add_int(notify, F_LRMD_EXEC_RC, cmd->exec_rc); crm_xml_add_int(notify, F_LRMD_OP_STATUS, cmd->lrmd_op_status); crm_xml_add_int(notify, F_LRMD_CALLID, cmd->call_id); crm_xml_add_int(notify, F_LRMD_RSC_DELETED, cmd->rsc_deleted); #ifdef HAVE_SYS_TIMEB_H crm_xml_add_int(notify, F_LRMD_RSC_RUN_TIME, cmd->t_run.time); crm_xml_add_int(notify, F_LRMD_RSC_RCCHANGE_TIME, cmd->t_rcchange.time); crm_xml_add_int(notify, F_LRMD_RSC_EXEC_TIME, exec_time); crm_xml_add_int(notify, F_LRMD_RSC_QUEUE_TIME, queue_time); #endif crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_RSC_EXEC); crm_xml_add(notify, F_LRMD_RSC_ID, cmd->rsc_id); if(cmd->real_action) { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->real_action); } else { crm_xml_add(notify, F_LRMD_RSC_ACTION, cmd->action); } crm_xml_add(notify, F_LRMD_RSC_USERDATA_STR, cmd->userdata_str); crm_xml_add(notify, F_LRMD_RSC_OUTPUT, cmd->output); crm_xml_add(notify, F_LRMD_RSC_EXIT_REASON, cmd->exit_reason); if (cmd->params) { char *key = NULL; char *value = NULL; GHashTableIter iter; xmlNode *args = create_xml_node(notify, XML_TAG_ATTRS); g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { hash2smartfield((gpointer) key, (gpointer) value, args); } } if (cmd->client_id && (cmd->call_opts & lrmd_opt_notify_orig_only)) { crm_client_t *client = crm_client_get_by_id(cmd->client_id); if (client) { send_client_notify(client->id, client, notify); } } else if (client_connections != NULL) { g_hash_table_foreach(client_connections, send_client_notify, notify); } free_xml(notify); } static void send_generic_notify(int rc, xmlNode * request) { if (client_connections != NULL) { int call_id = 0; xmlNode *notify = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *op = crm_element_value(request, F_LRMD_OPERATION); crm_element_value_int(request, F_LRMD_CALLID, &call_id); notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add_int(notify, F_LRMD_RC, rc); crm_xml_add_int(notify, F_LRMD_CALLID, call_id); crm_xml_add(notify, F_LRMD_OPERATION, op); crm_xml_add(notify, F_LRMD_RSC_ID, rsc_id); g_hash_table_foreach(client_connections, send_client_notify, notify); free_xml(notify); } } static void cmd_reset(lrmd_cmd_t * cmd) { cmd->lrmd_op_status = 0; cmd->last_pid = 0; #ifdef HAVE_SYS_TIMEB_H memset(&cmd->t_run, 0, sizeof(cmd->t_run)); memset(&cmd->t_queue, 0, sizeof(cmd->t_queue)); #endif free(cmd->exit_reason); cmd->exit_reason = NULL; free(cmd->output); cmd->output = NULL; } static void cmd_finalize(lrmd_cmd_t * cmd, lrmd_rsc_t * rsc) { crm_trace("Resource operation rsc:%s action:%s completed (%p %p)", cmd->rsc_id, cmd->action, rsc ? rsc->active : NULL, cmd); if (rsc && (rsc->active == cmd)) { rsc->active = NULL; mainloop_set_trigger(rsc->work); } if (!rsc) { cmd->rsc_deleted = 1; } /* reset original timeout so client notification has correct information */ cmd->timeout = cmd->timeout_orig; send_cmd_complete_notify(cmd); if (cmd->interval_ms && (cmd->lrmd_op_status == PCMK_LRM_OP_CANCELLED)) { if (rsc) { rsc->recurring_ops = g_list_remove(rsc->recurring_ops, cmd); rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else if (cmd->interval_ms == 0) { if (rsc) { rsc->pending_ops = g_list_remove(rsc->pending_ops, cmd); } free_lrmd_cmd(cmd); } else { /* Clear all the values pertaining just to the last iteration of a recurring op. */ cmd_reset(cmd); } } static int ocf2uniform_rc(int rc) { if (rc < 0 || rc > PCMK_OCF_FAILED_MASTER) { return PCMK_OCF_UNKNOWN_ERROR; } return rc; } static int stonith2uniform_rc(const char *action, int rc) { switch (rc) { case pcmk_ok: rc = PCMK_OCF_OK; break; case -ENODEV: /* This should be possible only for probes in practice, but * interpret for all actions to be safe. */ if (safe_str_eq(action, "monitor")) { rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(action, "stop")) { rc = PCMK_OCF_OK; } else { rc = PCMK_OCF_NOT_INSTALLED; } break; case -EOPNOTSUPP: rc = PCMK_OCF_UNIMPLEMENT_FEATURE; break; case -ETIME: case -ETIMEDOUT: rc = PCMK_OCF_TIMEOUT; break; default: rc = PCMK_OCF_UNKNOWN_ERROR; break; } return rc; } #if SUPPORT_NAGIOS static int nagios2uniform_rc(const char *action, int rc) { if (rc < 0) { return PCMK_OCF_UNKNOWN_ERROR; } switch (rc) { case NAGIOS_STATE_OK: return PCMK_OCF_OK; case NAGIOS_INSUFFICIENT_PRIV: return PCMK_OCF_INSUFFICIENT_PRIV; case NAGIOS_NOT_INSTALLED: return PCMK_OCF_NOT_INSTALLED; case NAGIOS_STATE_WARNING: case NAGIOS_STATE_CRITICAL: case NAGIOS_STATE_UNKNOWN: case NAGIOS_STATE_DEPENDENT: default: return PCMK_OCF_UNKNOWN_ERROR; } return PCMK_OCF_UNKNOWN_ERROR; } #endif static int get_uniform_rc(const char *standard, const char *action, int rc) { if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_OCF)) { return ocf2uniform_rc(rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_STONITH)) { return stonith2uniform_rc(action, rc); } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_SYSTEMD)) { return rc; } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_UPSTART)) { return rc; #if SUPPORT_NAGIOS } else if (safe_str_eq(standard, PCMK_RESOURCE_CLASS_NAGIOS)) { return nagios2uniform_rc(action, rc); #endif } else { return services_get_ocf_exitcode(action, rc); } } static int action_get_uniform_rc(svc_action_t * action) { lrmd_cmd_t *cmd = action->cb_data; return get_uniform_rc(action->standard, cmd->action, action->rc); } void notify_of_new_client(crm_client_t *new_client) { crm_client_t *client = NULL; GHashTableIter iter; xmlNode *notify = NULL; char *key = NULL; notify = create_xml_node(NULL, T_LRMD_NOTIFY); crm_xml_add(notify, F_LRMD_ORIGIN, __FUNCTION__); crm_xml_add(notify, F_LRMD_OPERATION, LRMD_OP_NEW_CLIENT); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & client)) { if (safe_str_eq(client->id, new_client->id)) { continue; } send_client_notify((gpointer) key, (gpointer) client, (gpointer) notify); } free_xml(notify); } static char * parse_exit_reason(const char *output) { const char *cur = NULL; const char *last = NULL; static int cookie_len = 0; char *eol = NULL; size_t reason_len = EXIT_REASON_MAX_LEN; if (output == NULL) { return NULL; } if (!cookie_len) { cookie_len = strlen(PCMK_OCF_REASON_PREFIX); } cur = strstr(output, PCMK_OCF_REASON_PREFIX); for (; cur != NULL; cur = strstr(cur, PCMK_OCF_REASON_PREFIX)) { /* skip over the cookie delimiter string */ cur += cookie_len; last = cur; } if (last == NULL) { return NULL; } // Truncate everything after a new line, and limit reason string size eol = strchr(last, '\n'); if (eol) { reason_len = QB_MIN(reason_len, eol - last); } return strndup(last, reason_len); } void client_disconnect_cleanup(const char *client_id) { GHashTableIter iter; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (rsc->call_opts & lrmd_opt_drop_recurring) { /* This client is disconnecting, drop any recurring operations * it may have initiated on the resource */ cancel_all_recurring(rsc, client_id); } } } static void action_complete(svc_action_t * action) { lrmd_rsc_t *rsc; lrmd_cmd_t *cmd = action->cb_data; const char *rclass = NULL; bool goagain = false; if (!cmd) { crm_err("Completed executor action (%s) does not match any known operations", action->id); return; } #ifdef HAVE_SYS_TIMEB_H if (cmd->exec_rc != action->rc) { ftime(&cmd->t_rcchange); } #endif cmd->last_pid = action->pid; cmd->exec_rc = action_get_uniform_rc(action); cmd->lrmd_op_status = action->status; rsc = cmd->rsc_id ? g_hash_table_lookup(rsc_list, cmd->rsc_id) : NULL; if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_SERVICE)) { rclass = resources_find_service_class(rsc->type); } else if(rsc) { rclass = rsc->class; } if (safe_str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)) { if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "start")) { /* systemd I curse thee! * * systemd returns from start actions after the start _begins_ * not after it completes. * * So we have to jump through a few hoops so that we don't * report 'complete' to the rest of pacemaker until, you know, * it's actually done. */ goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->action, "stop")) { goagain = true; cmd->real_action = cmd->action; cmd->action = strdup("monitor"); } else if(cmd->real_action) { /* Ok, so this is the follow up monitor action to check if start actually completed */ if(cmd->lrmd_op_status == PCMK_LRM_OP_DONE && cmd->exec_rc == PCMK_OCF_PENDING) { goagain = true; } else if(cmd->exec_rc == PCMK_OCF_OK && safe_str_eq(cmd->real_action, "stop")) { goagain = true; } else { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; crm_debug("%s %s is now complete (elapsed=%dms, remaining=%dms): %s (%d)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc); cmd_original_times(cmd); #endif // Monitors may return "not running", but start/stop shouldn't if ((cmd->lrmd_op_status == PCMK_LRM_OP_DONE) && (cmd->exec_rc == PCMK_OCF_NOT_RUNNING)) { if (safe_str_eq(cmd->real_action, "start")) { cmd->exec_rc = PCMK_OCF_UNKNOWN_ERROR; } else if (safe_str_eq(cmd->real_action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; } } } } } #if SUPPORT_NAGIOS if (rsc && safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS)) { if (safe_str_eq(cmd->action, "monitor") && (cmd->interval_ms == 0) && cmd->exec_rc == PCMK_OCF_OK) { /* Successfully executed --version for the nagios plugin */ cmd->exec_rc = PCMK_OCF_NOT_RUNNING; } else if (safe_str_eq(cmd->action, "start") && cmd->exec_rc != PCMK_OCF_OK) { goagain = true; } } #endif /* Wrapping this section in ifdef implies that systemd resources are not * fully supported on platforms without sys/timeb.h. Since timeb is * obsolete, we should eventually prefer a clock_gettime() implementation * (wrapped in its own ifdef) with timeb as a fallback. */ if(goagain) { #ifdef HAVE_SYS_TIMEB_H int time_sum = time_diff_ms(NULL, &cmd->t_first_run); int timeout_left = cmd->timeout_orig - time_sum; int delay = cmd->timeout_orig / 10; if(delay >= timeout_left && timeout_left > 20) { delay = timeout_left/2; } delay = QB_MIN(2000, delay); if (delay < timeout_left) { cmd->start_delay = delay; cmd->timeout = timeout_left; if(cmd->exec_rc == PCMK_OCF_OK) { crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); } else if(cmd->exec_rc == PCMK_OCF_PENDING) { crm_info("%s %s is still in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, time_sum, timeout_left, delay); } else { crm_notice("%s %s failed '%s' (%d): re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", cmd->rsc_id, cmd->action, services_ocf_exitcode_str(cmd->exec_rc), cmd->exec_rc, time_sum, timeout_left, delay); } cmd_reset(cmd); if(rsc) { rsc->active = NULL; } schedule_lrmd_cmd(rsc, cmd); /* Don't finalize cmd, we're not done with it yet */ return; } else { crm_notice("Giving up on %s %s (rc=%d): timeout (elapsed=%dms, remaining=%dms)", cmd->rsc_id, cmd->real_action?cmd->real_action:cmd->action, cmd->exec_rc, time_sum, timeout_left); cmd->lrmd_op_status = PCMK_LRM_OP_TIMEOUT; cmd->exec_rc = PCMK_OCF_TIMEOUT; cmd_original_times(cmd); } #endif } if (action->stderr_data) { cmd->output = strdup(action->stderr_data); cmd->exit_reason = parse_exit_reason(action->stderr_data); } else if (action->stdout_data) { cmd->output = strdup(action->stdout_data); } cmd_finalize(cmd, rsc); } /*! * \internal * \brief Determine operation status of a stonith operation * * Non-stonith resource operations get their operation status directly from the * service library, but the fencer does not have an equivalent, so we must infer * an operation status from the fencer API's return code. * * \param[in] action Name of action performed on stonith resource * \param[in] interval_ms Action interval * \param[in] rc Action result from fencer * * \return Operation status corresponding to fencer API return code */ static int stonith_rc2status(const char *action, guint interval_ms, int rc) { int status = PCMK_LRM_OP_DONE; switch (rc) { case pcmk_ok: break; case -EOPNOTSUPP: case -EPROTONOSUPPORT: status = PCMK_LRM_OP_NOTSUPPORTED; break; case -ETIME: case -ETIMEDOUT: status = PCMK_LRM_OP_TIMEOUT; break; case -ENOTCONN: case -ECOMM: // Couldn't talk to fencer status = PCMK_LRM_OP_ERROR; break; case -ENODEV: // The device is not registered with the fencer if (safe_str_neq(action, "monitor")) { status = PCMK_LRM_OP_ERROR; } else if (interval_ms > 0) { /* If we get here, the fencer somehow lost the registration of a * previously active device (possibly due to crash and respawn). In * that case, we need to indicate that the recurring monitor needs * to be cancelled. */ status = PCMK_LRM_OP_CANCELLED; } break; default: break; } return status; } static void stonith_action_complete(lrmd_cmd_t * cmd, int rc) { // This can be NULL if resource was removed before command completed lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, cmd->rsc_id); cmd->exec_rc = stonith2uniform_rc(cmd->action, rc); /* This function may be called with status already set to cancelled, if a * pending action was aborted. Otherwise, we need to determine status from * the fencer return code. */ if (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED) { cmd->lrmd_op_status = stonith_rc2status(cmd->action, cmd->interval_ms, rc); // Certain successful actions change the known state of the resource if (rsc && (cmd->exec_rc == PCMK_OCF_OK)) { if (safe_str_eq(cmd->action, "start")) { rsc->stonith_started = 1; } else if (safe_str_eq(cmd->action, "stop")) { rsc->stonith_started = 0; } } } /* The recurring timer should not be running at this point in any case, but * as a failsafe, stop it if it is. */ stop_recurring_timer(cmd); /* Reschedule this command if appropriate. If a recurring command is *not* * rescheduled, its status must be PCMK_LRM_OP_CANCELLED, otherwise it will * not be removed from recurring_ops by cmd_finalize(). */ if (rsc && (cmd->interval_ms > 0) && (cmd->lrmd_op_status != PCMK_LRM_OP_CANCELLED)) { start_recurring_timer(cmd); } cmd_finalize(cmd, rsc); } static void lrmd_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data) { stonith_action_complete(data->userdata, data->rc); } void stonith_connection_failed(void) { GHashTableIter iter; GList *cmd_list = NULL; GList *cmd_iter = NULL; lrmd_rsc_t *rsc = NULL; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & rsc)) { if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { if (rsc->active) { cmd_list = g_list_append(cmd_list, rsc->active); } if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, rsc->recurring_ops); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, rsc->pending_ops); } rsc->pending_ops = rsc->recurring_ops = NULL; } } if (!cmd_list) { return; } crm_err("Connection to fencer failed, finalizing %d pending operations", g_list_length(cmd_list)); for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { stonith_action_complete(cmd_iter->data, -ENOTCONN); } g_list_free(cmd_list); } /*! * \internal * \brief Execute a stonith resource "start" action * * Start a stonith resource by registering it with the fencer. * (Stonith agents don't have a start command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to start * \param[in] cmd Start command to execute * * \return pcmk_ok on success, -errno otherwise */ static int execd_stonith_start(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { char *key = NULL; char *value = NULL; stonith_key_value_t *device_params = NULL; int rc = pcmk_ok; // Convert command parameters to stonith API key/values if (cmd->params) { GHashTableIter iter; g_hash_table_iter_init(&iter, cmd->params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { device_params = stonith_key_value_add(device_params, key, value); } } /* The fencer will automatically register devices via CIB notifications * when the CIB changes, but to avoid a possible race condition between * the fencer receiving the notification and the executor requesting that * resource, the executor registers the device as well. The fencer knows how * to handle duplicate registrations. */ rc = stonith_api->cmds->register_device(stonith_api, st_opt_sync_call, cmd->rsc_id, rsc->provider, rsc->type, device_params); stonith_key_value_freeall(device_params, 1, 1); return rc; } /*! * \internal * \brief Execute a stonith resource "stop" action * * Stop a stonith resource by unregistering it with the fencer. * (Stonith agents don't have a stop command.) * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to stop * * \return pcmk_ok on success, -errno otherwise */ static inline int execd_stonith_stop(stonith_t *stonith_api, const lrmd_rsc_t *rsc) { /* @TODO Failure would indicate a problem communicating with fencer; * perhaps we should try reconnecting and retrying a few times? */ return stonith_api->cmds->remove_device(stonith_api, st_opt_sync_call, rsc->rsc_id); } /*! * \internal * \brief Execute a one-time stonith resource "monitor" action * * Probe a stonith resource by checking whether we started it * * \param[in] rsc Stonith resource to probe * * \return pcmk_ok if started, -errno otherwise */ static inline int execd_stonith_probe(lrmd_rsc_t *rsc) { return rsc->stonith_started? 0 : -ENODEV; } /*! * \internal * \brief Initiate a stonith resource agent "monitor" action * * \param[in] stonith_api Connection to fencer * \param[in] rsc Stonith resource to monitor * \param[in] cmd Monitor command being executed * * \return pcmk_ok if monitor was successfully initiated, -errno otherwise */ static inline int execd_stonith_monitor(stonith_t *stonith_api, lrmd_rsc_t *rsc, lrmd_cmd_t *cmd) { int rc = stonith_api->cmds->monitor(stonith_api, 0, cmd->rsc_id, cmd->timeout / 1000); rc = stonith_api->cmds->register_callback(stonith_api, rc, 0, 0, cmd, "lrmd_stonith_callback", lrmd_stonith_callback); if (rc == TRUE) { rsc->active = cmd; rc = pcmk_ok; } else { rc = -pcmk_err_generic; } return rc; } static void lrmd_rsc_execute_stonith(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { int rc = 0; bool do_monitor = FALSE; stonith_t *stonith_api = get_stonith_connection(); if (!stonith_api) { rc = -ENOTCONN; } else if (safe_str_eq(cmd->action, "start")) { rc = execd_stonith_start(stonith_api, rsc, cmd); if (rc == 0) { do_monitor = TRUE; } } else if (safe_str_eq(cmd->action, "stop")) { rc = execd_stonith_stop(stonith_api, rsc); } else if (safe_str_eq(cmd->action, "monitor")) { if (cmd->interval_ms > 0) { do_monitor = TRUE; } else { rc = execd_stonith_probe(rsc); } } if (do_monitor) { rc = execd_stonith_monitor(stonith_api, rsc, cmd); if (rc == pcmk_ok) { // Don't clean up yet, we will find out result of the monitor later return; } } stonith_action_complete(cmd, rc); } static int lrmd_rsc_execute_service_lib(lrmd_rsc_t * rsc, lrmd_cmd_t * cmd) { svc_action_t *action = NULL; GHashTable *params_copy = NULL; CRM_ASSERT(rsc); CRM_ASSERT(cmd); crm_trace("Creating action, resource:%s action:%s class:%s provider:%s agent:%s", rsc->rsc_id, cmd->action, rsc->class, rsc->provider, rsc->type); #if SUPPORT_NAGIOS /* Recurring operations are cancelled anyway for a stop operation */ if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS) && safe_str_eq(cmd->action, "stop")) { cmd->exec_rc = PCMK_OCF_OK; goto exec_done; } #endif params_copy = crm_str_table_dup(cmd->params); action = resources_action_create(rsc->rsc_id, rsc->class, rsc->provider, rsc->type, normalize_action_name(rsc, cmd->action), cmd->interval_ms, cmd->timeout, params_copy, cmd->service_flags); if (!action) { crm_err("Failed to create action, action:%s on resource %s", cmd->action, rsc->rsc_id); cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; goto exec_done; } action->cb_data = cmd; /* 'cmd' may not be valid after this point if * services_action_async() returned TRUE * * Upstart and systemd both synchronously determine monitor/status * results and call action_complete (which may free 'cmd') if necessary. */ if (services_action_async(action, action_complete)) { return TRUE; } cmd->exec_rc = action->rc; if(action->status != PCMK_LRM_OP_DONE) { cmd->lrmd_op_status = action->status; } else { cmd->lrmd_op_status = PCMK_LRM_OP_ERROR; } services_action_free(action); action = NULL; exec_done: cmd_finalize(cmd, rsc); return TRUE; } static gboolean lrmd_rsc_execute(lrmd_rsc_t * rsc) { lrmd_cmd_t *cmd = NULL; CRM_CHECK(rsc != NULL, return FALSE); if (rsc->active) { crm_trace("%s is still active", rsc->rsc_id); return TRUE; } if (rsc->pending_ops) { GList *first = rsc->pending_ops; cmd = first->data; if (cmd->delay_id) { crm_trace ("Command %s %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->rsc_id, cmd->action, cmd->start_delay); return TRUE; } rsc->pending_ops = g_list_remove_link(rsc->pending_ops, first); g_list_free_1(first); #ifdef HAVE_SYS_TIMEB_H if (cmd->t_first_run.time == 0) { ftime(&cmd->t_first_run); } ftime(&cmd->t_run); #endif } if (!cmd) { crm_trace("Nothing further to do for %s", rsc->rsc_id); return TRUE; } rsc->active = cmd; /* only one op at a time for a rsc */ if (cmd->interval_ms) { rsc->recurring_ops = g_list_append(rsc->recurring_ops, cmd); } log_execute(cmd); if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { lrmd_rsc_execute_stonith(rsc, cmd); } else { lrmd_rsc_execute_service_lib(rsc, cmd); } return TRUE; } static gboolean lrmd_rsc_dispatch(gpointer user_data) { return lrmd_rsc_execute(user_data); } void free_rsc(gpointer data) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = data; int is_stonith = safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH); gIter = rsc->pending_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; /* command was never executed */ cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, NULL); gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->pending_ops); gIter = rsc->recurring_ops; while (gIter != NULL) { GListPtr next = gIter->next; lrmd_cmd_t *cmd = gIter->data; if (is_stonith) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; /* If a stonith command is in-flight, just mark it as cancelled; * it is not safe to finalize/free the cmd until the stonith api * says it has either completed or timed out. */ if (rsc->active != cmd) { cmd_finalize(cmd, NULL); } } else { /* This command is already handed off to service library, * let service library cancel it and tell us via the callback * when it is cancelled. The rsc can be safely destroyed * even if we are waiting for the cancel result */ services_action_cancel(rsc->rsc_id, normalize_action_name(rsc, cmd->action), cmd->interval_ms); } gIter = next; } /* frees list, but not list elements. */ g_list_free(rsc->recurring_ops); free(rsc->rsc_id); free(rsc->class); free(rsc->provider); free(rsc->type); mainloop_destroy_trigger(rsc->work); free(rsc); } static xmlNode * process_lrmd_signon(crm_client_t *client, xmlNode *request, int call_id) { xmlNode *reply = NULL; int rc = pcmk_ok; const char *is_ipc_provider = crm_element_value(request, F_LRMD_IS_IPC_PROVIDER); const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { crm_err("Cluster API version must be greater than or equal to %s, not %s", LRMD_MIN_PROTOCOL_VERSION, protocol_version); rc = -EPROTO; } reply = create_lrmd_reply(__FUNCTION__, rc, call_id); crm_xml_add(reply, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_LRMD_CLIENTID, client->id); crm_xml_add(reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); if (crm_is_true(is_ipc_provider)) { // This is a remote connection from a cluster node's controller #ifdef SUPPORT_REMOTE ipc_proxy_add_provider(client); #endif } return reply; } static int process_lrmd_rsc_register(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = build_rsc_from_xml(request); lrmd_rsc_t *dup = g_hash_table_lookup(rsc_list, rsc->rsc_id); if (dup && safe_str_eq(rsc->class, dup->class) && safe_str_eq(rsc->provider, dup->provider) && safe_str_eq(rsc->type, dup->type)) { crm_warn("Can't add, RSC '%s' already present in the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); free_rsc(rsc); return rc; } g_hash_table_replace(rsc_list, rsc->rsc_id, rsc); crm_info("Added '%s' to the rsc list (%d active resources)", rsc->rsc_id, g_hash_table_size(rsc_list)); return rc; } static xmlNode * process_lrmd_get_rsc_info(xmlNode *request, int call_id) { int rc = pcmk_ok; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); xmlNode *reply = NULL; lrmd_rsc_t *rsc = NULL; if (rsc_id == NULL) { rc = -ENODEV; } else { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__FUNCTION__, rc, call_id); if (rsc) { crm_xml_add(reply, F_LRMD_RSC_ID, rsc->rsc_id); crm_xml_add(reply, F_LRMD_CLASS, rsc->class); crm_xml_add(reply, F_LRMD_PROVIDER, rsc->provider); crm_xml_add(reply, F_LRMD_TYPE, rsc->type); } return reply; } static int process_lrmd_rsc_unregister(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; lrmd_rsc_t *rsc = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (!rsc_id) { return -ENODEV; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return pcmk_ok; } if (rsc->active) { /* let the caller know there are still active ops on this rsc to watch for */ crm_trace("Operation still in progress: %p", rsc->active); rc = -EINPROGRESS; } g_hash_table_remove(rsc_list, rsc_id); return rc; } static int process_lrmd_rsc_exec(crm_client_t * client, uint32_t id, xmlNode * request) { lrmd_rsc_t *rsc = NULL; lrmd_cmd_t *cmd = NULL; xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); int call_id; if (!rsc_id) { return -EINVAL; } if (!(rsc = g_hash_table_lookup(rsc_list, rsc_id))) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); return -ENODEV; } cmd = create_lrmd_cmd(request, client); call_id = cmd->call_id; /* Don't reference cmd after handing it off to be scheduled. * The cmd could get merged and freed. */ schedule_lrmd_cmd(rsc, cmd); return call_id; } static int cancel_op(const char *rsc_id, const char *action, guint interval_ms) { GListPtr gIter = NULL; lrmd_rsc_t *rsc = g_hash_table_lookup(rsc_list, rsc_id); /* How to cancel an action. * 1. Check pending ops list, if it hasn't been handed off * to the service library or stonith recurring list remove * it there and that will stop it. * 2. If it isn't in the pending ops list, then it's either a * recurring op in the stonith recurring list, or the service * library's recurring list. Stop it there * 3. If not found in any lists, then this operation has either * been executed already and is not a recurring operation, or * never existed. */ if (!rsc) { return -ENODEV; } for (gIter = rsc->pending_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && (cmd->interval_ms == interval_ms)) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; cmd_finalize(cmd, rsc); return pcmk_ok; } } if (safe_str_eq(rsc->class, PCMK_RESOURCE_CLASS_STONITH)) { /* The service library does not handle stonith operations. * We have to handle recurring stonith operations ourselves. */ for (gIter = rsc->recurring_ops; gIter != NULL; gIter = gIter->next) { lrmd_cmd_t *cmd = gIter->data; if (safe_str_eq(cmd->action, action) && (cmd->interval_ms == interval_ms)) { cmd->lrmd_op_status = PCMK_LRM_OP_CANCELLED; if (rsc->active != cmd) { cmd_finalize(cmd, rsc); } return pcmk_ok; } } } else if (services_action_cancel(rsc_id, normalize_action_name(rsc, action), interval_ms) == TRUE) { /* The service library will tell the action_complete callback function * this action was cancelled, which will destroy the cmd and remove * it from the recurring_op list. Do not do that in this function * if the service library says it cancelled it. */ return pcmk_ok; } return -EOPNOTSUPP; } static void cancel_all_recurring(lrmd_rsc_t * rsc, const char *client_id) { GList *cmd_list = NULL; GList *cmd_iter = NULL; /* Notice a copy of each list is created when concat is called. * This prevents odd behavior from occurring when the cmd_list * is iterated through later on. It is possible the cancel_op * function may end up modifying the recurring_ops and pending_ops * lists. If we did not copy those lists, our cmd_list iteration * could get messed up.*/ if (rsc->recurring_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->recurring_ops)); } if (rsc->pending_ops) { cmd_list = g_list_concat(cmd_list, g_list_copy(rsc->pending_ops)); } if (!cmd_list) { return; } for (cmd_iter = cmd_list; cmd_iter; cmd_iter = cmd_iter->next) { lrmd_cmd_t *cmd = cmd_iter->data; if (cmd->interval_ms == 0) { continue; } if (client_id && safe_str_neq(cmd->client_id, client_id)) { continue; } cancel_op(rsc->rsc_id, cmd->action, cmd->interval_ms); } /* frees only the copied list data, not the cmds */ g_list_free(cmd_list); } static int process_lrmd_rsc_cancel(crm_client_t * client, uint32_t id, xmlNode * request) { xmlNode *rsc_xml = get_xpath_object("//" F_LRMD_RSC, request, LOG_ERR); const char *rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); const char *action = crm_element_value(rsc_xml, F_LRMD_RSC_ACTION); guint interval_ms = 0; crm_element_value_ms(rsc_xml, F_LRMD_RSC_INTERVAL, &interval_ms); if (!rsc_id || !action) { return -EINVAL; } return cancel_op(rsc_id, action, interval_ms); } static void add_recurring_op_xml(xmlNode *reply, lrmd_rsc_t *rsc) { xmlNode *rsc_xml = create_xml_node(reply, F_LRMD_RSC); crm_xml_add(rsc_xml, F_LRMD_RSC_ID, rsc->rsc_id); for (GList *item = rsc->recurring_ops; item != NULL; item = item->next) { lrmd_cmd_t *cmd = item->data; xmlNode *op_xml = create_xml_node(rsc_xml, T_LRMD_RSC_OP); crm_xml_add(op_xml, F_LRMD_RSC_ACTION, (cmd->real_action? cmd->real_action : cmd->action)); crm_xml_add_ms(op_xml, F_LRMD_RSC_INTERVAL, cmd->interval_ms); crm_xml_add_int(op_xml, F_LRMD_TIMEOUT, cmd->timeout_orig); } } static xmlNode * process_lrmd_get_recurring(xmlNode *request, int call_id) { int rc = pcmk_ok; const char *rsc_id = NULL; lrmd_rsc_t *rsc = NULL; xmlNode *reply = NULL; xmlNode *rsc_xml = NULL; // Resource ID is optional rsc_xml = first_named_child(request, F_LRMD_CALLDATA); if (rsc_xml) { rsc_xml = first_named_child(rsc_xml, F_LRMD_RSC); } if (rsc_xml) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); } // If resource ID is specified, resource must exist if (rsc_id != NULL) { rsc = g_hash_table_lookup(rsc_list, rsc_id); if (rsc == NULL) { crm_info("Resource '%s' not found (%d active resources)", rsc_id, g_hash_table_size(rsc_list)); rc = -ENODEV; } } reply = create_lrmd_reply(__FUNCTION__, rc, call_id); // If resource ID is not specified, check all resources if (rsc_id == NULL) { GHashTableIter iter; char *key = NULL; g_hash_table_iter_init(&iter, rsc_list); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &rsc)) { add_recurring_op_xml(reply, rsc); } } else if (rsc) { add_recurring_op_xml(reply, rsc); } return reply; } void process_lrmd_message(crm_client_t * client, uint32_t id, xmlNode * request) { int rc = pcmk_ok; int call_id = 0; const char *op = crm_element_value(request, F_LRMD_OPERATION); int do_reply = 0; int do_notify = 0; xmlNode *reply = NULL; crm_trace("Processing %s operation from %s", op, client->id); crm_element_value_int(request, F_LRMD_CALLID, &call_id); if (crm_str_eq(op, CRM_OP_IPC_FWD, TRUE)) { #ifdef SUPPORT_REMOTE ipc_proxy_forward_client(client, request); #endif do_reply = 1; } else if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { reply = process_lrmd_signon(client, request, call_id); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_REG, TRUE)) { rc = process_lrmd_rsc_register(client, id, request); do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_INFO, TRUE)) { reply = process_lrmd_get_rsc_info(request, call_id); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_UNREG, TRUE)) { rc = process_lrmd_rsc_unregister(client, id, request); /* don't notify anyone about failed un-registers */ if (rc == pcmk_ok || rc == -EINPROGRESS) { do_notify = 1; } do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_EXEC, TRUE)) { rc = process_lrmd_rsc_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_RSC_CANCEL, TRUE)) { rc = process_lrmd_rsc_cancel(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_POKE, TRUE)) { do_notify = 1; do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_CHECK, TRUE)) { xmlNode *data = get_message_xml(request, F_LRMD_CALLDATA); const char *timeout = crm_element_value(data, F_LRMD_WATCHDOG); CRM_LOG_ASSERT(data != NULL); check_sbd_timeout(timeout); } else if (crm_str_eq(op, LRMD_OP_ALERT_EXEC, TRUE)) { rc = process_lrmd_alert_exec(client, id, request); do_reply = 1; } else if (crm_str_eq(op, LRMD_OP_GET_RECURRING, TRUE)) { reply = process_lrmd_get_recurring(request, call_id); do_reply = 1; } else { rc = -EOPNOTSUPP; do_reply = 1; crm_err("Unknown %s from %s", op, client->name); crm_log_xml_warn(request, "UnknownOp"); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", op, client->id, rc, do_reply, do_notify); if (do_reply) { int send_rc = pcmk_ok; if (reply == NULL) { reply = create_lrmd_reply(__FUNCTION__, rc, call_id); } send_rc = lrmd_server_send_reply(client, id, reply); free_xml(reply); if (send_rc < 0) { crm_warn("Reply to client %s failed: %s " CRM_XS " %d", client->name, pcmk_strerror(send_rc), send_rc); } } if (do_notify) { send_generic_notify(rc, request); } } diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 3cec288e75..21bb0edde8 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,622 +1,630 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #if defined(HAVE_GNUTLS_GNUTLS_H) && defined(SUPPORT_REMOTE) # define ENABLE_PCMK_REMOTE // Hidden in liblrmd extern int lrmd_tls_send_msg(crm_remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type); #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; #ifdef ENABLE_PCMK_REMOTE /* whether shutdown request has been sent */ static sig_atomic_t shutting_down = FALSE; /* timer for waiting for acknowledgment of shutdown request */ static guint shutdown_ack_timer = 0; static gboolean lrmd_exit(gpointer data); #endif static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; crm_err("Connection to fencer lost"); stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { stonith_api_delete(stonith_api); stonith_api = NULL; } if (!stonith_api) { int rc = 0; int tries = 10; stonith_api = stonith_api_new(); do { rc = stonith_api->cmds->connect(stonith_api, "pacemaker-execd", NULL); if (rc == pcmk_ok) { stonith_api->cmds->register_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); break; } sleep(1); tries--; } while (tries); if (rc) { crm_err("Unable to connect to stonith daemon to execute command. error: %s", pcmk_strerror(rc)); stonith_api_delete(stonith_api); stonith_api = NULL; } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { crm_client_t *new_client = crm_client_get(c); crm_trace("Connection %p", c); CRM_ASSERT(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); xmlNode *request = crm_ipcs_recv(client, data, size, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value == NULL) { client->name = crm_itoa(crm_ipcs_client_pid(c)); } else { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in] client Client connection to free */ void lrmd_client_destroy(crm_client_t *client) { crm_client_destroy(client); #ifdef ENABLE_PCMK_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { lrmd_exit(NULL); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef ENABLE_PCMK_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; int lrmd_server_send_reply(crm_client_t * client, uint32_t id, xmlNode * reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (client->kind) { case CRM_CLIENT_IPC: return crm_ipcs_send(client, id, reply, FALSE); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: return lrmd_tls_send_msg(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown client type %d", client->kind); } return -ENOTCONN; } int lrmd_server_send_notify(crm_client_t * client, xmlNode * msg) { crm_trace("Sending notification to client (%s)", client->id); switch (client->kind) { case CRM_CLIENT_IPC: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return -ENOTCONN; } return crm_ipcs_send(client, 0, msg, crm_ipc_server_event); #ifdef ENABLE_PCMK_REMOTE case CRM_CLIENT_TLS: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return -ENOTCONN; } return lrmd_tls_send_msg(client->remote, msg, 0, "notify"); #endif default: crm_err("Could not notify client: unknown type %d", client->kind); } return -ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately * * \param[in] data Ignored * * \return Doesn't return * \note This can be used as a timer callback. */ static gboolean lrmd_exit(gpointer data) { crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (stonith_api) { stonith_api->cmds->remove_notification(stonith_api, T_STONITH_NOTIFY_DISCONNECT); stonith_api->cmds->disconnect(stonith_api); stonith_api_delete(stonith_api); } if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef ENABLE_PCMK_REMOTE lrmd_tls_server_destroy(); ipc_proxy_cleanup(); #endif crm_client_cleanup(); g_hash_table_destroy(rsc_list); if (mainloop) { lrmd_drain_alerts(mainloop); } crm_exit(CRM_EX_OK); return FALSE; } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef ENABLE_PCMK_REMOTE crm_client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host (which ensures the proxy host * supports shutdown requests), then wait for all proxy hosts to * disconnect (which ensures that all resources have been stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ lrmd_tls_server_destroy(); /* Older controller versions will never acknowledge our request, so * set a fairly short timeout to exit quickly in that case. If we * get the ack, we'll defuse this timer. */ shutdown_ack_timer = g_timeout_add_seconds(20, lrmd_exit, NULL); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif lrmd_exit(NULL); } /*! * \internal * \brief Defuse short exit timer if shutting down */ void handle_shutdown_ack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown ack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = 0; } return; } #endif crm_debug("Ignoring unexpected shutdown ack"); } /*! * \internal * \brief Make short exit timer fire immediately */ void handle_shutdown_nack() { #ifdef ENABLE_PCMK_REMOTE if (shutting_down) { crm_info("Received shutdown nack"); if (shutdown_ack_timer > 0) { g_source_remove(shutdown_ack_timer); shutdown_ack_timer = g_timeout_add(0, lrmd_exit, NULL); } return; } #endif crm_debug("Ignoring unexpected shutdown nack"); } static pid_t main_pid = 0; static void sigdone(void) { exit(CRM_EX_OK); } static void sigreap(void) { pid_t pid = 0; int status; do { /* * Opinions seem to differ as to what to put here: * -1, any child process * 0, any child process whose process group ID is equal to that of the calling process */ pid = waitpid(-1, &status, WNOHANG); if(pid == main_pid) { /* Exit when pacemaker-remote exits and use the same return code */ if (WIFEXITED(status)) { exit(WEXITSTATUS(status)); } exit(CRM_EX_ERROR); } } while (pid > 0); } static struct { int sig; void (*handler)(void); } sigmap[] = { { SIGCHLD, sigreap }, { SIGINT, sigdone }, }; static void spawn_pidone(int argc, char **argv, char **envp) { sigset_t set; if (getpid() != 1) { return; } sigfillset(&set); sigprocmask(SIG_BLOCK, &set, 0); main_pid = fork(); switch (main_pid) { case 0: sigprocmask(SIG_UNBLOCK, &set, NULL); setsid(); setpgid(0, 0); /* Child remains as pacemaker-remoted */ return; case -1: perror("fork"); } /* Parent becomes the reaper of zombie processes */ /* Safe to initialize logging now if needed */ #ifdef HAVE___PROGNAME /* Differentiate ourselves in the 'ps' output */ { char *p; int i, maxlen; char *LastArgv = NULL; const char *name = "pcmk-init"; for(i = 0; i < argc; i++) { if(!i || (LastArgv + 1 == argv[i])) LastArgv = argv[i] + strlen(argv[i]); } for(i = 0; envp[i] != NULL; i++) { if((LastArgv + 1) == envp[i]) { LastArgv = envp[i] + strlen(envp[i]); } } maxlen = (LastArgv - argv[0]) - 2; i = strlen(name); /* We can overwrite individual argv[] arguments */ snprintf(argv[0], maxlen, "%s", name); /* Now zero out everything else */ p = &argv[0][i]; while(p < LastArgv) *p++ = '\0'; argv[1] = NULL; } #endif /* HAVE___PROGNAME */ while (1) { int sig; size_t i; sigwait(&set, &sig); for (i = 0; i < DIMOF(sigmap); i++) { if (sigmap[i].sig == sig) { sigmap[i].handler(); break; } } } } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"logfile", 1, 0, 'l', "\tSend logs to the additional named logfile"}, #ifdef ENABLE_PCMK_REMOTE {"port", 1, 0, 'p', "\tPort to listen on"}, #endif {0, 0, 0, 0} }; /* *INDENT-ON* */ +#ifdef ENABLE_PCMK_REMOTE +# define EXECD_TYPE "remote" +#else +# define EXECD_TYPE "local" +#endif + int main(int argc, char **argv, char **envp) { int flag = 0; int index = 0; int bump_log_num = 0; const char *option = NULL; /* If necessary, create PID1 now before any FDs are opened */ spawn_pidone(argc, argv, envp); #ifndef ENABLE_PCMK_REMOTE crm_log_preinit("pacemaker-execd", argc, argv); crm_set_options(NULL, "[options]", long_options, "Resource agent executor daemon for cluster nodes"); #else crm_log_preinit("pacemaker-remoted", argc, argv); crm_set_options(NULL, "[options]", long_options, "Resource agent executor daemon for Pacemaker Remote nodes"); #endif while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) { break; } switch (flag) { case 'l': crm_add_logfile(optarg); break; case 'p': setenv("PCMK_remote_port", optarg, 1); break; case 'V': bump_log_num++; break; case '?': case '$': crm_help(flag, CRM_EX_OK); break; default: crm_help('?', CRM_EX_USAGE); break; } } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); while (bump_log_num > 0) { crm_bump_log_level(argc, argv); bump_log_num--; } option = daemon_option("logfacility"); if(option && safe_str_neq(option, "none")) { setenv("HA_LOGFACILITY", option, 1); /* Used by the ocf_log/ha_log OCF macro */ } option = daemon_option("logfile"); if(option && safe_str_neq(option, "none")) { setenv("HA_LOGFILE", option, 1); /* Used by the ocf_log/ha_log OCF macro */ if (daemon_option_enabled(crm_system_name, "debug")) { setenv("HA_DEBUGLOG", option, 1); /* Used by the ocf_log/ha_debug OCF macro */ } } + crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); + /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); /* Used by RAs - Leave owned by root */ crm_build_path(CRM_RSCTMP_DIR, 0755); rsc_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); crm_exit(CRM_EX_FATAL); } #ifdef ENABLE_PCMK_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); crm_exit(CRM_EX_FATAL); } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); - crm_info("Starting"); + crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); g_main_loop_run(mainloop); /* should never get here */ lrmd_exit(NULL); return CRM_EX_OK; } diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index f202b00c13..d07c38919b 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -1,455 +1,455 @@ /* * Copyright 2012-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include "pacemaker-execd.h" #include #include #include #include #include #include #include #include static qb_ipcs_service_t *cib_ro = NULL; static qb_ipcs_service_t *cib_rw = NULL; static qb_ipcs_service_t *cib_shm = NULL; static qb_ipcs_service_t *attrd_ipcs = NULL; static qb_ipcs_service_t *crmd_ipcs = NULL; static qb_ipcs_service_t *stonith_ipcs = NULL; // An IPC provider is a cluster node controller connecting as a client static GList *ipc_providers = NULL; /* ipc clients == things like cibadmin, crm_resource, connecting locally */ static GHashTable *ipc_clients = NULL; /*! * \internal * \brief Get an IPC proxy provider * * \return Pointer to a provider if one exists, NULL otherwise * * \note Grab the first provider, which is the most recent connection. That way, * if we haven't yet timed out an old, failed connection, we don't try to * use it. */ crm_client_t * ipc_proxy_get_provider() { return ipc_providers? (crm_client_t*) (ipc_providers->data) : NULL; } /*! * \internal * \brief Accept a client connection on a proxy IPC server * * \param[in] c Client's IPC connection * \param[in] uid Client's user ID * \param[in] gid Client's group ID * \param[in] ipc_channel Name of IPC server to proxy * * \return pcmk_ok on success, -errno on error */ static int32_t ipc_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid, const char *ipc_channel) { crm_client_t *client; crm_client_t *ipc_proxy = ipc_proxy_get_provider(); xmlNode *msg; if (ipc_proxy == NULL) { - crm_warn("Cannot proxy request from uid %d gid %d " - "because not connected to cluster", uid, gid); + crm_warn("Cannot proxy IPC connection from uid %d gid %d to %s " + "because not connected to cluster", uid, gid, ipc_channel); return -EREMOTEIO; } /* This new client is a local IPC client on a Pacemaker Remote controlled * node, needing to access cluster node IPC services. */ client = crm_client_new(c, uid, gid); if (client == NULL) { return -EREMOTEIO; } /* This ipc client is bound to a single ipc provider. If the * provider goes away, this client is disconnected */ client->userdata = strdup(ipc_proxy->id); client->name = crm_strdup_printf("proxy-%s-%d-%.8s", ipc_channel, client->pid, client->id); g_hash_table_insert(ipc_clients, client->id, client); msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_NEW); crm_xml_add(msg, F_LRMD_IPC_IPC_SERVER, ipc_channel); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); crm_debug("Accepted IPC proxy connection (session ID %s) " "from uid %d gid %d on channel %s", client->id, uid, gid, ipc_channel); return 0; } static int32_t crmd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, CRM_SYSTEM_CRMD); } static int32_t attrd_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, T_ATTRD); } static int32_t stonith_proxy_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, "stonith-ng"); } static int32_t cib_proxy_accept_rw(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, CIB_CHANNEL_RW); } static int32_t cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { return ipc_proxy_accept(c, uid, gid, CIB_CHANNEL_RO); } static void ipc_proxy_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } void ipc_proxy_forward_client(crm_client_t *ipc_proxy, xmlNode *xml) { const char *session = crm_element_value(xml, F_LRMD_IPC_SESSION); const char *msg_type = crm_element_value(xml, F_LRMD_IPC_OP); xmlNode *msg = get_message_xml(xml, F_LRMD_IPC_MSG); crm_client_t *ipc_client; int rc = 0; /* If the IPC provider is acknowledging our shutdown request, * defuse the short exit timer to give the cluster time to * stop any resources we're running. */ if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_ACK)) { handle_shutdown_ack(); return; } if (safe_str_eq(msg_type, LRMD_IPC_OP_SHUTDOWN_NACK)) { handle_shutdown_nack(); return; } ipc_client = crm_client_get_by_id(session); if (ipc_client == NULL) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, F_LRMD_IPC_SESSION, session); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); return; } /* This is an event or response from the ipc provider * going to the local ipc client. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_cb/remote_proxy_relay_event() * <--3--> ipc server * * This function is receiving a msg from connection 2 * and forwarding it to connection 1. */ if (safe_str_eq(msg_type, LRMD_IPC_OP_EVENT)) { crm_trace("Sending event to %s", ipc_client->id); rc = crm_ipcs_send(ipc_client, 0, msg, crm_ipc_server_event); } else if (safe_str_eq(msg_type, LRMD_IPC_OP_RESPONSE)) { int msg_id = 0; crm_element_value_int(xml, F_LRMD_IPC_MSG_ID, &msg_id); crm_trace("Sending response to %d - %s", ipc_client->request_id, ipc_client->id); rc = crm_ipcs_send(ipc_client, msg_id, msg, FALSE); CRM_LOG_ASSERT(msg_id == ipc_client->request_id); ipc_client->request_id = 0; } else if (safe_str_eq(msg_type, LRMD_IPC_OP_DESTROY)) { qb_ipcs_disconnect(ipc_client->ipcs); } else { crm_err("Unknown ipc proxy msg type %s" , msg_type); } if (rc < 0) { crm_warn("IPC Proxy send to ipc client %s failed, rc = %d", ipc_client->id, rc); } } static int32_t ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *client = crm_client_get(c); crm_client_t *ipc_proxy = crm_client_get_by_id(client->userdata); xmlNode *request = NULL; xmlNode *msg = NULL; if (!ipc_proxy) { qb_ipcs_disconnect(client->ipcs); return 0; } /* This is a request from the local ipc client going * to the ipc provider. * * Looking at the chain of events. * * -----remote node----------------|---- cluster node ------ * ipc_client <--1--> this code * <--2--> pacemaker-controld:remote_proxy_dispatch_internal() * <--3--> ipc server * * This function is receiving a request from connection * 1 and forwarding it to connection 2. */ request = crm_ipcs_recv(client, data, size, &id, &flags); if (!request) { return 0; } CRM_CHECK(client != NULL, crm_err("Invalid client"); free_xml(request); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); free_xml(request); return FALSE); /* This ensures that synced request/responses happen over the event channel * in the controller, allowing the controller to process the messages async. */ set_bit(flags, crm_ipc_proxied); client->request_id = id; msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_REQUEST); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); crm_xml_add(msg, F_LRMD_IPC_CLIENT, crm_client_name(client)); crm_xml_add(msg, F_LRMD_IPC_USER, client->user); crm_xml_add_int(msg, F_LRMD_IPC_MSG_ID, id); crm_xml_add_int(msg, F_LRMD_IPC_MSG_FLAGS, flags); add_message_xml(msg, F_LRMD_IPC_MSG, request); lrmd_server_send_notify(ipc_proxy, msg); free_xml(request); free_xml(msg); return 0; } /*! * \internal * \brief Notify a proxy provider that we wish to shut down * * \return 0 on success, -1 on error */ int ipc_proxy_shutdown_req(crm_client_t *ipc_proxy) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); int rc; crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_SHUTDOWN_REQ); /* We don't really have a session, but the controller needs this attribute * to recognize this as proxy communication. */ crm_xml_add(msg, F_LRMD_IPC_SESSION, "0"); rc = (lrmd_server_send_notify(ipc_proxy, msg) < 0)? -1 : 0; free_xml(msg); return rc; } static int32_t ipc_proxy_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); crm_client_t *ipc_proxy; if (client == NULL) { return 0; } ipc_proxy = crm_client_get_by_id(client->userdata); crm_trace("Connection %p", c); if (ipc_proxy) { xmlNode *msg = create_xml_node(NULL, T_LRMD_IPC_PROXY); crm_xml_add(msg, F_LRMD_IPC_OP, LRMD_IPC_OP_DESTROY); crm_xml_add(msg, F_LRMD_IPC_SESSION, client->id); lrmd_server_send_notify(ipc_proxy, msg); free_xml(msg); } g_hash_table_remove(ipc_clients, client->id); free(client->userdata); client->userdata = NULL; crm_client_destroy(client); return 0; } static void ipc_proxy_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); ipc_proxy_closed(c); } static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { .connection_accept = crmd_proxy_accept, .connection_created = ipc_proxy_created, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { .connection_accept = attrd_proxy_accept, .connection_created = ipc_proxy_created, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { .connection_accept = stonith_proxy_accept, .connection_created = ipc_proxy_created, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { .connection_accept = cib_proxy_accept_ro, .connection_created = ipc_proxy_created, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { .connection_accept = cib_proxy_accept_rw, .connection_created = ipc_proxy_created, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy }; void ipc_proxy_add_provider(crm_client_t *ipc_proxy) { // Prepending ensures the most recent connection is always first ipc_providers = g_list_prepend(ipc_providers, ipc_proxy); } void ipc_proxy_remove_provider(crm_client_t *ipc_proxy) { GHashTableIter iter; crm_client_t *ipc_client = NULL; char *key = NULL; GList *remove_these = NULL; GListPtr gIter = NULL; ipc_providers = g_list_remove(ipc_providers, ipc_proxy); g_hash_table_iter_init(&iter, ipc_clients); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & ipc_client)) { const char *proxy_id = ipc_client->userdata; if (safe_str_eq(proxy_id, ipc_proxy->id)) { crm_info("ipc proxy connection for client %s pid %d destroyed because cluster node disconnected.", ipc_client->id, ipc_client->pid); /* we can't remove during the iteration, so copy items * to a list we can destroy later */ remove_these = g_list_append(remove_these, ipc_client); } } for (gIter = remove_these; gIter != NULL; gIter = gIter->next) { ipc_client = gIter->data; // Disconnection callback will free the client here qb_ipcs_disconnect(ipc_client->ipcs); } /* just frees the list, not the elements in the list */ g_list_free(remove_these); } void ipc_proxy_init(void) { ipc_clients = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, NULL); cib_ipc_servers_init(&cib_ro, &cib_rw, &cib_shm, &cib_proxy_callbacks_ro, &cib_proxy_callbacks_rw); attrd_ipc_server_init(&attrd_ipcs, &attrd_proxy_callbacks); stonith_ipc_server_init(&stonith_ipcs, &stonith_proxy_callbacks); crmd_ipcs = crmd_ipc_server_init(&crmd_proxy_callbacks); if (crmd_ipcs == NULL) { crm_err("Failed to create controller: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } void ipc_proxy_cleanup(void) { if (ipc_providers) { g_list_free(ipc_providers); ipc_providers = NULL; } if (ipc_clients) { g_hash_table_destroy(ipc_clients); ipc_clients = NULL; } cib_ipc_servers_destroy(cib_ro, cib_rw, cib_shm); qb_ipcs_destroy(attrd_ipcs); qb_ipcs_destroy(stonith_ipcs); qb_ipcs_destroy(crmd_ipcs); cib_ro = NULL; cib_rw = NULL; cib_shm = NULL; } diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c index f3ab3c7994..8681419421 100644 --- a/daemons/execd/remoted_tls.c +++ b/daemons/execd/remoted_tls.c @@ -1,406 +1,408 @@ /* - * Copyright 2012-2018 David Vossel + * Copyright 2012-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pacemaker-execd.h" #ifdef HAVE_GNUTLS_GNUTLS_H # include // Hidden in liblrmd extern int lrmd_tls_set_key(gnutls_datum_t *key); # define LRMD_REMOTE_AUTH_TIMEOUT 10000 gnutls_psk_server_credentials_t psk_cred_s; gnutls_dh_params_t dh_params; static int ssock = -1; extern int lrmd_call_id; static void debug_log(int level, const char *str) { fputs(str, stderr); } /*! * \internal * \brief Read (more) TLS handshake data from client */ static int remoted__read_handshake_data(crm_client_t *client) { int rc = pcmk__read_handshake_data(client); if (rc == 0) { /* No more data is available at the moment. Just return for now; * we'll get invoked again once the client sends more. */ return 0; } else if (rc < 0) { crm_err("TLS handshake with remote client failed: %s " CRM_XS " rc=%d", gnutls_strerror(rc), rc); return -1; } if (client->remote->auth_timeout) { g_source_remove(client->remote->auth_timeout); } client->remote->auth_timeout = 0; client->remote->tls_handshake_complete = TRUE; crm_notice("Remote client connection accepted"); // Alert other clients of the new connection notify_of_new_client(client); return 0; } static int lrmd_remote_client_msg(gpointer data) { int id = 0; int rc = 0; int disconnected = 0; xmlNode *request = NULL; crm_client_t *client = data; if (client->remote->tls_handshake_complete == FALSE) { return remoted__read_handshake_data(client); } rc = crm_remote_ready(client->remote, 0); if (rc == 0) { /* no msg to read */ return 0; } else if (rc < 0) { crm_info("Remote client disconnected while polling it"); return -1; } crm_remote_recv(client->remote, -1, &disconnected); request = crm_remote_parse_buffer(client->remote); while (request) { crm_element_value_int(request, F_LRMD_REMOTE_MSG_ID, &id); crm_trace("Processing remote client request %d", id); if (!client->name) { const char *value = crm_element_value(request, F_LRMD_CLIENTNAME); if (value) { client->name = strdup(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, F_LRMD_CLIENTID, client->id); crm_xml_add(request, F_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, F_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); free_xml(request); /* process all the messages in the current buffer */ request = crm_remote_parse_buffer(client->remote); } if (disconnected) { crm_info("Remote client disconnected while reading from it"); return -1; } return 0; } static void lrmd_remote_client_destroy(gpointer user_data) { crm_client_t *client = user_data; if (client == NULL) { return; } crm_notice("Cleaning up after remote client %s disconnected " CRM_XS " id=%s", (client->name? client->name : ""), client->id); ipc_proxy_remove_provider(client); /* if this is the last remote connection, stop recurring * operations */ if (crm_hash_table_size(client_connections) == 1) { client_disconnect_cleanup(NULL); } if (client->remote->tls_session) { void *sock_ptr; int csock; sock_ptr = gnutls_transport_get_ptr(*client->remote->tls_session); csock = GPOINTER_TO_INT(sock_ptr); gnutls_bye(*client->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*client->remote->tls_session); gnutls_free(client->remote->tls_session); close(csock); } lrmd_client_destroy(client); return; } static gboolean lrmd_auth_timeout_cb(gpointer data) { crm_client_t *client = data; client->remote->auth_timeout = 0; if (client->remote->tls_handshake_complete == TRUE) { return FALSE; } mainloop_del_fd(client->remote->source); client->remote->source = NULL; crm_err("Remote client authentication timed out"); return FALSE; } static int lrmd_remote_listen(gpointer data) { int csock = 0; gnutls_session_t *session = NULL; crm_client_t *new_client = NULL; static struct mainloop_fd_callbacks lrmd_remote_fd_cb = { .dispatch = lrmd_remote_client_msg, .destroy = lrmd_remote_client_destroy, }; csock = crm_remote_accept(ssock); if (csock < 0) { return TRUE; } session = pcmk__new_tls_session(csock, GNUTLS_SERVER, GNUTLS_CRD_PSK, psk_cred_s); if (session == NULL) { close(csock); return TRUE; } new_client = crm_client_alloc(NULL); new_client->remote = calloc(1, sizeof(crm_remote_t)); new_client->kind = CRM_CLIENT_TLS; new_client->remote->tls_session = session; // Require the client to authenticate within this time new_client->remote->auth_timeout = g_timeout_add(LRMD_REMOTE_AUTH_TIMEOUT, lrmd_auth_timeout_cb, new_client); crm_info("Remote client pending authentication " CRM_XS " %p id: %s", new_client, new_client->id); new_client->remote->source = mainloop_add_fd("pacemaker-remote-client", G_PRIORITY_DEFAULT, csock, new_client, &lrmd_remote_fd_cb); return TRUE; } static void lrmd_remote_connection_destroy(gpointer user_data) { crm_notice("TLS server session ended"); return; } static int lrmd_tls_server_key_cb(gnutls_session_t session, const char *username, gnutls_datum_t * key) { return lrmd_tls_set_key(key); } static int bind_and_listen(struct addrinfo *addr) { int optval; int fd; int rc; char buffer[INET6_ADDRSTRLEN] = { 0, }; crm_sockaddr2str(addr->ai_addr, buffer); crm_trace("Attempting to bind to address %s", buffer); fd = socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); if (fd < 0) { crm_perror(LOG_ERR, "Listener socket creation failed"); return -1; } /* reuse address */ optval = 1; rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)); if (rc < 0) { crm_perror(LOG_ERR, "Local address reuse not allowed on %s", buffer); close(fd); return -1; } if (addr->ai_family == AF_INET6) { optval = 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &optval, sizeof(optval)); if (rc < 0) { crm_perror(LOG_INFO, "Couldn't disable IPV6-only on %s", buffer); close(fd); return -1; } } if (bind(fd, addr->ai_addr, addr->ai_addrlen) != 0) { crm_perror(LOG_ERR, "Cannot bind to %s", buffer); close(fd); return -1; } if (listen(fd, 10) == -1) { crm_perror(LOG_ERR, "Cannot listen on %s", buffer); close(fd); return -1; } return fd; } int lrmd_init_remote_tls_server() { int rc; int filter; int port = crm_default_remote_port(); struct addrinfo hints, *res = NULL, *iter; char port_str[6]; // at most "65535" gnutls_datum_t psk_key = { NULL, 0 }; static struct mainloop_fd_callbacks remote_listen_fd_callbacks = { .dispatch = lrmd_remote_listen, .destroy = lrmd_remote_connection_destroy, }; - crm_notice("Starting TLS listener on port %d", port); + crm_debug("Starting TLS listener on port %d", port); crm_gnutls_global_init(); gnutls_global_set_log_function(debug_log); if (pcmk__init_tls_dh(&dh_params) != GNUTLS_E_SUCCESS) { return -1; } gnutls_psk_allocate_server_credentials(&psk_cred_s); gnutls_psk_set_server_credentials_function(psk_cred_s, lrmd_tls_server_key_cb); gnutls_psk_set_server_dh_params(psk_cred_s, dh_params); /* The key callback won't get called until the first client connection * attempt. Do it once here, so we can warn the user at start-up if we can't * read the key. We don't error out, though, because it's fine if the key is * going to be added later. */ rc = lrmd_tls_set_key(&psk_key); if (rc != 0) { crm_warn("A cluster connection will not be possible until the key is available"); } gnutls_free(psk_key.data); memset(&hints, 0, sizeof(struct addrinfo)); /* Bind to the wildcard address (INADDR_ANY or IN6ADDR_ANY_INIT). * @TODO allow user to specify a specific address */ hints.ai_flags = AI_PASSIVE; hints.ai_family = AF_UNSPEC; /* Return IPv6 or IPv4 */ hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; snprintf(port_str, sizeof(port_str), "%d", port); rc = getaddrinfo(NULL, port_str, &hints, &res); if (rc) { crm_err("Unable to get IP address info for local node: %s", gai_strerror(rc)); return -1; } iter = res; filter = AF_INET6; /* Try IPv6 addresses first, then IPv4 */ while (iter) { if (iter->ai_family == filter) { ssock = bind_and_listen(iter); } if (ssock != -1) { break; } iter = iter->ai_next; if (iter == NULL && filter == AF_INET6) { iter = res; filter = AF_INET; } } if (ssock < 0) { goto init_remote_cleanup; } mainloop_add_fd("pacemaker-remote-server", G_PRIORITY_DEFAULT, ssock, NULL, &remote_listen_fd_callbacks); rc = ssock; init_remote_cleanup: if (rc < 0) { close(ssock); ssock = 0; } else { crm_debug("Started TLS listener on port %d", port); } freeaddrinfo(res); return rc; } void lrmd_tls_server_destroy(void) { if (psk_cred_s) { gnutls_psk_free_server_credentials(psk_cred_s); psk_cred_s = 0; } if (ssock > 0) { close(ssock); ssock = 0; } } #endif diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index b394fd6e6c..d25b84c6f7 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,2748 +1,2754 @@ /* - * Copyright 2009-2018 Andrew Beekhof + * Copyright 2009-2019 the Pacemaker project contributors + * + * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if SUPPORT_CIBSECRETS # include #endif #include GHashTable *device_list = NULL; GHashTable *topology = NULL; GList *cmd_list = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GListPtr capable; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data); static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; /* milliseconds */ int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *victim; uint32_t victim_nodeid; char *action; char *device; char *mode; GListPtr device_list; GListPtr device_next; void *internal_user_data; void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; } async_command_t; static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc); static gboolean is_action_required(const char *action, stonith_device_t *device) { return device && device->automatic_unfencing && safe_str_eq(action, "on"); } static int get_action_delay_max(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_max_ms = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX); if (value) { delay_max_ms = crm_get_msec(value); } return delay_max_ms; } static int get_action_delay_base(stonith_device_t * device, const char * action) { const char *value = NULL; int delay_base_ms = 0; if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) { return 0; } value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE); if (value) { delay_base_ms = crm_get_msec(value); } return delay_base_ms; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(stonith_device_t * device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (safe_str_eq(action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = "off"; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->victim); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->mode); free(cmd->op); free(cmd); } static async_command_t * create_async_command(xmlNode * msg) { async_command_t *cmd = NULL; xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *action = crm_element_value(op, F_STONITH_ACTION); CRM_CHECK(action != NULL, crm_log_xml_warn(msg, "NoAction"); return NULL); crm_log_xml_trace(msg, "Command"); cmd = calloc(1, sizeof(async_command_t)); crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->action = strdup(action); cmd->victim = crm_element_value_copy(op, F_STONITH_TARGET); cmd->mode = crm_element_value_copy(op, F_STONITH_MODE); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); CRM_CHECK(cmd->op != NULL, crm_log_xml_warn(msg, "NoOp"); free_async_command(cmd); return NULL); CRM_CHECK(cmd->client != NULL, crm_log_xml_warn(msg, "NoClient")); cmd->done_cb = st_child_done; cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, STONITH_ATTR_ACTION_LIMIT); if (value) { action_limit = crm_parse_int(value, "1"); if (action_limit == 0) { /* pcmk_action_limit should not be 0. Enforce it to be 1. */ action_limit = 1; } } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, active_cmds > 1 ? "s" : ""); return TRUE; } if (device->pending_ops) { GList *first = device->pending_ops; cmd = first->data; if (cmd && cmd->delay_id) { crm_trace ("Operation %s%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, cmd->start_delay); return TRUE; } device->pending_ops = g_list_remove_link(device->pending_ops, first); g_list_free_1(first); } if (cmd == NULL) { crm_trace("Nothing further to do for %s", device->id); return TRUE; } if(safe_str_eq(device->agent, STONITH_WATCHDOG_AGENT)) { if(safe_str_eq(cmd->action, "reboot")) { pcmk_panic(__FUNCTION__); return TRUE; } else if(safe_str_eq(cmd->action, "off")) { pcmk_panic(__FUNCTION__); return TRUE; } else { crm_info("Faking success for %s watchdog operation", cmd->action); cmd->done_cb(0, 0, NULL, cmd); return TRUE; } } #if SUPPORT_CIBSECRETS if (replace_secret_params(device->id, device->params) < 0) { /* replacing secrets failed! */ if (safe_str_eq(cmd->action,"stop")) { /* don't fail on stop! */ crm_info("proceeding with the stop operation for %s", device->id); } else { crm_err("failed to get secrets for %s, " "considering resource not configured", device->id); exec_rc = PCMK_OCF_NOT_CONFIGURED; cmd->done_cb(0, exec_rc, NULL, cmd); return TRUE; } } #endif action_str = cmd->action; if (safe_str_eq(cmd->action, "reboot") && is_not_set(device->flags, st_device_supports_reboot)) { crm_warn("Agent '%s' does not advertise support for 'reboot', performing 'off' action instead", device->agent); action_str = "off"; } action = stonith_action_create(device->agent, action_str, cmd->victim, cmd->victim_nodeid, cmd->timeout, device->params, device->aliases); /* for async exec, exec_rc is pid if positive and error code if negative/zero */ exec_rc = stonith_action_execute_async(action, (void *)cmd, cmd->done_cb); if (exec_rc > 0) { crm_debug("Operation %s%s%s on %s now running with pid=%d, timeout=%ds", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, exec_rc, cmd->timeout); cmd->active_on = device; } else { crm_warn("Operation %s%s%s on %s failed: %s (%d)", cmd->action, cmd->victim ? " for node " : "", cmd->victim ? cmd->victim : "", device->id, pcmk_strerror(exec_rc), exec_rc); cmd->done_cb(0, exec_rc, NULL, cmd); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = NULL; cmd->delay_id = 0; device = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && cmd->victim) { crm_node_t *node = crm_get_peer(0, cmd->victim); cmd->victim_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling %s on %s for remote peer %s with op id (%s) (timeout=%ds)", cmd->action, device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling %s on %s for %s (timeout=%ds)", cmd->action, device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn("Base-delay (%dms) is larger than max-delay (%dms) " "for %s on %s - limiting to max-delay", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dont_call] We're not using rand() for security cmd->start_delay = ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; crm_notice("Delaying %s on %s for %dms (timeout=%ds, base=%dms, " "max=%dms)", cmd->action, device->id, cmd->start_delay, cmd->timeout, delay_base, delay_max); cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GListPtr gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation %s", device->id, cmd->action); cmd->done_cb(0, -ENODEV, NULL, cmd); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); free(device->on_target_actions); free(device->agent); free(device->id); free(device); } void free_device_list() { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list() { if (device_list == NULL) { device_list = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GListPtr * targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = crm_strcase_table_new(); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } static void parse_host_line(const char *line, int max, GListPtr * output) { int lpc = 0; int last = 0; if (max <= 0) { return; } /* Check for any complaints about additional parameters that the device doesn't understand */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping: %s", line); return; } crm_trace("Processing %d bytes: [%s]", max, line); /* Skip initial whitespace */ for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { last = lpc + 1; } /* Now the actual content */ for (lpc = 0; lpc <= max; lpc++) { gboolean a_space = isspace(line[lpc]); if (a_space && lpc < max && isspace(line[lpc + 1])) { /* fast-forward to the end of the spaces */ } else if (a_space || line[lpc] == ',' || line[lpc] == ';' || line[lpc] == 0) { int rc = 1; char *entry = NULL; if (lpc != last) { entry = calloc(1, 1 + lpc - last); rc = sscanf(line + last, "%[a-zA-Z0-9_-.]", entry); } if (entry == NULL) { /* Skip */ } else if (rc != 1) { crm_warn("Could not parse (%d %d): %s", last, lpc, line + last); } else if (safe_str_neq(entry, "on") && safe_str_neq(entry, "off")) { crm_trace("Adding '%s'", entry); *output = g_list_append(*output, entry); entry = NULL; } free(entry); last = lpc + 1; } } } static GListPtr parse_host_list(const char *hosts) { int lpc = 0; int max = 0; int last = 0; GListPtr output = NULL; if (hosts == NULL) { return output; } max = strlen(hosts); for (lpc = 0; lpc <= max; lpc++) { if (hosts[lpc] == '\n' || hosts[lpc] == 0) { int len = lpc - last; if(len > 1) { char *line = strndup(hosts + last, len); line[len] = 0; /* Because it might be '\n' */ parse_host_line(line, len, &output); free(line); } last = lpc + 1; } } crm_trace("Parsed %d entries from '%s'", g_list_length(output), hosts); return output; } GHashTable *metadata_cache = NULL; void free_metadata_cache() { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache() { if (metadata_cache == NULL) { metadata_cache = crm_str_table_new(); } } static xmlNode * get_agent_metadata(const char *agent) { xmlNode *xml = NULL; char *buffer = NULL; init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if(safe_str_eq(agent, STONITH_WATCHDOG_AGENT)) { return NULL; } else if(buffer == NULL) { stonith_t *st = stonith_api_new(); int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return NULL; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } xml = string2xml(buffer); return xml; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } #define MAX_ACTION_LEN 256 static char * add_action(char *actions, const char *action) { int offset = 0; if (actions == NULL) { actions = calloc(1, MAX_ACTION_LEN); } else { offset = strlen(actions); } if (offset > 0) { offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, " "); } offset += snprintf(actions+offset, MAX_ACTION_LEN - offset, "%s", action); return actions; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *on_target = NULL; const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; on_target = crm_element_value(match, "on_target"); action = crm_element_value(match, "name"); if(safe_str_eq(action, "list")) { set_bit(device->flags, st_device_supports_list); } else if(safe_str_eq(action, "status")) { set_bit(device->flags, st_device_supports_status); } else if(safe_str_eq(action, "reboot")) { set_bit(device->flags, st_device_supports_reboot); } else if (safe_str_eq(action, "on")) { /* "automatic" means the cluster will unfence node when it joins */ const char *automatic = crm_element_value(match, "automatic"); /* "required" is a deprecated synonym for "automatic" */ const char *required = crm_element_value(match, "required"); if (crm_is_true(automatic) || crm_is_true(required)) { device->automatic_unfencing = TRUE; } } if (action && crm_is_true(on_target)) { device->on_target_actions = add_action(device->on_target_actions, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in,out] params Device parameters */ static GHashTable * xml2device_params(const char *name, xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "reboot") == 0) { crm_warn("Ignoring %s='reboot' (see stonith-action cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, "off") == 0) { map_action(params, "reboot", value); } else { map_action(params, "off", value); map_action(params, "reboot", value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static stonith_device_t * build_device_from_xml(xmlNode * msg) { const char *value; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTLIST); if (value) { device->targets = parse_host_list(value); } value = g_hash_table_lookup(device->params, STONITH_ATTR_HOSTMAP); device->aliases = build_port_aliases(value, &(device->targets)); device->agent_metadata = get_agent_metadata(device->agent); read_action_metadata(device); value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (safe_str_eq(value, "unfencing")) { device->automatic_unfencing = TRUE; } if (is_action_required("on", device)) { crm_info("The fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions) { crm_info("The fencing device '%s' requires actions (%s) to be executed on the target node", device->id, device->on_target_actions); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTCHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTLIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP)) { check_type = "static-list"; } else if(is_set(dev->flags, st_device_supports_list)){ check_type = "dynamic-list"; } else if(is_set(dev->flags, st_device_supports_status)){ check_type = "status"; } else { check_type = "none"; } } return check_type; } void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *victim, int timeout, void *internal_user_data, void (*done_cb) (GPid pid, int rc, const char *output, gpointer user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); cmd->victim = victim ? strdup(victim) : NULL; cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } gboolean string_in_list(GListPtr list, const char *item) { int lpc = 0; int max = g_list_length(list); for (lpc = 0; lpc < max; lpc++) { const char *value = g_list_nth_data(list, lpc); if (safe_str_eq(item, value)) { return TRUE; } else { crm_trace("%d: '%s' != '%s'", lpc, item, value); } } return FALSE; } static void status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (rc == 1 /* unknown */ ) { crm_trace("Host %s is not known by %s", search->host, dev->id); } else if (rc == 0 /* active */ || rc == 2 /* inactive */ ) { crm_trace("Host %s is known by %s", search->host, dev->id); can = TRUE; } else { crm_notice("Unknown result when testing if %s can fence %s: rc=%d", dev->id, search->host, rc); } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd->device ? g_hash_table_lookup(device_list, cmd->device) : NULL; gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); /* If we successfully got the targets earlier, don't disable. */ if (rc != 0 && !dev->targets) { crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); /* Fall back to status */ g_hash_table_replace(dev->params, strdup(STONITH_ATTR_HOSTCHECK), strdup("status")); g_list_free_full(dev->targets, free); dev->targets = NULL; } else if (!rc) { crm_info("Refreshing port list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = parse_host_list(output); dev->targets_age = time(NULL); } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (string_in_list(dev->targets, alias)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || safe_str_neq(other_value, value)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(stonith_device_t * device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (safe_str_neq(dup->agent, device->agent)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(msg); CRM_CHECK(device != NULL, return -ENOMEM); dup = device_has_duplicate(device); if (dup) { crm_debug("Device '%s' already existed in device list (%d active devices)", device->id, g_hash_table_size(device_list)); free_device(device); device = dup; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting an existing entry for %s from the cib", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); crm_notice("Added '%s' to the device list (%d active devices)", device->id, g_hash_table_size(device_list)); } if (desc) { *desc = device->id; } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } int stonith_device_remove(const char *id, gboolean from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); if (!device) { crm_info("Device '%s' not found (%d active devices)", id, g_hash_table_size(device_list)); return pcmk_ok; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); crm_info("Removed '%s' from the device list (%d active devices)", id, g_hash_table_size(device_list)); } else { crm_trace("Not removing '%s' from the device list (%d active devices) " "- still %s%s_registered", id, g_hash_table_size(device_list), device->cib_registered?"cib":"", device->api_registered?"api":""); } return pcmk_ok; } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(stonith_topology_t * tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list() { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list() { if (topology == NULL) { topology = g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free_topology_entry); } } char *stonith_level_key(xmlNode *level, int mode) { if(mode == -1) { mode = stonith_level_kind(level); } switch(mode) { case 0: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case 1: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case 2: { const char *name = crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); const char *value = crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE); if(name && value) { return crm_strdup_printf("%s=%s", name, value); } } default: return crm_strdup_printf("Unknown-%d-%s", mode, ID(level)); } } int stonith_level_kind(xmlNode * level) { int mode = 0; const char *target = crm_element_value(level, XML_ATTR_STONITH_TARGET); if(target == NULL) { mode++; target = crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN); } if(stand_alone == FALSE && target == NULL) { mode++; if(crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) == NULL) { mode++; } else if(crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) == NULL) { mode++; } } return mode; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Register a STONITH level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, will be set to string representation ("TARGET[LEVEL]") * * \return pcmk_ok on success, -EINVAL if XML does not specify valid level index */ int stonith_level_register(xmlNode *msg, char **desc) { int id = 0; xmlNode *level; int mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; /* Allow the XML here to point to the level tag directly, or wrapped in * another tag. If directly, don't search by xpath, because it might give * multiple hits (e.g. if the XML is the CIB). */ if (safe_str_eq(TYPE(msg), XML_TAG_FENCING_LEVEL)) { level = msg; } else { level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); } CRM_CHECK(level != NULL, return -EINVAL); mode = stonith_level_kind(level); target = stonith_level_key(level, mode); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (mode >= 3 || (id <= 0) || (id >= ST_LEVEL_MAX)) { crm_trace("Could not add %s[%d] (%d) to the topology (%d active entries)", target, id, mode, g_hash_table_size(topology)); free(target); crm_log_xml_err(level, "Bad topology"); return -EINVAL; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); crm_info("Target %s has %d active fencing levels", tp->target, count_active_levels(tp)); return pcmk_ok; } int stonith_level_remove(xmlNode *msg, char **desc) { int id = 0; stonith_topology_t *tp; char *target; /* Unlike additions, removal requests should always have one level tag */ xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); CRM_CHECK(level != NULL, return -EINVAL); target = stonith_level_key(level, -1); crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); if (desc) { *desc = crm_strdup_printf("%s[%d]", target, id); } /* Sanity-check arguments */ if (id >= ST_LEVEL_MAX) { free(target); return -EINVAL; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { crm_info("Topology for %s not found (%d active entries)", target, g_hash_table_size(topology)); } else if (id == 0 && g_hash_table_remove(topology, target)) { crm_info("Removed all %s related entries from the topology (%d active entries)", target, g_hash_table_size(topology)); } else if (id > 0 && tp->levels[id] != NULL) { g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; crm_info("Removed level '%d' from topology for %s (%d active levels remaining)", id, target, count_active_levels(tp)); } free(target); return pcmk_ok; } static int stonith_device_action(xmlNode * msg, char **output) { int rc = pcmk_ok; xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if (id) { crm_trace("Looking for '%s'", id); device = g_hash_table_lookup(device_list, id); } if (device && device->api_registered == FALSE) { rc = -ENODEV; } else if (device) { cmd = create_async_command(msg); if (cmd == NULL) { return -EPROTO; } schedule_stonith_command(cmd, device); rc = -EINPROGRESS; } else { crm_info("Device %s not found", id ? id : ""); rc = -ENODEV; } return rc; } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { crm_debug("Finished Search. %d devices can perform action (%s) on node %s", g_list_length(search->capable), search->action ? search->action : "", search->host ? search->host : ""); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = safe_str_eq(target, stonith_our_uname); if (device && action && device->on_target_actions && strstr(device->on_target_actions, action)) { if (!localhost_is_target) { crm_trace("%s operation with %s can only be executed for localhost not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("%s operation does not support self-fencing", action); return FALSE; } return TRUE; } static void can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = NULL; const char *host = search->host; const char *alias = NULL; CRM_LOG_ASSERT(dev != NULL); if (dev == NULL) { goto search_report_results; } else if (host == NULL) { can = TRUE; goto search_report_results; } /* Short-circuit query if this host is not allowed to perform the action */ if (safe_str_eq(search->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so short-circuit * only if all three are disallowed. If only one or two are disallowed, * we'll report that with the results. We never allow suicide for * remapped "on" operations because the host is off at that point. */ if (!localhost_is_eligible(dev, "reboot", host, search->allow_suicide) && !localhost_is_eligible(dev, "off", host, search->allow_suicide) && !localhost_is_eligible(dev, "on", host, FALSE)) { goto search_report_results; } } else if (!localhost_is_eligible(dev, search->action, host, search->allow_suicide)) { goto search_report_results; } alias = g_hash_table_lookup(dev->aliases, host); if (alias == NULL) { alias = host; } check_type = target_list_type(dev); if (safe_str_eq(check_type, "none")) { can = TRUE; } else if (safe_str_eq(check_type, "static-list")) { /* Presence in the hostmap is sufficient * Only use if all hosts on which the device can be active can always fence all listed hosts */ if (string_in_list(dev->targets, host)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, STONITH_ATTR_HOSTMAP) && g_hash_table_lookup(dev->aliases, host)) { can = TRUE; } } else if (safe_str_eq(check_type, "dynamic-list")) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { - crm_trace("Running %s command to see if %s can fence %s (%s)", + crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "list", NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (string_in_list(dev->targets, alias)) { can = TRUE; } } else if (safe_str_eq(check_type, "status")) { - crm_trace("Running %s command to see if %s can fence %s (%s)", + crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev->id, search->host, search->action); schedule_internal_command(__FUNCTION__, dev, "status", search->host, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { - crm_err("Unknown check type: %s", check_type); + crm_err("Invalid value for " STONITH_ATTR_HOSTCHECK ": %s", check_type); + check_type = "Invalid " STONITH_ATTR_HOSTCHECK; } if (safe_str_eq(host, alias)) { - crm_notice("%s can%s fence (%s) %s: %s", dev->id, can ? "" : " not", search->action, host, check_type); + crm_notice("%s is%s eligible to fence (%s) %s: %s", + dev->id, (can? "" : " not"), search->action, host, + check_type); } else { - crm_notice("%s can%s fence (%s) %s (aka. '%s'): %s", dev->id, can ? "" : " not", search->action, host, alias, + crm_notice("%s is%s eligible to fence (%s) %s (aka. '%s'): %s", + dev->id, (can? "" : " not"), search->action, host, alias, check_type); } search_report_results: search_devices_record_result(search, dev ? dev->id : NULL, can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data)) { struct device_search_s *search; int per_device_timeout = DEFAULT_QUERY_TIMEOUT; int devices_needing_async_query = 0; char *key = NULL; const char *check_type = NULL; GHashTableIter gIter; stonith_device_t *device = NULL; if (!g_hash_table_size(device_list)) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { callback(NULL, user_data); return; } g_hash_table_iter_init(&gIter, device_list); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&device)) { check_type = target_list_type(device); if (safe_str_eq(check_type, "status") || safe_str_eq(check_type, "dynamic-list")) { devices_needing_async_query++; } } /* If we have devices that require an async event in order to know what * nodes they can fence, we have to give the events a timeout. The total * query timeout is divided among those events. */ if (devices_needing_async_query) { per_device_timeout = timeout / devices_needing_async_query; if (!per_device_timeout) { crm_err("STONITH timeout %ds is too low; using %ds, but consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); per_device_timeout = DEFAULT_QUERY_TIMEOUT; } else if (per_device_timeout < DEFAULT_QUERY_TIMEOUT) { crm_notice("STONITH timeout %ds is low for the current configuration;" " consider raising to at least %ds", timeout, DEFAULT_QUERY_TIMEOUT * devices_needing_async_query); } } search->host = host ? strdup(host) : NULL; search->action = action ? strdup(action) : NULL; search->per_device_timeout = per_device_timeout; /* We are guaranteed this many replies. Even if a device gets * unregistered some how during the async search, we will get * the correct number of replies. */ search->replies_needed = g_hash_table_size(device_list); search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; /* kick off the search */ crm_debug("Searching through %d devices to see what is capable of action (%s) for target %s", search->replies_needed, search->action ? search->action : "", search->host ? search->host : ""); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device */ static void add_action_specific_attributes(xmlNode *xml, const char *action, stonith_device_t *device) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action %s is required on %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action %s has timeout %dms on %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action %s has maximum random delay %dms on %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max / 1000); } delay_base = get_action_delay_base(device, action); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base / 1000); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action %s has maximum random delay %dms on %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action %s has a static delay of %dms on %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action %s has a minimum delay of %dms and a randomly chosen " "maximum delay of %dms on %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action %s on %s is disallowed for local host", action, device->id); crm_xml_add(xml, F_STONITH_ACTION_DISALLOWED, XML_BOOLEAN_TRUE); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device); add_disallowed(child, action, device, target, allow_suicide); } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GListPtr lpc = NULL; /* Pack the results into XML */ list = create_xml_node(NULL, __FUNCTION__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (is_not_set(device->flags, st_device_supports_reboot) && safe_str_eq(query->action, "reboot")) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = "off"; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device); if (safe_str_eq(query->action, "reboot")) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "off", device, query->target, is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, "on", device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching devices for '%s'", available_devices, query->target); } else { crm_debug("%d devices installed", available_devices); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, query->client_id); free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } static void stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int call_options) { struct st_query_data *query = NULL; const char *action = NULL; const char *target = NULL; int timeout = 0; xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_TRACE); crm_element_value_int(msg, F_STONITH_TIMEOUT, &timeout); if (dev) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); if (device && safe_str_eq(device, "manual_ack")) { /* No query or reply necessary */ return; } } crm_log_xml_debug(msg, "Query"); query = calloc(1, sizeof(struct st_query_data)); query->reply = stonith_construct_reply(msg, NULL, NULL, pcmk_ok); query->remote_peer = remote_peer ? strdup(remote_peer) : NULL; query->client_id = client_id ? strdup(client_id) : NULL; query->target = target ? strdup(target) : NULL; query->action = action ? strdup(action) : NULL; query->call_options = call_options; get_capable_devices(target, action, timeout, is_set(call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb); } #define ST_LOG_OUTPUT_MAX 512 static void log_operation(async_command_t * cmd, int rc, int pid, const char *next, const char *output) { if (rc == 0) { next = NULL; } if (cmd->victim != NULL) { do_crm_log(rc == 0 ? LOG_NOTICE : LOG_ERR, "Operation '%s' [%d] (call %d from %s) for host '%s' with device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->id, cmd->client_name, cmd->victim, cmd->device, rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } else { do_crm_log_unlikely(rc == 0 ? LOG_DEBUG : LOG_NOTICE, "Operation '%s' [%d] for device '%s' returned: %d (%s)%s%s", cmd->action, pid, cmd->device, rc, pcmk_strerror(rc), (next? ", retrying with " : ""), (next ? next : "")); } if (output) { /* Logging the whole string confuses syslog when the string is xml */ char *prefix = crm_strdup_printf("%s:%d", cmd->device, pid); crm_log_output(rc == 0 ? LOG_DEBUG : LOG_WARNING, prefix, output); free(prefix); } } static void stonith_send_async_reply(async_command_t * cmd, const char *output, int rc, GPid pid) { xmlNode *reply = NULL; gboolean bcast = FALSE; reply = stonith_construct_async_reply(cmd, output, NULL, rc); if (safe_str_eq(cmd->action, "metadata")) { /* Too verbose to log */ crm_trace("Metadata query for %s", cmd->device); output = NULL; } else if (crm_str_eq(cmd->action, "monitor", TRUE) || crm_str_eq(cmd->action, "list", TRUE) || crm_str_eq(cmd->action, "status", TRUE)) { crm_trace("Never broadcast %s replies", cmd->action); } else if (!stand_alone && safe_str_eq(cmd->origin, cmd->victim) && safe_str_neq(cmd->action, "on")) { crm_trace("Broadcast %s reply for %s", cmd->action, cmd->victim); crm_xml_add(reply, F_SUBTYPE, "broadcast"); bcast = TRUE; } log_operation(cmd, rc, pid, NULL, output); crm_log_xml_trace(reply, "Reply"); if (bcast) { crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else if (cmd->origin) { crm_trace("Directed reply to %s", cmd->origin); send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); } else { crm_trace("Directed local %ssync reply to %s", (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); } if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, F_STONITH_RC, rc); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); } free_xml(reply); } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device; CRM_CHECK(cmd != NULL, return); if (!cmd->device) { return; } device = g_hash_table_lookup(device_list, cmd->device); if (device) { crm_trace("Cancel scheduled %s on %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } static void st_child_done(GPid pid, int rc, const char *output, gpointer user_data) { stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; async_command_t *cmd = user_data; GListPtr gIter = NULL; GListPtr gIterNext = NULL; CRM_CHECK(cmd != NULL, return); cmd->active_on = NULL; /* The device is ready to do something else now */ device = g_hash_table_lookup(device_list, cmd->device); if (device) { if (rc == pcmk_ok && (safe_str_eq(cmd->action, "list") || safe_str_eq(cmd->action, "monitor") || safe_str_eq(cmd->action, "status"))) { device->verified = TRUE; } mainloop_set_trigger(device->work); } crm_debug("Operation '%s' on '%s' completed with rc=%d (%d remaining)", cmd->action, cmd->device, rc, g_list_length(cmd->device_next)); if (rc == 0) { GListPtr iter; /* see if there are any required devices left to execute for this op */ for (iter = cmd->device_next; iter != NULL; iter = iter->next) { next_device = g_hash_table_lookup(device_list, iter->data); if (next_device != NULL && is_action_required(cmd->action, next_device)) { cmd->device_next = iter->next; break; } next_device = NULL; } } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->device_next->data); cmd->device_next = cmd->device_next->next; } /* this operation requires more fencing, hooray! */ if (next_device) { log_operation(cmd, rc, pid, next_device->id, output); schedule_stonith_command(cmd, next_device); /* Prevent cmd from being freed */ cmd = NULL; goto done; } stonith_send_async_reply(cmd, output, rc, pid); if (rc != 0) { goto done; } /* Check to see if any operations are scheduled to do the exact * same thing that just completed. If so, rather than * performing the same fencing operation twice, return the result * of this operation for all pending commands it matches. */ for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd_other = gIter->data; gIterNext = gIter->next; if (cmd == cmd_other) { continue; } /* A pending scheduled command matches the command that just finished if. * 1. The client connections are different. * 2. The node victim is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (safe_str_eq(cmd->client, cmd_other->client) || safe_str_neq(cmd->victim, cmd_other->victim) || safe_str_neq(cmd->action, cmd_other->action) || safe_str_neq(cmd->device, cmd_other->device)) { continue; } /* Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to * off, then cmd->action will be reboot and will be merged with any * other reboot requests. If the originating fencer remapped a * topology reboot to off then on, we will get here once with * cmd->action "off" and once with "on", and they will be merged * separately with similar requests. */ crm_notice ("Merging stonith action %s for node %s originating from client %s with identical stonith request from client %s", cmd_other->action, cmd_other->victim, cmd_other->client_name, cmd->client_name); cmd_list = g_list_remove_link(cmd_list, gIter); stonith_send_async_reply(cmd_other, output, rc, pid); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(gIter); } done: free_async_command(cmd); } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; crm_info("Found %d matching devices for '%s'", g_list_length(devices), cmd->victim); if (g_list_length(devices) > 0) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); if (device) { cmd->device_list = devices; cmd->device_next = devices->next; devices = NULL; /* list owned by cmd now */ } } /* we have a device, schedule it for fencing. */ if (device) { schedule_stonith_command(cmd, device); /* in progress */ return; } /* no device found! */ stonith_send_async_reply(cmd, NULL, -ENODEV, 0); free_async_command(cmd); g_list_free_full(devices, free); } static int stonith_fence(xmlNode * msg) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = create_async_command(msg); xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); if (cmd == NULL) { return -EPROTO; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); return -ENODEV; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (cmd->options & st_opt_cs_nodeid) { int nodeid = crm_atoi(host, NULL); crm_node_t *node = crm_find_known_peer_full(nodeid, NULL, CRM_GET_PEER_ANY); if (node) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb); } return -EINPROGRESS; } xmlNode * stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, int rc) { int lpc = 0; xmlNode *reply = NULL; const char *name = NULL; const char *value = NULL; const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, "st_output", output); crm_xml_add_int(reply, F_STONITH_RC, rc); CRM_CHECK(request != NULL, crm_warn("Can't create a sane reply"); return reply); for (lpc = 0; lpc < DIMOF(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { crm_trace("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } static xmlNode * stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) { xmlNode *reply = NULL; crm_trace("Creating a basic reply"); reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __FUNCTION__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->victim); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); crm_xml_add_int(reply, F_STONITH_RC, rc); crm_xml_add(reply, "st_output", output); if (data != NULL) { crm_info("Attaching reply output"); add_message_xml(reply, F_STONITH_CALLDATA, data); } return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } /*! * \internal * \brief Determine if we need to use an alternate node to * fence the target. If so return that node's uname * * \retval NULL, no alternate host * \retval uname, uname of alternate host to use */ static const char * check_alternate_host(const char *target) { const char *alternate_host = NULL; crm_trace("Checking if we (%s) can fence %s", stonith_our_uname, target); if (find_topology_for_host(target) && safe_str_eq(target, stonith_our_uname)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_trace("Checking for %s.%d != %s", entry->uname, entry->id, target); if (fencing_peer_active(entry) && safe_str_neq(entry->uname, target)) { alternate_host = entry->uname; break; } } if (alternate_host == NULL) { crm_err("No alternate host available to handle complex self fencing request"); g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { crm_notice("Peer[%d] %s", entry->id, entry->uname); } } } return alternate_host; } static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, const char *client_id) { if (remote_peer) { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } else { do_local_reply(reply, client_id, is_set(call_options, st_opt_sync_call), remote_peer != NULL); } } static int handle_request(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = -EOPNOTSUPP; xmlNode *data = NULL; xmlNode *reply = NULL; char *output = NULL; const char *op = crm_element_value(request, F_STONITH_OPERATION); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (crm_str_eq(op, CRM_OP_REGISTER, TRUE)) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(client); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, client->id); crm_ipcs_send(client, id, reply, flags); client->request_id = 0; free_xml(reply); return 0; } else if (crm_str_eq(op, STONITH_OP_EXEC, TRUE)) { rc = stonith_device_action(request, &output); } else if (crm_str_eq(op, STONITH_OP_TIMEOUT_UPDATE, TRUE)) { const char *call_id = crm_element_value(request, F_STONITH_CALLID); const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); return 0; } else if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { if (remote_peer) { create_remote_stonith_op(client_id, request, TRUE); /* Record it for the future notification */ } stonith_query(request, remote_peer, client_id, call_options); return 0; } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { const char *flag_name = NULL; CRM_ASSERT(client); flag_name = crm_element_value(request, F_STONITH_NOTIFY_ACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): ON", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } flag_name = crm_element_value(request, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name) { crm_debug("Setting %s callbacks for %s (%s): off", flag_name, client->name, client->id); client->options |= get_stonith_flag(flag_name); } if (flags & crm_ipc_client_response) { crm_ipcs_send_ack(client, id, flags, "ack", __FUNCTION__, __LINE__); } return 0; } else if (crm_str_eq(op, STONITH_OP_RELAY, TRUE)) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); crm_notice("Peer %s has received a forwarded fencing request from %s to fence (%s) peer %s", stonith_our_uname, client ? client->name : remote_peer, crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { rc = -EINPROGRESS; } } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { if (remote_peer || stand_alone) { rc = stonith_fence(request); } else if (call_options & st_opt_manual_ack) { remote_fencing_op_t *rop = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); crm_notice("Received manual confirmation that %s is fenced", target); rop = initiate_remote_stonith_op(client, request, TRUE); rc = stonith_manual_ack(request, rop); } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (client) { int tolerance = 0; crm_notice("Client %s.%.8s wants to fence (%s) '%s' with device '%s'", client->name, client->id, action, target, device ? device : "(any)"); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { rc = 0; goto done; } } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", remote_peer, action, target, device ? device : "(any)"); } alternate_host = check_alternate_host(target); if (alternate_host && client) { const char *client_id = NULL; crm_notice("Forwarding complex self fencing request to peer %s", alternate_host); if (client->id) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } /* Create a record of it, otherwise call_id will be 0 if we need to notify of failures */ create_remote_stonith_op(client_id, request, FALSE); crm_xml_add(request, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request, F_STONITH_CLIENTID, client->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request, FALSE); rc = -EINPROGRESS; } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { rc = -EINPROGRESS; } } } else if (crm_str_eq(op, STONITH_OP_FENCE_HISTORY, TRUE)) { rc = stonith_fence_history(request, &data, remote_peer, call_options); if (call_options & st_opt_discard_reply) { /* we don't expect answers to the broadcast * we might have sent out */ free_xml(data); return pcmk_ok; } } else if (crm_str_eq(op, STONITH_OP_DEVICE_ADD, TRUE)) { const char *device_id = NULL; rc = stonith_device_register(request, &device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_DEVICE_DEL, TRUE)) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); rc = stonith_device_remove(device_id, FALSE); do_stonith_notify_device(call_options, op, rc, device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_ADD, TRUE)) { char *device_id = NULL; rc = stonith_level_register(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); free(device_id); } else if (crm_str_eq(op, STONITH_OP_LEVEL_DEL, TRUE)) { char *device_id = NULL; rc = stonith_level_remove(request, &device_id); do_stonith_notify_level(call_options, op, rc, device_id); } else if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { int node_id = 0; const char *name = NULL; crm_element_value_int(request, XML_ATTR_ID, &node_id); name = crm_element_value(request, XML_ATTR_UNAME); reap_crm_member(node_id, name); return pcmk_ok; } else { crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } done: /* Always reply unless the request is in process still. * If in progress, a reply will happen async after the request * processing is finished */ if (rc != -EINPROGRESS) { crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, id, is_set(call_options, st_opt_sync_call), call_options, crm_element_value(request, F_STONITH_CALLOPTS)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } reply = stonith_construct_reply(request, output, data, rc); stonith_send_reply(reply, call_options, remote_peer, client_id); } free(output); free_xml(data); free_xml(reply); return rc; } static void handle_reply(crm_client_t * client, xmlNode * request, const char *remote_peer) { const char *op = crm_element_value(request, F_STONITH_OPERATION); if (crm_str_eq(op, STONITH_OP_QUERY, TRUE)) { process_remote_stonith_query(request); } else if (crm_str_eq(op, T_STONITH_NOTIFY, TRUE)) { process_remote_stonith_exec(request); } else if (crm_str_eq(op, STONITH_OP_FENCE, TRUE)) { /* Reply to a complex fencing op */ process_remote_stonith_exec(request); } else { crm_err("Unknown %s reply from %s", op, client ? client->name : remote_peer); crm_log_xml_warn(request, "UnknownOp"); } } void stonith_command(crm_client_t * client, uint32_t id, uint32_t flags, xmlNode * request, const char *remote_peer) { int call_options = 0; int rc = 0; gboolean is_reply = FALSE; /* Copy op for reporting. The original might get freed by handle_reply() * before we use it in crm_debug(): * handle_reply() * |- process_remote_stonith_exec() * |-- remote_op_done() * |--- handle_local_reply_and_notify() * |---- crm_xml_add(...F_STONITH_OPERATION...) * |--- free_xml(op->request) */ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_TRACE)) { is_reply = TRUE; } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %s%s %u from %s (%16x)", op, is_reply ? " reply" : "", id, client ? client->name : remote_peer, call_options); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, request, remote_peer); } else { rc = handle_request(client, id, flags, request, remote_peer); } crm_debug("Processed %s%s from %s: %s (%d)", op, is_reply ? " reply" : "", client ? client->name : remote_peer, rc > 0 ? "" : pcmk_strerror(rc), rc); free(op); } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index da0dce981e..7e9bb07666 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1516 +1,1518 @@ /* * Copyright 2009-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include /* U32T ~ PRIu32, X32T ~ PRIx32 */ #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; char *stonith_our_uuid = NULL; long stonith_watchdog_timeout_ms = 0; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; static gboolean no_cib_connect = FALSE; static gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static xmlNode *local_cib = NULL; static pe_working_set_t *fenced_data_set = NULL; static cib_t *cib_api = NULL; static void *cib_library = NULL; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", crm_ipcs_client_pid(c)); return -EPERM; } if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void st_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection created for %p", c); } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; crm_client_t *c = crm_client_get(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = crm_ipcs_recv(c, data, size, &id, &flags); if (request == NULL) { crm_ipcs_send_ack(c, id, flags, "nack", __FUNCTION__, __LINE__); return 0; } op = crm_element_value(request, F_CRM_TASK); if(safe_str_eq(op, CRM_OP_RM_NODE_CACHE)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %" X32T "/%u for command %" U32T " from %s", flags, call_options, id, crm_client_name(c)); if (is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, crm_client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); crm_client_destroy(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (crm_str_eq(op, "poke", TRUE)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ crm_client_t *client_obj = NULL; int local_rc = pcmk_ok; crm_trace("Sending response"); client_obj = crm_client_get_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = -1; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to %s %s", rid, client_obj->name, from_peer ? "(originator of delegated request)" : ""); } else { crm_trace("Sending an event to %s %s", client_obj->name, from_peer ? "(originator of delegated request)" : ""); } local_rc = crm_ipcs_send(client_obj, rid, notify_src, sync_reply?crm_ipc_flags_none:crm_ipc_server_event); } if (local_rc < pcmk_ok && client_obj != NULL) { crm_warn("%sSync reply to %s failed: %s", sync_reply ? "" : "A-", client_obj ? client_obj->name : "", pcmk_strerror(local_rc)); } } long long get_stonith_flag(const char *name) { if (safe_str_eq(name, T_STONITH_NOTIFY_FENCE)) { return st_callback_notify_fence; } else if (safe_str_eq(name, STONITH_OP_DEVICE_ADD)) { return st_callback_device_add; } else if (safe_str_eq(name, STONITH_OP_DEVICE_DEL)) { return st_callback_device_del; } else if (safe_str_eq(name, T_STONITH_NOTIFY_HISTORY)) { return st_callback_notify_history; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; crm_client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (client->options & get_stonith_flag(type)) { int rc = crm_ipcs_send(client, 0, update_msg, crm_ipc_server_event | crm_ipc_server_error); if (rc <= 0) { crm_warn("%s notification of client %s.%.6s failed: %s (%d)", type, crm_client_name(client), client->id, pcmk_strerror(rc), rc); } else { crm_trace("Sent %s notification to client %s.%.6s", type, crm_client_name(client), client->id); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { crm_client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = crm_client_get_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { crm_ipcs_send(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); g_hash_table_foreach(client_connections, stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __FUNCTION__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init() { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static node_t * our_node_allowed_for(resource_t *rsc) { GHashTableIter iter; node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(resource_t *rsc, pe_working_set_t *data_set) { node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; node_t *parent = NULL; gboolean remove = TRUE; /* If this is a complex resource, check children rather than this resource itself. * TODO: Mark each installed device and remove if untouched when this process finishes. */ if(rsc->children) { GListPtr gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(pe_rsc_is_clone(rsc)) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)) { return; } /* If this STONITH resource is disabled, just remove it. */ value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE); if (safe_str_eq(value, RSC_STOPPED)) { crm_info("Device %s has been disabled", rsc->id); goto update_done; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } goto update_done; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); goto update_done; } else { /* Our node is allowed, so update the device information */ int rc; xmlNode *data; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); get_rsc_attributes(rsc->parameters, rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_PROVIDES); g_hash_table_iter_init(&gIter, rsc->parameters); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } remove = FALSE; data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, agent, params, rsc_provides); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(data, NULL, TRUE); CRM_ASSERT(rc == pcmk_ok); free_xml(data); } update_done: if(remove && g_hash_table_lookup(device_list, rsc_name(rsc))) { stonith_device_remove(rsc_name(rsc), TRUE); } } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GListPtr gIter = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); CRM_ASSERT(fenced_data_set != NULL); fenced_data_set->input = local_cib; fenced_data_set->now = crm_time_new(NULL); fenced_data_set->flags |= pe_flag_quick_location; fenced_data_set->localhost = stonith_our_uname; cluster_status(fenced_data_set); pcmk__schedule_actions(fenced_data_set, NULL, NULL); for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, fenced_data_set); } fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(fenced_data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (safe_str_eq(op, "delete") && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; reason = strdup("(meta) attribute deleted from resource"); break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in cib"); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (safe_str_neq(standard, PCMK_RESOURCE_CLASS_STONITH)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from the cib: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_TRACE); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = __xml_first_child(patchset); change != NULL; change = __xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; xmlNode *stonith_watchdog_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting the full cib"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } crm_peer_caches_refresh(local_cib); stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_TRACE); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { long timeout_ms = 0; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if(value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = crm_auto_watchdog_timeout(); } if(timeout_ms != stonith_watchdog_timeout_ms) { crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); stonith_watchdog_timeout_ms = timeout_ms; } } else { stonith_watchdog_timeout_ms = 0; } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring cib updates while stonith is disabled"); stonith_enabled_saved = FALSE; return; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating stonith device and topology lists now that stonith is enabled"); stonith_enabled_saved = TRUE; fencing_topology_init(); cib_devices_update(); } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from the cib: init"); have_cib_devices = TRUE; local_cib = copy_xml(output); crm_peer_caches_refresh(local_cib); fencing_topology_init(); cib_devices_update(); } static void stonith_shutdown(int nsig) { stonith_shutdown_flag = TRUE; crm_info("Terminating with %d clients", crm_hash_table_size(client_connections)); if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { stonith_cleanup(); crm_exit(CRM_EX_OK); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); crm_client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); free(stonith_our_uname); stonith_our_uname = NULL; free_xml(local_cib); local_cib = NULL; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { {"stand-alone", 0, 0, 's'}, {"stand-alone-w-cpg", 0, 0, 'c'}, {"logfile", 1, 0, 'l'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, '$'}, {"help", 0, 0, '?'}, {0, 0, 0, 0} }; /* *INDENT-ON* */ static void setup_cib(void) { int rc, retries = 0; static cib_t *(*cib_new_fn) (void) = NULL; if (cib_new_fn == NULL) { cib_new_fn = find_library_function(&cib_library, CIB_LIBRARY, "cib_new", TRUE); } if (cib_new_fn != NULL) { cib_api = (*cib_new_fn) (); } if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for stonith topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = st_ipc_created, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t cluster; const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; crm_ipc_t *old_instance = NULL; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Provides a summary of cluster's current state." "\n\nOutputs varying levels of detail in a number of different formats.\n"); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': crm_add_logfile(optarg); break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" Instance attributes available for all \"stonith\"-class resources" " and used by Pacemaker's fence daemon, formerly known as stonithd\n"); printf(" Instance attributes available for all \"stonith\"-class resources\n"); printf(" \n"); #if 0 // priority is not implemented yet printf(" \n"); printf(" Devices that are not in a topology " "are tried in order of highest to lowest integer priority\n"); printf(" \n"); printf(" \n"); #endif printf(" \n", STONITH_ATTR_HOSTARG); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTMAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTLIST); printf (" A list of machines controlled by this device (Optional unless %s=static-list).\n", STONITH_ATTR_HOSTCHECK); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_HOSTCHECK); printf (" How to determine which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list " "(query the device via the 'list' command), static-list " "(check the " STONITH_ATTR_HOSTLIST " attribute), status " "(query the device via the 'status' command), none (assume " "every device can fence every machine)\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_MAX); printf (" Enable a random delay for stonith actions and specify the maximum of random delay.\n"); printf (" This prevents double fencing when using slow devices such as sbd.\n" "Use this to enable a random delay for stonith actions.\n" "The overall delay is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_DELAY_BASE); printf (" Enable a base delay for stonith actions and specify base delay value.\n"); printf (" This prevents double fencing when different delays are configured on the nodes.\n" "Use this to enable a static delay for stonith actions.\n" "The overall delay is derived from a random delay value adding this static delay so that the sum is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", STONITH_ATTR_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Cluster property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < DIMOF(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return CRM_EX_OK; } if (optind != argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker fencer"); + old_instance = crm_ipc_new("stonith-ng", 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); if (stand_alone == FALSE) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC cluster.destroy = stonith_peer_cs_destroy; cluster.cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster.cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(&cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } stonith_our_uname = cluster.uname; stonith_our_uuid = cluster.uuid; if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } init_device_list(); init_topology_list(); if(stonith_watchdog_timeout_ms > 0) { int rc; xmlNode *xml; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, STONITH_ATTR_HOSTLIST, stonith_our_uname); xml = create_device_registration_xml("watchdog", st_namespace_internal, STONITH_WATCHDOG_AGENT, params, NULL); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(xml, NULL, FALSE); free_xml(xml); if (rc != pcmk_ok) { crm_crit("Cannot register watchdog pseudo fence agent"); crm_exit(CRM_EX_FATAL); } } stonith_ipc_server_init(&ipcs, &ipc_callbacks); /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); - crm_info("Starting %s mainloop", crm_system_name); + crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); stonith_cleanup(); pe_free_working_set(fenced_data_set); crm_exit(CRM_EX_OK); } diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index bebf938804..18830d3464 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -1,1422 +1,1421 @@ /* * Copyright 2010-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include "pacemakerd.h" #include #include #include #include #include #include #include #include #include /* indirectly: CRM_EX_* */ #include /* cib_channel_ro */ #include #include #include #include #include #include /* PCMK__SPECIAL_PID*, ... */ #ifdef SUPPORT_COROSYNC #include #endif #include #include static gboolean pcmk_quorate = FALSE; static gboolean fatal_error = FALSE; static GMainLoop *mainloop = NULL; static bool global_keep_tracking = false; #define PCMK_PROCESS_CHECK_INTERVAL 5 static const char *local_name = NULL; static uint32_t local_nodeid = 0; static crm_trigger_t *shutdown_trigger = NULL; static const char *pid_file = "/var/run/pacemaker.pid"; typedef struct pcmk_child_s { int pid; long flag; int start_seq; int respawn_count; gboolean respawn; const char *name; const char *uid; const char *command; const char *endpoint; /* IPC server name */ gboolean active_before_startup; } pcmk_child_t; /* Index into the array below */ #define PCMK_CHILD_CONTROLD 3 static pcmk_child_t pcmk_children[] = { { 0, crm_proc_none, 0, 0, FALSE, "none", NULL, NULL }, { 0, crm_proc_execd, 3, 0, TRUE, "pacemaker-execd", NULL, CRM_DAEMON_DIR "/pacemaker-execd", CRM_SYSTEM_LRMD }, { 0, crm_proc_based, 1, 0, TRUE, "pacemaker-based", CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-based", CIB_CHANNEL_RO }, { 0, crm_proc_controld, 6, 0, TRUE, "pacemaker-controld", CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-controld", CRM_SYSTEM_CRMD }, { 0, crm_proc_attrd, 4, 0, TRUE, "pacemaker-attrd", CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-attrd", T_ATTRD }, { 0, crm_proc_schedulerd, 5, 0, TRUE, "pacemaker-schedulerd", CRM_DAEMON_USER, CRM_DAEMON_DIR "/pacemaker-schedulerd", CRM_SYSTEM_PENGINE }, { 0, crm_proc_fenced, 2, 0, TRUE, "pacemaker-fenced", NULL, CRM_DAEMON_DIR "/pacemaker-fenced", "stonith-ng" }, }; static gboolean check_active_before_startup_processes(gpointer user_data); static int pcmk_child_active(pcmk_child_t *child); static gboolean start_child(pcmk_child_t * child); static gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs); void update_process_clients(crm_client_t *client); static uint32_t get_process_list(void) { int lpc = 0; uint32_t procs = crm_get_cluster_proc(); for (lpc = 0; lpc < SIZEOF(pcmk_children); lpc++) { if (pcmk_children[lpc].pid != 0) { procs |= pcmk_children[lpc].flag; } } return procs; } static void pcmk_process_exit(pcmk_child_t * child) { child->pid = 0; child->active_before_startup = FALSE; /* Broadcast the fact that one of our processes died ASAP * * Try to get some logging of the cause out first though * because we're probably about to get fenced * * Potentially do this only if respawn_count > N * to allow for local recovery */ update_node_processes(local_nodeid, NULL, get_process_list()); child->respawn_count += 1; if (child->respawn_count > MAX_RESPAWN) { crm_err("Child respawn count exceeded by %s", child->name); child->respawn = FALSE; } if (shutdown_trigger) { /* resume step-wise shutdown (returned TRUE yields no parallelizing) */ mainloop_set_trigger(shutdown_trigger); /* intended to speed up propagating expected lay-off of the daemons? */ update_node_processes(local_nodeid, NULL, get_process_list()); } else if (!child->respawn) { /* nothing to do */ } else if (crm_is_true(getenv("PCMK_fail_fast"))) { crm_err("Rebooting system because of %s", child->name); pcmk_panic(__FUNCTION__); } else if (pcmk_child_active(child) == 1) { crm_warn("One-off suppressing strict respawning of a child process %s," " appears alright per %s IPC end-point", child->name, child->endpoint); /* need to monitor how it evolves, and start new process if badly */ child->active_before_startup = TRUE; if (!global_keep_tracking) { global_keep_tracking = true; g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } } else { crm_notice("Respawning failed child process: %s", child->name); start_child(child); } } static void pcmk_exit_with_cluster(int exitcode) { #ifdef SUPPORT_COROSYNC corosync_cfg_handle_t cfg_handle; cs_error_t err; if (exitcode == CRM_EX_FATAL) { crm_info("Asking Corosync to shut down"); err = corosync_cfg_initialize(&cfg_handle, NULL); if (err != CS_OK) { crm_warn("Unable to open handle to corosync to close it down. err=%d", err); } err = corosync_cfg_try_shutdown(cfg_handle, COROSYNC_CFG_SHUTDOWN_FLAG_IMMEDIATE); if (err != CS_OK) { crm_warn("Corosync shutdown failed. err=%d", err); } corosync_cfg_finalize(cfg_handle); } #endif crm_exit(exitcode); } static void pcmk_child_exit(mainloop_child_t * p, pid_t pid, int core, int signo, int exitcode) { pcmk_child_t *child = mainloop_child_userdata(p); const char *name = mainloop_child_name(p); if (signo) { do_crm_log(((signo == SIGKILL)? LOG_WARNING : LOG_ERR), "%s[%d] terminated with signal %d (core=%d)", name, pid, signo, core); } else { switch(exitcode) { case CRM_EX_OK: crm_info("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; case CRM_EX_FATAL: crm_warn("Shutting cluster down because %s[%d] had fatal failure", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_shutdown(SIGTERM); break; case CRM_EX_PANIC: do_crm_log_always(LOG_EMERG, "%s[%d] instructed the machine to reset", name, pid); child->respawn = FALSE; fatal_error = TRUE; pcmk_panic(__FUNCTION__); pcmk_shutdown(SIGTERM); break; default: crm_err("%s[%d] exited with status %d (%s)", name, pid, exitcode, crm_exit_str(exitcode)); break; } } pcmk_process_exit(child); } static gboolean stop_child(pcmk_child_t * child, int signal) { if (signal == 0) { signal = SIGTERM; } /* why to skip PID of 1? - FreeBSD ~ how untrackable process behind IPC is masqueraded as - elsewhere: how "init" task is designated; in particular, in systemd arrangement of socket-based activation, this is pretty real */ if (child->command == NULL || child->pid == PCMK__SPECIAL_PID) { crm_debug("Nothing to do for child \"%s\" (process %lld)", child->name, (long long) PCMK__SPECIAL_PID_AS_0(child->pid)); return TRUE; } if (child->pid <= 0) { crm_trace("Client %s not running", child->name); return TRUE; } errno = 0; if (kill(child->pid, signal) == 0) { crm_notice("Stopping %s "CRM_XS" sent signal %d to process %d", child->name, signal, child->pid); } else { crm_perror(LOG_ERR, "Could not stop %s (process %d) with signal %d", child->name, child->pid, signal); } return TRUE; } static char *opts_default[] = { NULL, NULL }; static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL }; /* TODO once libqb is taught to juggle with IPC end-points carried over as bare file descriptor (https://github.com/ClusterLabs/libqb/issues/325) it shall hand over these descriptors here if/once they are successfully pre-opened in (presumably) pcmk_child_active, to avoid any remaining room for races */ static gboolean start_child(pcmk_child_t * child) { int lpc = 0; uid_t uid = 0; gid_t gid = 0; struct rlimit oflimits; gboolean use_valgrind = FALSE; gboolean use_callgrind = FALSE; const char *devnull = "/dev/null"; const char *env_valgrind = getenv("PCMK_valgrind_enabled"); const char *env_callgrind = getenv("PCMK_callgrind_enabled"); child->active_before_startup = FALSE; if (child->command == NULL) { crm_info("Nothing to do for child \"%s\"", child->name); return TRUE; } if (env_callgrind != NULL && crm_is_true(env_callgrind)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_callgrind != NULL && strstr(env_callgrind, child->name)) { use_callgrind = TRUE; use_valgrind = TRUE; } else if (env_valgrind != NULL && crm_is_true(env_valgrind)) { use_valgrind = TRUE; } else if (env_valgrind != NULL && strstr(env_valgrind, child->name)) { use_valgrind = TRUE; } if (use_valgrind && strlen(VALGRIND_BIN) == 0) { crm_warn("Cannot enable valgrind for %s:" " The location of the valgrind binary is unknown", child->name); use_valgrind = FALSE; } if (child->uid) { if (crm_user_lookup(child->uid, &uid, &gid) < 0) { crm_err("Invalid user (%s) for %s: not found", child->uid, child->name); return FALSE; } crm_info("Using uid=%u and group=%u for process %s", uid, gid, child->name); } child->pid = fork(); CRM_ASSERT(child->pid != -1); if (child->pid > 0) { /* parent */ mainloop_child_add(child->pid, 0, child->name, child, pcmk_child_exit); crm_info("Forked child %d for process %s%s", child->pid, child->name, use_valgrind ? " (valgrind enabled: " VALGRIND_BIN ")" : ""); update_node_processes(local_nodeid, NULL, get_process_list()); return TRUE; } else { /* Start a new session */ (void)setsid(); /* Setup the two alternate arg arrays */ opts_vgrind[0] = strdup(VALGRIND_BIN); if (use_callgrind) { opts_vgrind[1] = strdup("--tool=callgrind"); opts_vgrind[2] = strdup("--callgrind-out-file=" CRM_STATE_DIR "/callgrind.out.%p"); opts_vgrind[3] = strdup(child->command); opts_vgrind[4] = NULL; } else { opts_vgrind[1] = strdup(child->command); opts_vgrind[2] = NULL; opts_vgrind[3] = NULL; opts_vgrind[4] = NULL; } opts_default[0] = strdup(child->command); if(gid) { // Whether we need root group access to talk to cluster layer bool need_root_group = TRUE; if (is_corosync_cluster()) { /* Corosync clusters can drop root group access, because we set * uidgid.gid.${gid}=1 via CMAP, which allows these processes to * connect to corosync. */ need_root_group = FALSE; } // Drop root group access if not needed if (!need_root_group && (setgid(gid) < 0)) { crm_perror(LOG_ERR, "Could not set group to %d", gid); } /* Initialize supplementary groups to only those always granted to * the user, plus haclient (so we can access IPC). */ if (initgroups(child->uid, gid) < 0) { crm_err("Cannot initialize groups for %s: %s (%d)", child->uid, pcmk_strerror(errno), errno); } } if (uid && setuid(uid) < 0) { crm_perror(LOG_ERR, "Could not set user to %d (%s)", uid, child->uid); } /* Close all open file descriptors */ getrlimit(RLIMIT_NOFILE, &oflimits); for (lpc = 0; lpc < oflimits.rlim_cur; lpc++) { close(lpc); } (void)open(devnull, O_RDONLY); /* Stdin: fd 0 */ (void)open(devnull, O_WRONLY); /* Stdout: fd 1 */ (void)open(devnull, O_WRONLY); /* Stderr: fd 2 */ if (use_valgrind) { (void)execvp(VALGRIND_BIN, opts_vgrind); } else { (void)execvp(child->command, opts_default); } crm_perror(LOG_ERR, "FATAL: Cannot exec %s", child->command); crm_exit(CRM_EX_FATAL); } return TRUE; /* never reached */ } static gboolean escalate_shutdown(gpointer data) { pcmk_child_t *child = data; if (child->pid == PCMK__SPECIAL_PID) { pcmk_process_exit(child); } else if (child->pid) { /* Use SIGSEGV instead of SIGKILL to create a core so we can see what it was up to */ crm_err("Child %s not terminating in a timely manner, forcing", child->name); stop_child(child, SIGSEGV); } return FALSE; } #define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */ static gboolean pcmk_shutdown_worker(gpointer user_data) { static int phase = 0; static time_t next_log = 0; static int max = SIZEOF(pcmk_children); int lpc = 0; if (phase == 0) { crm_notice("Shutting down Pacemaker"); phase = max; } for (; phase > 0; phase--) { /* Don't stop anything with start_seq < 1 */ for (lpc = max - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if (phase != child->start_seq) { continue; } if (child->pid) { time_t now = time(NULL); if (child->respawn) { if (child->pid == PCMK__SPECIAL_PID) { crm_warn("The process behind %s IPC cannot be" " terminated, so either wait the graceful" " period of %ld s for its native termination" " if it vitally depends on some other daemons" " going down in a controlled way already," " or locate and kill the correct %s process" " on your own; set PCMK_fail_fast=1 to avoid" " this altogether next time around", child->name, (long) SHUTDOWN_ESCALATION_PERIOD, child->command); } next_log = now + 30; child->respawn = FALSE; stop_child(child, SIGTERM); if (phase < pcmk_children[PCMK_CHILD_CONTROLD].start_seq) { g_timeout_add(SHUTDOWN_ESCALATION_PERIOD, escalate_shutdown, child); } } else if (now >= next_log) { next_log = now + 30; crm_notice("Still waiting for %s to terminate " CRM_XS " pid=%d seq=%d", child->name, child->pid, child->start_seq); } return TRUE; } /* cleanup */ crm_debug("%s confirmed stopped", child->name); child->pid = 0; } } /* send_cluster_id(); */ crm_notice("Shutdown complete"); { const char *delay = daemon_option("shutdown_delay"); if(delay) { sync(); sleep(crm_get_msec(delay) / 1000); } } g_main_loop_quit(mainloop); if (fatal_error) { crm_notice("Shutting down and staying down after fatal error"); pcmk_exit_with_cluster(CRM_EX_FATAL); } return TRUE; } static void pcmk_ignore(int nsig) { crm_info("Ignoring signal %s (%d)", strsignal(nsig), nsig); } static void pcmk_sigquit(int nsig) { pcmk_panic(__FUNCTION__); } void pcmk_shutdown(int nsig) { if (shutdown_trigger == NULL) { shutdown_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, pcmk_shutdown_worker, NULL); } mainloop_set_trigger(shutdown_trigger); } static int32_t pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pcmk_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; const char *task = NULL; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg == NULL) { return 0; } task = crm_element_value(msg, F_CRM_TASK); if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) { /* Time to quit */ crm_notice("Shutting down in response to ticket %s (%s)", crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN)); pcmk_shutdown(15); } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { /* Send to everyone */ struct iovec *iov; int id = 0; const char *name = NULL; crm_element_value_int(msg, XML_ATTR_ID, &id); name = crm_element_value(msg, XML_ATTR_UNAME); crm_notice("Instructing peers to remove references to node %s/%u", name, id); iov = calloc(1, sizeof(struct iovec)); iov->iov_base = dump_xml_unformatted(msg); iov->iov_len = 1 + strlen(iov->iov_base); send_cpg_iov(iov); } else { update_process_clients(c); } free_xml(msg); return 0; } /* Error code means? */ static int32_t pcmk_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pcmk_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pcmk_ipc_closed(c); } struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, .connection_created = pcmk_ipc_created, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy }; /*! * \internal * \brief Send an XML message with process list of all known peers to client(s) * * \param[in] client Send message to this client, or all clients if NULL */ void update_process_clients(crm_client_t *client) { GHashTableIter iter; crm_node_t *node = NULL; xmlNode *update = create_xml_node(NULL, "nodes"); if (is_corosync_cluster()) { crm_xml_add_int(update, "quorate", pcmk_quorate); } g_hash_table_iter_init(&iter, crm_peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) { xmlNode *xml = create_xml_node(update, "node"); crm_xml_add_int(xml, "id", node->id); crm_xml_add(xml, "uname", node->uname); crm_xml_add(xml, "state", node->state); crm_xml_add_int(xml, "processes", node->processes); } if(client) { crm_trace("Sending process list to client %s", client->id); crm_ipcs_send(client, 0, update, crm_ipc_server_event); } else { crm_trace("Sending process list to %d clients", crm_hash_table_size(client_connections)); g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & client)) { crm_ipcs_send(client, 0, update, crm_ipc_server_event); } } free_xml(update); } /*! * \internal * \brief Send a CPG message with local node's process list to all peers */ static void update_process_peers(void) { /* Do nothing for corosync-2 based clusters */ struct iovec *iov = calloc(1, sizeof(struct iovec)); CRM_ASSERT(iov); if (local_name) { iov->iov_base = crm_strdup_printf("", local_name, get_process_list()); } else { iov->iov_base = crm_strdup_printf("", get_process_list()); } iov->iov_len = strlen(iov->iov_base) + 1; crm_trace("Sending %s", (char*) iov->iov_base); send_cpg_iov(iov); } /*! * \internal * \brief Update a node's process list, notifying clients and peers if needed * * \param[in] id Node ID of affected node * \param[in] uname Uname of affected node * \param[in] procs Affected node's process list mask * * \return TRUE if the process list changed, FALSE otherwise */ static gboolean update_node_processes(uint32_t id, const char *uname, uint32_t procs) { gboolean changed = FALSE; crm_node_t *node = crm_get_peer(id, uname); if (procs != 0) { if (procs != node->processes) { crm_debug("Node %s now has process list: %.32x (was %.32x)", node->uname, procs, node->processes); node->processes = procs; changed = TRUE; /* If local node's processes have changed, notify clients/peers */ if (id == local_nodeid) { update_process_clients(NULL); update_process_peers(); } } else { crm_trace("Node %s still has process list: %.32x", node->uname, procs); } } return changed; } /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"version", 0, 0, '$', "\tVersion information" }, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {"shutdown", 0, 0, 'S', "\tInstruct Pacemaker to shutdown on this machine"}, {"features", 0, 0, 'F', "\tDisplay the full version and list of features Pacemaker was built with"}, {"-spacer-", 1, 0, '-', "\nAdditional Options:"}, {"foreground", 0, 0, 'f', "\t(Ignored) Pacemaker always runs in the foreground"}, {"pid-file", 1, 0, 'p', "\t(Ignored) Daemon pid file location"}, {"standby", 0, 0, 's', "\tStart node in standby state"}, {NULL, 0, 0, 0} }; /* *INDENT-ON* */ static void mcp_chown(const char *path, uid_t uid, gid_t gid) { int rc = chown(path, uid, gid); if (rc < 0) { crm_warn("Cannot change the ownership of %s to user %s and gid %d: %s", path, CRM_DAEMON_USER, gid, pcmk_strerror(errno)); } } /*! * \internal * \brief Check the liveness of the child based on IPC name and PID if tracked * * \param[inout] child Child tracked data * * \return 0 if no trace of child's liveness detected (while it is detectable * to begin with, at least according to one of the two properties), * 1 if everything is fine, 2 if it's up per PID, but not per IPC * end-point (still starting?), -1 on error, and -2 when the child * (its IPC) blocked with an unauthorized process (log message * emitted in both latter cases) * * \note This function doesn't modify any of \p child members but \c pid, * and is not actively toying with processes as such but invoking * \c stop_child in one particular case (there's for some reason * a different authentic holder of the IPC end-point). */ static int pcmk_child_active(pcmk_child_t *child) { static uid_t cl_uid = 0; static gid_t cl_gid = 0; const uid_t root_uid = 0; const gid_t root_gid = 0; const uid_t *ref_uid; const gid_t *ref_gid; int ret = 0; pid_t ipc_pid = 0; if (child->endpoint == NULL && (child->pid <= 0 || child->pid == PCMK__SPECIAL_PID)) { crm_err("Cannot track child %s for missing both API end-point and PID", child->name); ret = -1; /* misuse of the function when child is not trackable */ } else if (child->endpoint != NULL) { ref_uid = (child->uid != NULL) ? &cl_uid : &root_uid; ref_gid = (child->uid != NULL) ? &cl_gid : &root_gid; if (child->uid != NULL && !cl_uid && !cl_gid && crm_user_lookup(CRM_DAEMON_USER, &cl_uid, &cl_gid) < 0) { crm_err("Could not find user and group IDs for user %s", CRM_DAEMON_USER); ret = -1; } else if ((ret = pcmk__ipc_is_authentic_process_active(child->endpoint, *ref_uid, *ref_gid, &ipc_pid)) < 0) { /* game over */ } else if (child->pid <= 0) { /* hit new child to be initialized, or reset to zero and investigate further for ret == 0 */ child->pid = ipc_pid; } else if (ipc_pid && child->pid != ipc_pid) { /* ultimately strange for ret == 1; either way, investigate */ ret = 0; } } if (!ret) { /* when no IPC based liveness detected (incl. if ever a child without IPC is tracked), or detected for a different _authentic_ process; safe on FreeBSD since the only change possible from a proper child's PID into "special" PID of 1 behind more loosely related process */ ret = crm_pid_active(child->pid, child->name); if (ipc_pid && (ret != 1 || ipc_pid == PCMK__SPECIAL_PID || crm_pid_active(ipc_pid, child->name) == 1)) { if (ret == 1) { /* assume there's no forking-while-retaining-IPC-socket involved in the "children's" lifecycle, hence that the tracking got out of sync purely because of some external (esotheric?) forces (user initiated process "refresh" by force? or intentionally racing on start-up, even?), and that switching over to this other detected, authentic instance with an IPC already in possession is a better trade-off than "neutralizing" it first so as to give either the original or possibly a new to-be-spawned daemon process a leeway for operation, which would otherwise have to be carried out */ /* not possessing IPC, afterall (what about corosync CPG?) */ stop_child(child, SIGKILL); } else { ret = 1; } child->pid = ipc_pid; } else if (ret == 1) { ret = 2; /* up per PID, but not per IPC (still starting?) */ } else if (!child->pid && ret == -1) { ret = 0; /* correct -1 on FreeBSD from above back to 0 */ } } return ret; } static gboolean check_active_before_startup_processes(gpointer user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); gboolean keep_tracking = FALSE; for (start_seq = 1; start_seq < max; start_seq++) { for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].active_before_startup == FALSE) { /* we are already tracking it as a child process. */ continue; } else if (start_seq != pcmk_children[lpc].start_seq) { continue; } else { const char *name = pcmk_children[lpc].name; int ret; switch ((ret = pcmk_child_active(&pcmk_children[lpc]))) { case 1: break; case 0: case 2: /* this very case: it was OK once already */ if (pcmk_children[lpc].respawn == TRUE) { /* presumably after crash, hence critical */ crm_crit("Process %s terminated (pid=%lld)%s", \ name, (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ret ? ", at least per IPC end-point that went AWOL" : ""); } else { /* orderly shutdown */ crm_notice("Process %s terminated (pid=%lld)%s", \ name, (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[lpc].pid), ret ? ", at least per IPC end-point that went AWOL" : ""); } pcmk_process_exit(&(pcmk_children[lpc])); continue; default: crm_crit("Unexpected value from pcmk_child_active:" " %d (pid=%lld)", ret, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[lpc].pid)); /* fall through */ case -1: case -2: /* message(s) already emitted */ crm_exit(CRM_EX_FATAL); break; /* static analysis/noreturn */ } } /* at least one of the processes found at startup * is still going, so keep this recurring timer around */ keep_tracking = TRUE; } } global_keep_tracking = keep_tracking; return keep_tracking; } /*! * \internal * \brief Initial one-off check of the pre-existing "child" processes * * With "child" process, we mean the subdaemon that defines an API end-point * (all of them do as of the comment) -- the possible complement is skipped * as it is deemed it has no such shared resources to cause conflicts about, * hence it can presumably be started anew without hesitation. * If that won't hold true in the future, the concept of a shared resource * will have to be generalized beyond the API end-point. * * For boundary cases that the "child" is still starting (IPC end-point is yet * to be witnessed), or more rarely (practically FreeBSD only), when there's * a pre-existing "untrackable" authentic process, we give the situation some * time to possibly unfold in the right direction, meaning that said socket * will appear or the unattainable process will disappear per the observable * IPC, respectively. * * \return 0 if no such "child" process found, positive number X when X * "children" detected, -1 on an internal error, -2 when any * would-be-used IPC is blocked with an unauthorized process * * \note Since this gets run at the very start, \c respawn_count fields * for particular children get temporarily overloaded with "rounds * of waiting" tracking, restored once we are about to finish with * success (i.e. returning value >=0) and will remain unrestored * otherwise. One way to suppress liveness detection logic for * particular child is to set the said value to a negative number. */ #define WAIT_TRIES 4 /* together with interleaved sleeps, worst case ~ 1s */ static int find_and_track_existing_processes(void) { unsigned tracking = 0U; bool wait_in_progress; int cur; size_t i, rounds; for (rounds = 1; rounds <= WAIT_TRIES; rounds++) { wait_in_progress = false; for (i = 0; i < SIZEOF(pcmk_children); i++) { if (!pcmk_children[i].endpoint || pcmk_children[i].respawn_count < 0 || !(cur = pcmk_child_active(&pcmk_children[i]))) { /* as a speculation, don't give up in the context of pcmk_child_active check if there are more rounds to come for other reasons, but don't artificially wait just because of this, since we would preferably start ASAP */ continue; } pcmk_children[i].respawn_count = rounds; switch (cur) { case 1: if (pcmk_children[i].pid == PCMK__SPECIAL_PID) { if (crm_is_true(getenv("PCMK_fail_fast"))) { crm_crit("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform and PCMK_fail_fast requested", pcmk_children[i].endpoint); return -1; } else if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_notice("Assuming pre-existing authentic, though" " on this platform untrackable, process" " behind %s IPC is stable (was in %d" " previous samples) so rather than" " bailing out (PCMK_fail_fast not" " requested), we just switch to a less" " optimal IPC liveness monitoring" " (not very suitable for heavy load)", pcmk_children[i].name, WAIT_TRIES - 1); crm_warn("The process behind %s IPC cannot be" " terminated, so the overall shutdown" " will get delayed implicitly (%ld s)," " which serves as a graceful period for" " its native termination if it vitally" " depends on some other daemons going" " down in a controlled way already", pcmk_children[i].name, (long) SHUTDOWN_ESCALATION_PERIOD); } else { wait_in_progress = true; crm_warn("Cannot reliably track pre-existing" " authentic process behind %s IPC on this" " platform, can still disappear in %d" " attempt(s)", pcmk_children[i].endpoint, WAIT_TRIES - pcmk_children[i].respawn_count); continue; } } crm_notice("Tracking existing %s process (pid=%lld)", pcmk_children[i].name, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); pcmk_children[i].respawn_count = -1; /* 0~keep watching */ pcmk_children[i].active_before_startup = TRUE; tracking++; break; case 2: if (pcmk_children[i].respawn_count == WAIT_TRIES) { crm_crit("%s IPC end-point for existing authentic" " process %lld did not (re)appear", pcmk_children[i].endpoint, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid)); return -1; } wait_in_progress = true; crm_warn("Cannot find %s IPC end-point for existing" " authentic process %lld, can still (re)appear" " in %d attempts (?)", pcmk_children[i].endpoint, (long long) PCMK__SPECIAL_PID_AS_0( pcmk_children[i].pid), WAIT_TRIES - pcmk_children[i].respawn_count); continue; case -1: case -2: return cur; /* messages already emitted */ default: crm_crit("Unexpected condition"CRM_XS"cur=%d", cur); return -1; /* unexpected condition */ } } if (!wait_in_progress) { break; } (void) poll(NULL, 0, 250); /* a bit for changes to possibly happen */ } for (i = 0; i < SIZEOF(pcmk_children); i++) { pcmk_children[i].respawn_count = 0; /* restore pristine state */ } if (tracking) { g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL, check_active_before_startup_processes, NULL); } return (tracking > INT_MAX) ? INT_MAX : tracking; } static void init_children_processes(void) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); /* start any children that have not been detected */ for (start_seq = 1; start_seq < max; start_seq++) { /* don't start anything with start_seq < 1 */ for (lpc = 0; lpc < max; lpc++) { if (pcmk_children[lpc].pid) { /* we are already tracking it */ continue; } if (start_seq == pcmk_children[lpc].start_seq) { start_child(&(pcmk_children[lpc])); } } } /* From this point on, any daemons being started will be due to * respawning rather than node start. * * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); } static void mcp_cpg_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); crm_exit(CRM_EX_DISCONNECT); } /*! * \internal * \brief Process a CPG message (process list or manual peer cache removal) * * \param[in] handle CPG connection (ignored) * \param[in] groupName CPG group name (ignored) * \param[in] nodeid ID of affected node * \param[in] pid Process ID (ignored) * \param[in] msg CPG XML message * \param[in] msg_len Length of msg in bytes (ignored) */ static void mcp_cpg_deliver(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = string2xml(msg); const char *task = crm_element_value(xml, F_CRM_TASK); crm_trace("Received CPG message (%s): %.200s", (task? task : "process list"), (char*)msg); if (task == NULL) { if (nodeid == local_nodeid) { crm_debug("Ignoring message with local node's process list"); } else { uint32_t procs = 0; const char *uname = crm_element_value(xml, "uname"); crm_element_value_int(xml, "proclist", (int *)&procs); if (update_node_processes(nodeid, uname, procs)) { update_process_clients(NULL); } } } else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) { int id = 0; const char *name = NULL; crm_element_value_int(xml, XML_ATTR_ID, &id); name = crm_element_value(xml, XML_ATTR_UNAME); reap_crm_member(id, name); } if (xml != NULL) { free_xml(xml); } } static void mcp_cpg_membership(cpg_handle_t handle, const struct cpg_name *groupName, const struct cpg_address *member_list, size_t member_list_entries, const struct cpg_address *left_list, size_t left_list_entries, const struct cpg_address *joined_list, size_t joined_list_entries) { /* Update peer cache if needed */ pcmk_cpg_membership(handle, groupName, member_list, member_list_entries, left_list, left_list_entries, joined_list, joined_list_entries); /* Always broadcast our own presence after any membership change */ update_process_peers(); } static gboolean mcp_quorum_callback(unsigned long long seq, gboolean quorate) { pcmk_quorate = quorate; return TRUE; } static void mcp_quorum_destroy(gpointer user_data) { crm_info("connection lost"); } int main(int argc, char **argv) { int rc; int flag; int argerr = 0; int option_index = 0; gboolean shutdown = FALSE; uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct rlimit cores; crm_ipc_t *old_instance = NULL; qb_ipcs_service_t *ipcs = NULL; static crm_cluster_t cluster; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "mode [options]", long_options, "Start/Stop Pacemaker\n"); mainloop_add_signal(SIGHUP, pcmk_ignore); mainloop_add_signal(SIGQUIT, pcmk_sigquit); while (1) { flag = crm_get_option(argc, argv, &option_index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'f': /* Legacy */ break; case 'p': pid_file = optarg; break; case 's': set_daemon_option("node_start_state", "standby"); break; case '$': case '?': crm_help(flag, CRM_EX_OK); break; case 'S': shutdown = TRUE; break; case 'F': printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); crm_exit(CRM_EX_OK); default: printf("Argument code 0%o (%c) is not (?yet?) supported\n", flag, flag); ++argerr; break; } } if (optind < argc) { printf("non-option ARGV-elements: "); while (optind < argc) printf("%s ", argv[optind++]); printf("\n"); } if (argerr) { crm_help('?', CRM_EX_USAGE); } setenv("LC_ALL", "C", 1); set_daemon_option("mcp", "true"); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_debug("Checking for old instances of %s", CRM_SYSTEM_MCP); old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); crm_ipc_connect(old_instance); if (shutdown) { crm_debug("Terminating previous instance"); while (crm_ipc_connected(old_instance)) { xmlNode *cmd = create_request(CRM_OP_QUIT, NULL, NULL, CRM_SYSTEM_MCP, CRM_SYSTEM_MCP, NULL); crm_debug("."); crm_ipc_send(old_instance, cmd, 0, 0, NULL); free_xml(cmd); sleep(2); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_exit(CRM_EX_OK); } else if (crm_ipc_connected(old_instance)) { crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("Pacemaker is already active, aborting startup"); crm_exit(CRM_EX_FATAL); } crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); if (mcp_read_config() == FALSE) { crm_notice("Could not obtain corosync config data, exiting"); crm_exit(CRM_EX_UNAVAILABLE); } // OCF shell functions and cluster-glue need facility under different name { const char *facility = daemon_option("logfacility"); if (facility && safe_str_neq(facility, "none")) { setenv("HA_LOGFACILITY", facility, 1); } } crm_notice("Starting Pacemaker %s "CRM_XS" build=%s features:%s", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); mainloop = g_main_loop_new(NULL, FALSE); rc = getrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Cannot determine current maximum core size."); } else { if (cores.rlim_max == 0 && geteuid() == 0) { cores.rlim_max = RLIM_INFINITY; } else { crm_info("Maximum core file size is: %lu", (unsigned long)cores.rlim_max); } cores.rlim_cur = cores.rlim_max; rc = setrlimit(RLIMIT_CORE, &cores); if (rc < 0) { crm_perror(LOG_ERR, "Core file generation will remain disabled." " Core files are an important diagnostic tool, so" " please consider enabling them by default."); } } if (crm_user_lookup(CRM_DAEMON_USER, &pcmk_uid, &pcmk_gid) < 0) { crm_err("Cluster user %s does not exist, aborting Pacemaker startup", CRM_DAEMON_USER); crm_exit(CRM_EX_NOUSER); } mkdir(CRM_STATE_DIR, 0750); mcp_chown(CRM_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store core/blackbox/scheduler/cib files in */ crm_build_path(CRM_PACEMAKER_DIR, 0750); mcp_chown(CRM_PACEMAKER_DIR, pcmk_uid, pcmk_gid); /* Used to store core files in */ crm_build_path(CRM_CORE_DIR, 0750); mcp_chown(CRM_CORE_DIR, pcmk_uid, pcmk_gid); /* Used to store blackbox dumps in */ crm_build_path(CRM_BLACKBOX_DIR, 0750); mcp_chown(CRM_BLACKBOX_DIR, pcmk_uid, pcmk_gid); // Used to store scheduler inputs in crm_build_path(PE_STATE_DIR, 0750); mcp_chown(PE_STATE_DIR, pcmk_uid, pcmk_gid); /* Used to store the cluster configuration */ crm_build_path(CRM_CONFIG_DIR, 0750); mcp_chown(CRM_CONFIG_DIR, pcmk_uid, pcmk_gid); // Don't build CRM_RSCTMP_DIR, pacemaker-execd will do it ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, &mcp_ipc_callbacks); if (ipcs == NULL) { crm_err("Couldn't start IPC server"); crm_exit(CRM_EX_OSERR); } /* Allows us to block shutdown */ if (cluster_connect_cfg(&local_nodeid) == FALSE) { crm_err("Couldn't connect to Corosync's CFG service"); crm_exit(CRM_EX_PROTOCOL); } if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); } else { setenv("PCMK_watchdog", "false", 1); } switch (find_and_track_existing_processes()) { case -1: crm_crit("Internal fatality, see the log"); crm_exit(CRM_EX_FATAL); case -2: crm_crit("Blocked by foreign process, kill the offender"); crm_exit(CRM_EX_CANTCREAT); default: break; }; cluster.destroy = mcp_cpg_destroy; cluster.cpg.cpg_deliver_fn = mcp_cpg_deliver; cluster.cpg.cpg_confchg_fn = mcp_cpg_membership; crm_set_autoreap(FALSE); rc = pcmk_ok; if (cluster_connect_cpg(&cluster) == FALSE) { crm_err("Couldn't connect to Corosync's CPG service"); rc = -ENOPROTOOPT; } else if (cluster_connect_quorum(mcp_quorum_callback, mcp_quorum_destroy) == FALSE) { rc = -ENOTCONN; } else { local_name = get_local_node_name(); update_node_processes(local_nodeid, local_name, get_process_list()); mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); init_children_processes(); - crm_info("Starting mainloop"); - + crm_notice("Pacemaker daemon successfully started and accepting connections"); g_main_loop_run(mainloop); } if (ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } g_main_loop_unref(mainloop); cluster_disconnect_cpg(&cluster); cluster_disconnect_cfg(); crm_exit(crm_errno2exit(rc)); } diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c index e349bdc099..0eca09fd7b 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -1,350 +1,350 @@ /* * Copyright 2004-2019 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OPTARGS "hVc" static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static pe_working_set_t *sched_data_set = NULL; #define get_series() was_processing_error?1:was_processing_warning?2:3 typedef struct series_s { const char *name; const char *param; int wrap; } series_t; series_t series[] = { {"pe-unknown", "_dont_match_anything_", -1}, {"pe-error", "pe-error-series-max", -1}, {"pe-warn", "pe-warn-series-max", 200}, {"pe-input", "pe-input-series-max", 400}, }; void pengine_shutdown(int nsig); static gboolean process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender) { static char *last_digest = NULL; static char *filename = NULL; time_t execution_date = time(NULL); const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *op = crm_element_value(msg, F_CRM_TASK); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); crm_trace("Processing %s op (ref=%s)...", op, ref); if (op == NULL) { /* error */ } else if (strcasecmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if (safe_str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE)) { /* ignore */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); return FALSE; } else if (strcasecmp(op, CRM_OP_PECALC) == 0) { int seq = -1; int series_id = 0; int series_wrap = 0; char *digest = NULL; const char *value = NULL; xmlNode *converted = NULL; xmlNode *reply = NULL; gboolean is_repoke = FALSE; gboolean process = TRUE; crm_config_error = FALSE; crm_config_warning = FALSE; was_processing_error = FALSE; was_processing_warning = FALSE; if (sched_data_set == NULL) { sched_data_set = pe_new_working_set(); CRM_ASSERT(sched_data_set != NULL); } digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, CRM_FEATURE_SET); converted = copy_xml(xml_data); if (cli_config_update(&converted, NULL, TRUE) == FALSE) { sched_data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); crm_xml_add_int(sched_data_set->graph, "transition_id", 0); crm_xml_add_int(sched_data_set->graph, "cluster-delay", 0); process = FALSE; free(digest); } else if (safe_str_eq(digest, last_digest)) { crm_info("Input has not changed since last time, not saving to disk"); is_repoke = TRUE; free(digest); } else { free(last_digest); last_digest = digest; } if (process) { pcmk__schedule_actions(sched_data_set, converted, NULL); } series_id = get_series(); series_wrap = series[series_id].wrap; value = pe_pref(sched_data_set->config_hash, series[series_id].param); if (value != NULL) { series_wrap = crm_int_helper(value, NULL); if (errno != 0) { series_wrap = series[series_id].wrap; } } else { crm_config_warn("No value specified for cluster" " preference: %s", series[series_id].param); } seq = get_last_sequence(PE_STATE_DIR, series[series_id].name); crm_trace("Series %s: wrap=%d, seq=%d, pref=%s", series[series_id].name, series_wrap, seq, value); sched_data_set->input = NULL; reply = create_reply(msg, sched_data_set->graph); CRM_ASSERT(reply != NULL); if (is_repoke == FALSE) { free(filename); filename = generate_series_filename(PE_STATE_DIR, series[series_id].name, seq, HAVE_BZLIB_H); } crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); crm_xml_add_int(reply, "graph-errors", was_processing_error); crm_xml_add_int(reply, "graph-warnings", was_processing_warning); crm_xml_add_int(reply, "config-errors", crm_config_error); crm_xml_add_int(reply, "config-warnings", crm_config_warning); if (crm_ipcs_send(sender, 0, reply, crm_ipc_server_event) == FALSE) { int graph_file_fd = 0; char *graph_file = NULL; umask(S_IWGRP | S_IWOTH | S_IROTH); graph_file = crm_strdup_printf("%s/pengine.graph.XXXXXX", PE_STATE_DIR); graph_file_fd = mkstemp(graph_file); crm_err("Couldn't send transition graph to peer, writing to %s instead", graph_file); crm_xml_add(reply, F_CRM_TGRAPH, graph_file); write_xml_fd(sched_data_set->graph, graph_file, graph_file_fd, FALSE); free(graph_file); free_xml(first_named_child(reply, F_CRM_DATA)); CRM_ASSERT(crm_ipcs_send(sender, 0, reply, crm_ipc_server_event)); } free_xml(reply); pe_reset_working_set(sched_data_set); pcmk__log_transition_summary(filename); if (is_repoke == FALSE && series_wrap != 0) { unlink(filename); crm_xml_add_int(xml_data, "execution-date", execution_date); write_xml_file(xml_data, filename, HAVE_BZLIB_H); write_last_sequence(PE_STATE_DIR, series[series_id].name, seq + 1, series_wrap); } else { crm_trace("Not writing out %s: %d & %d", filename, is_repoke, series_wrap); } free_xml(converted); } return TRUE; } static int32_t pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (crm_client_new(c, uid, gid) == NULL) { return -EIO; } return 0; } static void pe_ipc_created(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); } gboolean process_pe_message(xmlNode * msg, xmlNode * xml_data, crm_client_t * sender); static int32_t pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; crm_client_t *c = crm_client_get(qbc); xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags); crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__); if (msg != NULL) { xmlNode *data_xml = get_message_xml(msg, F_CRM_DATA); process_pe_message(msg, data_xml, c); free_xml(msg); } return 0; } /* Error code means? */ static int32_t pe_ipc_closed(qb_ipcs_connection_t * c) { crm_client_t *client = crm_client_get(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); crm_client_destroy(client); return 0; } static void pe_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pe_ipc_closed(c); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = pe_ipc_accept, .connection_created = pe_ipc_created, .msg_process = pe_ipc_dispatch, .connection_closed = pe_ipc_closed, .connection_destroyed = pe_ipc_destroy }; /* *INDENT-OFF* */ static struct crm_option long_options[] = { /* Top-level Options */ {"help", 0, 0, '?', "\tThis text"}, {"verbose", 0, 0, 'V', "\tIncrease debug output"}, {0, 0, 0, 0} }; /* *INDENT-ON* */ int main(int argc, char **argv) { int flag; int index = 0; int argerr = 0; crm_log_preinit(NULL, argc, argv); crm_set_options(NULL, "[options]", long_options, "Daemon for calculating the cluster's response to events"); mainloop_add_signal(SIGTERM, pengine_shutdown); while (1) { flag = crm_get_option(argc, argv, &index); if (flag == -1) break; switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'h': /* Help message */ crm_help('?', CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && safe_str_eq("metadata", argv[optind])) { pe_metadata(); return CRM_EX_OK; } if (optind > argc) { ++argerr; } if (argerr) { crm_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); + crm_notice("Starting Pacemaker scheduler"); + if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " PE_STATE_DIR); fprintf(stderr, "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); fflush(stderr); return CRM_EX_FATAL; } - crm_debug("Init server comms"); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); crm_exit(CRM_EX_FATAL); } /* Create the mainloop and run it... */ - crm_info("Starting %s", crm_system_name); - mainloop = g_main_loop_new(NULL, FALSE); + crm_notice("Pacemaker scheduler successfully started and accepting connections"); g_main_loop_run(mainloop); pe_free_working_set(sched_data_set); crm_info("Exiting %s", crm_system_name); crm_exit(CRM_EX_OK); } void pengine_shutdown(int nsig) { mainloop_del_ipc_server(ipcs); pe_free_working_set(sched_data_set); crm_exit(CRM_EX_OK); } diff --git a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt index e7ba332fd0..9c3a05838e 100644 --- a/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt +++ b/doc/Pacemaker_Explained/en-US/Ch-Stonith.txt @@ -1,942 +1,946 @@ :compat-mode: legacy = STONITH = //// We prefer [[ch-stonith]], but older versions of asciidoc don't deal well with that construct for chapter headings //// anchor:ch-stonith[Chapter 13, STONITH] indexterm:[STONITH, Configuration] == What Is STONITH? == STONITH (an acronym for "Shoot The Other Node In The Head"), also called 'fencing', protects your data from being corrupted by rogue nodes or concurrent access. Just because a node is unresponsive, this doesn't mean it isn't accessing your data. The only way to be 100% sure that your data is safe, is to use STONITH so we can be certain that the node is truly offline, before allowing the data to be accessed from another node. STONITH also has a role to play in the event that a clustered service cannot be stopped. In this case, the cluster uses STONITH to force the whole node offline, thereby making it safe to start the service elsewhere. == What STONITH Device Should You Use? == It is crucial that the STONITH device can allow the cluster to differentiate between a node failure and a network one. The biggest mistake people make in choosing a STONITH device is to use a remote power switch (such as many on-board IPMI controllers) that shares power with the node it controls. In such cases, the cluster cannot be sure if the node is really offline, or active and suffering from a network fault. Likewise, any device that relies on the machine being active (such as SSH-based "devices" used during testing) are inappropriate. == Special Treatment of STONITH Resources == STONITH resources are somewhat special in Pacemaker. STONITH may be initiated by pacemaker or by other parts of the cluster (such as resources like DRBD or DLM). To accommodate this, pacemaker does not require the STONITH resource to be in the 'started' state in order to be used, thus allowing reliable use of STONITH devices in such a case. All nodes have access to STONITH devices' definitions and instantiate them on-the-fly when needed, but preference is given to 'verified' instances, which are the ones that are 'started' according to the cluster's knowledge. In the case of a cluster split, the partition with a verified instance will have a slight advantage, because the STONITH daemon in the other partition will have to hear from all its current peers before choosing a node to perform the fencing. Fencing resources do work the same as regular resources in some respects: * +target-role+ can be used to enable or disable the resource * Location constraints can be used to prevent a specific node from using the resource [IMPORTANT] =========== Currently there is a limitation that fencing resources may only have one set of meta-attributes and one set of instance attributes. This can be revisited if it becomes a significant limitation for people. =========== See the table below or run `man pacemaker-fenced` to see special instance attributes that may be set for any fencing resource, regardless of fence agent. .Additional Properties of Fencing Resources -[width="95%",cols="5m,2,3,<10",options="header",align="center"] +[width="95%",cols="8m,3,6,<12",options="header",align="center"] |========================================================= |Field |Type |Default |Description |stonith-timeout |NA |NA a|Older versions used this to override the default period to wait for a STONITH (reboot, on, off) action to complete for this device. It has been replaced by the +pcmk_reboot_timeout+ and +pcmk_off_timeout+ properties. indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] //// (not yet implemented) priority integer 0 The priority of the STONITH resource. Devices are tried in order of highest priority to lowest. indexterm priority,Fencing indexterm Fencing,Property,priority //// |provides |string | |Any special capability provided by the fence device. Currently, only one such capability is meaningful: +unfencing+ (see <>). indexterm:[provides,Fencing] indexterm:[Fencing,Property,provides] |pcmk_host_map |string | |A mapping of host names to ports numbers for devices that do not support host names. Example: +node1:1;node2:2,3+ tells the cluster to use port 1 for - *node1* and ports 2 and 3 for *node2*. + *node1* and ports 2 and 3 for *node2*. If +pcmk_host_check+ is explicitly set + to +static-list+, either this or +pcmk_host_list+ must be set. indexterm:[pcmk_host_map,Fencing] indexterm:[Fencing,Property,pcmk_host_map] |pcmk_host_list |string | -|A list of machines controlled by this device (optional unless -+pcmk_host_check+ is +static-list+). +|A list of machines controlled by this device. If +pcmk_host_check+ is + explicitly set to +static-list+, either this or +pcmk_host_map+ must be set. indexterm:[pcmk_host_list,Fencing] indexterm:[Fencing,Property,pcmk_host_list] |pcmk_host_check |string -|dynamic-list +|static-list if either +pcmk_host_list+ or +pcmk_host_map+ is set, + otherwise dynamic-list if the fence device supports the list action, + otherwise status if the fence device supports the status action, + otherwise none a|How to determine which machines are controlled by the device. Allowed values: * +dynamic-list:+ query the device via the "list" command -* +static-list:+ check the +pcmk_host_list+ attribute +* +static-list:+ check the +pcmk_host_list+ or +pcmk_host_map+ attribute * +status:+ query the device via the "status" command * +none:+ assume every device can fence every machine indexterm:[pcmk_host_check,Fencing] indexterm:[Fencing,Property,pcmk_host_check] |pcmk_delay_max |time |0s |Enable a random delay of up to the time specified before executing stonith actions. This is sometimes used in two-node clusters to ensure that the nodes don't fence each other at the same time. The overall delay introduced by pacemaker is derived from this random delay value adding a static delay so that the sum is kept below the maximum delay. indexterm:[pcmk_delay_max,Fencing] indexterm:[Fencing,Property,pcmk_delay_max] |pcmk_delay_base |time |0s |Enable a static delay before executing stonith actions. This can be used e.g. in two-node clusters to ensure that the nodes don't fence each other, by having separate fencing resources with different values. The node that is fenced with the shorter delay will lose a fencing race. The overall delay introduced by pacemaker is derived from this value plus a random delay such that the sum is kept below the maximum delay. indexterm:[pcmk_delay_base,Fencing] indexterm:[Fencing,Property,pcmk_delay_base] |pcmk_action_limit |integer |1 |The maximum number of actions that can be performed in parallel on this device, if the cluster option +concurrent-fencing+ is +true+. -1 is unlimited. indexterm:[pcmk_action_limit,Fencing] indexterm:[Fencing,Property,pcmk_action_limit] |pcmk_host_argument |string |port |'Advanced use only.' Which parameter should be supplied to the resource agent to identify the node to be fenced. Some devices do not support the standard +port+ parameter or may provide additional ones. Use this to specify an alternate, device-specific parameter. A value of +none+ tells the cluster not to supply any additional parameters. indexterm:[pcmk_host_argument,Fencing] indexterm:[Fencing,Property,pcmk_host_argument] |pcmk_reboot_action |string |reboot |'Advanced use only.' The command to send to the resource agent in order to reboot a node. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_reboot_action,Fencing] indexterm:[Fencing,Property,pcmk_reboot_action] |pcmk_reboot_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `reboot` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_reboot_timeout,Fencing] indexterm:[Fencing,Property,pcmk_reboot_timeout] indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] |pcmk_reboot_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `reboot` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_reboot_retries,Fencing] indexterm:[Fencing,Property,pcmk_reboot_retries] |pcmk_off_action |string |off |'Advanced use only.' The command to send to the resource agent in order to shut down a node. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_off_action,Fencing] indexterm:[Fencing,Property,pcmk_off_action] |pcmk_off_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `off` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_off_timeout,Fencing] indexterm:[Fencing,Property,pcmk_off_timeout] indexterm:[stonith-timeout,Fencing] indexterm:[Fencing,Property,stonith-timeout] |pcmk_off_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `off` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_off_retries,Fencing] indexterm:[Fencing,Property,pcmk_off_retries] |pcmk_list_action |string |list |'Advanced use only.' The command to send to the resource agent in order to list nodes. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_list_action,Fencing] indexterm:[Fencing,Property,pcmk_list_action] |pcmk_list_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `list` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_list_timeout,Fencing] indexterm:[Fencing,Property,pcmk_list_timeout] |pcmk_list_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `list` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_list_retries,Fencing] indexterm:[Fencing,Property,pcmk_list_retries] |pcmk_monitor_action |string |monitor |'Advanced use only.' The command to send to the resource agent in order to report extended status. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_monitor_action,Fencing] indexterm:[Fencing,Property,pcmk_monitor_action] |pcmk_monitor_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `monitor` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_monitor_timeout,Fencing] indexterm:[Fencing,Property,pcmk_monitor_timeout] |pcmk_monitor_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `monitor` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_monitor_retries,Fencing] indexterm:[Fencing,Property,pcmk_monitor_retries] |pcmk_status_action |string |status |'Advanced use only.' The command to send to the resource agent in order to report status. Some devices do not support the standard commands or may provide additional ones. Use this to specify an alternate, device-specific command. indexterm:[pcmk_status_action,Fencing] indexterm:[Fencing,Property,pcmk_status_action] |pcmk_status_timeout |time |60s |'Advanced use only.' Specify an alternate timeout to use for `status` actions instead of the value of +stonith-timeout+. Some devices need much more or less time to complete than normal. Use this to specify an alternate, device-specific timeout. indexterm:[pcmk_status_timeout,Fencing] indexterm:[Fencing,Property,pcmk_status_timeout] |pcmk_status_retries |integer |2 |'Advanced use only.' The maximum number of times to retry the `status` command within the timeout period. Some devices do not support multiple connections, and operations may fail if the device is busy with another task, so Pacemaker will automatically retry the operation, if there is time remaining. Use this option to alter the number of times Pacemaker retries before giving up. indexterm:[pcmk_status_retries,Fencing] indexterm:[Fencing,Property,pcmk_status_retries] |========================================================= [[s-unfencing]] == Unfencing == Most fence devices cut the power to the target. By contrast, fence devices that perform 'fabric fencing' cut off a node's access to some critical resource, such as a shared disk or a network switch. With fabric fencing, it is expected that the cluster will fence the node, and then a system administrator must manually investigate what went wrong, correct any issues found, then reboot (or restart the cluster services on) the node. Once the node reboots and rejoins the cluster, some fabric fencing devices require that an explicit command to restore the node's access to the critical resource. This capability is called 'unfencing' and is typically implemented as the fence agent's +on+ command. If any cluster resource has +requires+ set to +unfencing+, then that resource will not be probed or started on a node until that node has been unfenced. == Configuring STONITH == [NOTE] =========== Higher-level configuration shells include functionality to simplify the process below, particularly the step for deciding which parameters are required. However since this document deals only with core components, you should refer to the STONITH chapter of the http://www.clusterlabs.org/doc/[Clusters from Scratch] guide for those details. =========== . Find the correct driver: + ---- # stonith_admin --list-installed ---- . Find the required parameters associated with the device (replacing $AGENT_NAME with the name obtained from the previous step): + ---- # stonith_admin --metadata --agent $AGENT_NAME ---- . Create a file called +stonith.xml+ containing a primitive resource with a class of +stonith+, a type equal to the agent name obtained earlier, and a parameter for each of the values returned in the previous step. . If the device does not know how to fence nodes based on their uname, you may also need to set the special +pcmk_host_map+ parameter. See `man pacemaker-fenced` for details. . If the device does not support the `list` command, you may also need to set the special +pcmk_host_list+ and/or +pcmk_host_check+ parameters. See `man pacemaker-fenced` for details. . If the device does not expect the victim to be specified with the `port` parameter, you may also need to set the special +pcmk_host_argument+ parameter. See `man pacemaker-fenced` for details. . Upload it into the CIB using cibadmin: + ---- # cibadmin -C -o resources --xml-file stonith.xml ---- . Set +stonith-enabled+ to true: + ---- # crm_attribute -t crm_config -n stonith-enabled -v true ---- . Once the stonith resource is running, you can test it by executing the following (although you might want to stop the cluster on that machine first): + ---- # stonith_admin --reboot nodename ---- === Example STONITH Configuration === Assume we have an chassis containing four nodes and an IPMI device active on 192.0.2.1. We would choose the `fence_ipmilan` driver, and obtain the following list of parameters: .Obtaining a list of STONITH Parameters ==== ---- # stonith_admin --metadata -a fence_ipmilan ---- [source,XML] ---- ---- ==== Based on that, we would create a STONITH resource fragment that might look like this: .An IPMI-based STONITH Resource ==== [source,XML] ---- ---- ==== Finally, we need to enable STONITH: ---- # crm_attribute -t crm_config -n stonith-enabled -v true ---- == Advanced STONITH Configurations == Some people consider that having one fencing device is a single point of failure footnote:[Not true, since a node or resource must fail before fencing even has a chance to]; others prefer removing the node from the storage and network instead of turning it off. Whatever the reason, Pacemaker supports fencing nodes with multiple devices through a feature called 'fencing topologies'. Simply create the individual devices as you normally would, then define one or more +fencing-level+ entries in the +fencing-topology+ section of the configuration. * Each fencing level is attempted in order of ascending +index+. Allowed values are 1 through 9. * If a device fails, processing terminates for the current level. No further devices in that level are exercised, and the next level is attempted instead. * If the operation succeeds for all the listed devices in a level, the level is deemed to have passed. * The operation is finished when a level has passed (success), or all levels have been attempted (failed). * If the operation failed, the next step is determined by the scheduler and/or the controller. Some possible uses of topologies include: * Try poison-pill and fail back to power * Try disk and network, and fall back to power if either fails * Initiate a kdump and then poweroff the node .Properties of Fencing Levels [width="95%",cols="1m,<3",options="header",align="center"] |========================================================= |Field |Description |id |A unique name for the level indexterm:[id,fencing-level] indexterm:[Fencing,fencing-level,id] |target |The name of a single node to which this level applies indexterm:[target,fencing-level] indexterm:[Fencing,fencing-level,target] |target-pattern |An extended regular expression (as defined in http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04[POSIX]) matching the names of nodes to which this level applies indexterm:[target-pattern,fencing-level] indexterm:[Fencing,fencing-level,target-pattern] |target-attribute |The name of a node attribute that is set (to +target-value+) for nodes to which this level applies indexterm:[target-attribute,fencing-level] indexterm:[Fencing,fencing-level,target-attribute] |target-value |The node attribute value (of +target-attribute+) that is set for nodes to which this level applies indexterm:[target-attribute,fencing-level] indexterm:[Fencing,fencing-level,target-attribute] |index |The order in which to attempt the levels. Levels are attempted in ascending order 'until one succeeds'. Valid values are 1 through 9. indexterm:[index,fencing-level] indexterm:[Fencing,fencing-level,index] |devices |A comma-separated list of devices that must all be tried for this level indexterm:[devices,fencing-level] indexterm:[Fencing,fencing-level,devices] |========================================================= .Fencing topology with different devices for different nodes ==== [source,XML] ---- ... ... ---- ==== === Example Dual-Layer, Dual-Device Fencing Topologies === The following example illustrates an advanced use of +fencing-topology+ in a cluster with the following properties: * 3 nodes (2 active prod-mysql nodes, 1 prod_mysql-rep in standby for quorum purposes) * the active nodes have an IPMI-controlled power board reached at 192.0.2.1 and 192.0.2.2 * the active nodes also have two independent PSUs (Power Supply Units) connected to two independent PDUs (Power Distribution Units) reached at 198.51.100.1 (port 10 and port 11) and 203.0.113.1 (port 10 and port 11) * the first fencing method uses the `fence_ipmi` agent * the second fencing method uses the `fence_apc_snmp` agent targetting 2 fencing devices (one per PSU, either port 10 or 11) * fencing is only implemented for the active nodes and has location constraints * fencing topology is set to try IPMI fencing first then default to a "sure-kill" dual PDU fencing In a normal failure scenario, STONITH will first select +fence_ipmi+ to try to kill the faulty node. Using a fencing topology, if that first method fails, STONITH will then move on to selecting +fence_apc_snmp+ twice: * once for the first PDU * again for the second PDU The fence action is considered successful only if both PDUs report the required status. If any of them fails, STONITH loops back to the first fencing method, +fence_ipmi+, and so on until the node is fenced or fencing action is cancelled. .First fencing method: single IPMI device Each cluster node has it own dedicated IPMI channel that can be called for fencing using the following primitives: [source,XML] ---- ---- .Second fencing method: dual PDU devices Each cluster node also has two distinct power channels controlled by two distinct PDUs. That means a total of 4 fencing devices configured as follows: - Node 1, PDU 1, PSU 1 @ port 10 - Node 1, PDU 2, PSU 2 @ port 10 - Node 2, PDU 1, PSU 1 @ port 11 - Node 2, PDU 2, PSU 2 @ port 11 The matching fencing agents are configured as follows: [source,XML] ---- ---- .Location Constraints To prevent STONITH from trying to run a fencing agent on the same node it is supposed to fence, constraints are placed on all the fencing primitives: [source,XML] ---- ---- .Fencing topology Now that all the fencing resources are defined, it's time to create the right topology. We want to first fence using IPMI and if that does not work, fence both PDUs to effectively and surely kill the node. [source,XML] ---- ---- Please note, in +fencing-topology+, the lowest +index+ value determines the priority of the first fencing method. .Final configuration Put together, the configuration looks like this: [source,XML] ---- ... ... ---- == Remapping Reboots == When the cluster needs to reboot a node, whether because +stonith-action+ is +reboot+ or because a reboot was manually requested (such as by `stonith_admin --reboot`), it will remap that to other commands in two cases: . If the chosen fencing device does not support the +reboot+ command, the cluster will ask it to perform +off+ instead. . If a fencing topology level with multiple devices must be executed, the cluster will ask all the devices to perform +off+, then ask the devices to perform +on+. To understand the second case, consider the example of a node with redundant power supplies connected to intelligent power switches. Rebooting one switch and then the other would have no effect on the node. Turning both switches off, and then on, actually reboots the node. In such a case, the fencing operation will be treated as successful as long as the +off+ commands succeed, because then it is safe for the cluster to recover any resources that were on the node. Timeouts and errors in the +on+ phase will be logged but ignored. When a reboot operation is remapped, any action-specific timeout for the remapped action will be used (for example, +pcmk_off_timeout+ will be used when executing the +off+ command, not +pcmk_reboot_timeout+).