diff --git a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am index 938e33ed65..3c855b0845 100644 --- a/daemons/fenced/Makefile.am +++ b/daemons/fenced/Makefile.am @@ -1,57 +1,58 @@ # # Original Author: Sun Jiang Dong # Copyright 2004 International Business Machines # # with later changes copyright 2004-2023 the Pacemaker project contributors. # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/man.mk halibdir = $(CRM_DAEMON_DIR) halib_PROGRAMS = pacemaker-fenced \ cts-fence-helper noinst_HEADERS = pacemaker-fenced.h if BUILD_XML_HELP man7_MANS = pacemaker-fenced.7 endif cts_fence_helper_SOURCES = cts-fence-helper.c cts_fence_helper_LDADD = $(top_builddir)/lib/fencing/libstonithd.la cts_fence_helper_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_fenced_YFLAGS = -d pacemaker_fenced_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_fenced_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_fenced_LDADD = $(top_builddir)/lib/pacemaker/libpacemaker.la pacemaker_fenced_LDADD += $(top_builddir)/lib/pengine/libpe_status.la pacemaker_fenced_LDADD += $(top_builddir)/lib/cib/libcib.la pacemaker_fenced_LDADD += $(top_builddir)/lib/cluster/libcrmcluster.la pacemaker_fenced_LDADD += $(top_builddir)/lib/fencing/libstonithd.la pacemaker_fenced_LDADD += $(top_builddir)/lib/common/libcrmcommon.la pacemaker_fenced_LDADD += $(CLUSTERLIBS) -pacemaker_fenced_SOURCES = pacemaker-fenced.c \ - fenced_commands.c \ - fenced_remote.c \ +pacemaker_fenced_SOURCES = pacemaker-fenced.c \ + fenced_cib.c \ + fenced_commands.c \ + fenced_remote.c \ fenced_history.c CLEANFILES = $(man7_MANS) $(man8_MANS) if BUILD_LEGACY_LINKS .PHONY: install-exec-hook install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd && $(LN_S) pacemaker-fenced stonithd .PHONY: uninstall-hook uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f stonithd endif diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c new file mode 100644 index 0000000000..e3a64ec2cb --- /dev/null +++ b/daemons/fenced/fenced_cib.c @@ -0,0 +1,893 @@ +/* + * Copyright 2009-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. +*/ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include + +#include + +#define rsc_name(x) x->clone_name?x->clone_name:x->id + +pcmk_scheduler_t *fenced_data_set = NULL; + +static xmlNode *local_cib = NULL; +static cib_t *cib_api = NULL; +static bool have_cib_devices = FALSE; +static const unsigned long long data_set_flags = pcmk_sched_location_only + |pcmk_sched_no_compat + |pcmk_sched_no_counts; + +/*! + * \internal + * \brief Check whether a node has a specific attribute name/value + * + * \param[in] node Name of node to check + * \param[in] name Name of an attribute to look for + * \param[in] value The value the named attribute needs to be set to in order to be considered a match + * + * \return TRUE if the locally cached CIB has the specified node attribute + */ +gboolean +node_has_attr(const char *node, const char *name, const char *value) +{ + GString *xpath = NULL; + xmlNode *match; + + CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) + && (value != NULL), return FALSE); + + /* Search for the node's attributes in the CIB. While the schema allows + * multiple sets of instance attributes, and allows instance attributes to + * use id-ref to reference values elsewhere, that is intended for resources, + * so we ignore that here. + */ + xpath = g_string_sized_new(256); + pcmk__g_strcat(xpath, + "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE + "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS + "/" XML_CIB_TAG_NVPAIR + "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " + "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); + + match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); + + g_string_free(xpath, TRUE); + return (match != NULL); +} + +static void +add_topology_level(xmlNode *match) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + CRM_CHECK(match != NULL, return); + + fenced_register_level(match, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); + pcmk__reset_result(&result); + free(desc); +} + +static void +topology_remove_helper(const char *node, int level) +{ + char *desc = NULL; + pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); + + crm_xml_add(data, F_STONITH_ORIGIN, __func__); + crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); + crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); + + fenced_unregister_level(data, &desc, &result); + fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); + pcmk__reset_result(&result); + free_xml(data); + free(desc); +} + +static void +remove_topology_level(xmlNode *match) +{ + int index = 0; + char *key = NULL; + + CRM_CHECK(match != NULL, return); + + key = stonith_level_key(match, fenced_target_by_unknown); + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + topology_remove_helper(key, index); + free(key); +} + +static void +register_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + remove_topology_level(match); + add_topology_level(match); + } +} + +/* Fencing + + + + + + + + + + + + + + + + +*/ + +void +fencing_topology_init(void) +{ + xmlXPathObjectPtr xpathObj = NULL; + const char *xpath = "//" XML_TAG_FENCING_LEVEL; + + crm_trace("Full topology refresh"); + free_topology_list(); + init_topology_list(); + + /* Grab everything */ + xpathObj = xpath_search(local_cib, xpath); + register_fencing_topology(xpathObj); + + freeXpathObject(xpathObj); +} + +static void +remove_cib_device(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if(match != NULL) { + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + } + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + rsc_id = crm_element_value(match, XML_ATTR_ID); + + stonith_device_remove(rsc_id, true); + } +} + +static void +update_stonith_watchdog_timeout_ms(xmlNode *cib) +{ + long timeout_ms = 0; + xmlNode *stonith_watchdog_xml = NULL; + const char *value = NULL; + + stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", + cib, LOG_NEVER); + if (stonith_watchdog_xml) { + value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); + } + if (value) { + timeout_ms = crm_get_msec(value); + } + + if (timeout_ms < 0) { + timeout_ms = pcmk__auto_watchdog_timeout(); + } + + stonith_watchdog_timeout_ms = timeout_ms; +} + +/*! + * \internal + * \brief Check whether our uname is in a resource's allowed node list + * + * \param[in] rsc Resource to check + * + * \return Pointer to node object if found, NULL otherwise + */ +static pcmk_node_t * +our_node_allowed_for(const pcmk_resource_t *rsc) +{ + GHashTableIter iter; + pcmk_node_t *node = NULL; + + if (rsc && stonith_our_uname) { + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { + break; + } + node = NULL; + } + } + return node; +} + +/*! + * \internal + * \brief If a resource or any of its children are STONITH devices, update their + * definitions given a cluster working set. + * + * \param[in,out] rsc Resource to check + * \param[in,out] data_set Cluster working set with device information + */ +static void +cib_device_update(pcmk_resource_t *rsc, pcmk_scheduler_t *data_set) +{ + pcmk_node_t *node = NULL; + const char *value = NULL; + const char *rclass = NULL; + pcmk_node_t *parent = NULL; + + /* If this is a complex resource, check children rather than this resource itself. */ + if(rsc->children) { + GList *gIter = NULL; + for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { + cib_device_update(gIter->data, data_set); + if(pe_rsc_is_clone(rsc)) { + crm_trace("Only processing one copy of the clone %s", rsc->id); + break; + } + } + return; + } + + /* We only care about STONITH resources. */ + rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + return; + } + + /* If this STONITH resource is disabled, remove it. */ + if (pe__resource_is_disabled(rsc)) { + crm_info("Device %s has been disabled", rsc->id); + return; + } + + /* if watchdog-fencing is disabled handle any watchdog-fence + resource as if it was disabled + */ + if ((stonith_watchdog_timeout_ms <= 0) && + pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { + crm_info("Watchdog-fencing disabled thus handling " + "device %s as disabled", rsc->id); + return; + } + + /* Check whether our node is allowed for this resource (and its parent if in a group) */ + node = our_node_allowed_for(rsc); + if (rsc->parent && (rsc->parent->variant == pcmk_rsc_variant_group)) { + parent = our_node_allowed_for(rsc->parent); + } + + if(node == NULL) { + /* Our node is disallowed, so remove the device */ + GHashTableIter iter; + + crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); + g_hash_table_iter_init(&iter, rsc->allowed_nodes); + while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { + crm_trace("Available: %s = %d", pe__node_name(node), node->weight); + } + + return; + + } else if(node->weight < 0 || (parent && parent->weight < 0)) { + /* Our node (or its group) is disallowed by score, so remove the device */ + int score = (node->weight < 0)? node->weight : parent->weight; + + crm_info("Device %s has been disabled on %s: score=%s", + rsc->id, stonith_our_uname, pcmk_readable_score(score)); + return; + + } else { + /* Our node is allowed, so update the device information */ + int rc; + xmlNode *data; + GHashTable *rsc_params = NULL; + GHashTableIter gIter; + stonith_key_value_t *params = NULL; + + const char *name = NULL; + const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); + const char *rsc_provides = NULL; + + crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); + rsc_params = pe_rsc_params(rsc, node, data_set); + get_meta_attributes(rsc->meta, rsc, node, data_set); + + rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); + + g_hash_table_iter_init(&gIter, rsc_params); + while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { + if (!name || !value) { + continue; + } + params = stonith_key_value_add(params, name, value); + crm_trace(" %s=%s", name, value); + } + + data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, + agent, params, rsc_provides); + stonith_key_value_freeall(params, 1, 1); + rc = stonith_device_register(data, TRUE); + CRM_ASSERT(rc == pcmk_ok); + free_xml(data); + } +} + +/*! + * \internal + * \brief Update all STONITH device definitions based on current CIB + */ +static void +cib_devices_update(void) +{ + GHashTableIter iter; + stonith_device_t *device = NULL; + + crm_info("Updating devices to version %s.%s.%s", + crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), + crm_element_value(local_cib, XML_ATTR_GENERATION), + crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); + + if (fenced_data_set->now != NULL) { + crm_time_free(fenced_data_set->now); + fenced_data_set->now = NULL; + } + fenced_data_set->localhost = stonith_our_uname; + pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->cib_registered) { + device->dirty = TRUE; + } + } + + /* have list repopulated if cib has a watchdog-fencing-resource + TODO: keep a cached list for queries happening while we are refreshing + */ + g_list_free_full(stonith_watchdog_targets, free); + stonith_watchdog_targets = NULL; + g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); + + g_hash_table_iter_init(&iter, device_list); + while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { + if (device->dirty) { + g_hash_table_iter_remove(&iter); + } + } + + fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it + pe_reset_working_set(fenced_data_set); +} + +static void +update_cib_stonith_devices_v1(const char *event, xmlNode * msg) +{ + const char *reason = "none"; + gboolean needs_update = FALSE; + xmlXPathObjectPtr xpath_obj = NULL; + + /* process new constraints */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + /* Safest and simplest to always recompute */ + needs_update = TRUE; + reason = "new location constraint"; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpath_obj, lpc); + + crm_log_xml_trace(match, "new constraint"); + } + } + freeXpathObject(xpath_obj); + + /* process deletions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + remove_cib_device(xpath_obj); + } + freeXpathObject(xpath_obj); + + /* process additions */ + xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); + if (numXpathResults(xpath_obj) > 0) { + int max = numXpathResults(xpath_obj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + const char *rsc_id = NULL; + const char *standard = NULL; + xmlNode *match = getXpathResult(xpath_obj, lpc); + + rsc_id = crm_element_value(match, XML_ATTR_ID); + standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); + + if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { + continue; + } + + crm_trace("Fencing resource %s was added or modified", rsc_id); + reason = "new resource"; + needs_update = TRUE; + } + } + freeXpathObject(xpath_obj); + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } +} + +static void +update_cib_stonith_devices_v2(const char *event, xmlNode * msg) +{ + xmlNode *change = NULL; + char *reason = NULL; + bool needs_update = FALSE; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + const char *shortpath = NULL; + + if ((op == NULL) || + (strcmp(op, "move") == 0) || + strstr(xpath, "/"XML_CIB_TAG_STATUS)) { + continue; + } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { + const char *rsc_id = NULL; + char *search = NULL; + char *mutable = NULL; + + if (strstr(xpath, XML_TAG_ATTR_SETS) || + strstr(xpath, XML_TAG_META_SETS)) { + needs_update = TRUE; + pcmk__str_update(&reason, + "(meta) attribute deleted from resource"); + break; + } + pcmk__str_update(&mutable, xpath); + rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); + if (rsc_id != NULL) { + rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); + search = strchr(rsc_id, '\''); + } + if (search != NULL) { + *search = 0; + stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { + crm_warn("Ignoring malformed CIB update (resource deletion)"); + } + free(mutable); + + } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || + strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || + strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { + shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); + reason = crm_strdup_printf("%s %s", op, shortpath+1); + needs_update = TRUE; + break; + } + } + + if(needs_update) { + crm_info("Updating device list from CIB: %s", reason); + cib_devices_update(); + } else { + crm_trace("No updates for device list found in CIB"); + } + free(reason); +} + +static void +update_cib_stonith_devices(const char *event, xmlNode * msg) +{ + int format = 1; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + switch(format) { + case 1: + update_cib_stonith_devices_v1(event, msg); + break; + case 2: + update_cib_stonith_devices_v2(event, msg); + break; + default: + crm_warn("Unknown patch format: %d", format); + } +} + +static void +watchdog_device_update(void) +{ + if (stonith_watchdog_timeout_ms > 0) { + if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && + !stonith_watchdog_targets) { + /* getting here watchdog-fencing enabled, no device there yet + and reason isn't stonith_watchdog_targets preventing that + */ + int rc; + xmlNode *xml; + + xml = create_device_registration_xml( + STONITH_WATCHDOG_ID, + st_namespace_internal, + STONITH_WATCHDOG_AGENT, + NULL, /* stonith_device_register will add our + own name as PCMK_STONITH_HOST_LIST param + so we can skip that here + */ + NULL); + rc = stonith_device_register(xml, TRUE); + free_xml(xml); + if (rc != pcmk_ok) { + rc = pcmk_legacy2rc(rc); + exit_code = CRM_EX_FATAL; + crm_crit("Cannot register watchdog pseudo fence agent: %s", + pcmk_rc_str(rc)); + stonith_shutdown(0); + } + } + + } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { + /* be silent if no device - todo parameter to stonith_device_remove */ + stonith_device_remove(STONITH_WATCHDOG_ID, true); + } +} + +/*! + * \internal + * \brief Query the full CIB + * + * \return Standard Pacemaker return code + */ +static int +fenced_query_cib(void) +{ + int rc = pcmk_ok; + + crm_trace("Re-requesting full CIB"); + rc = cib_api->cmds->query(cib_api, NULL, &local_cib, + cib_scope_local|cib_sync_call); + rc = pcmk_legacy2rc(rc); + if (rc == pcmk_rc_ok) { + CRM_ASSERT(local_cib != NULL); + } else { + crm_err("Couldn't retrieve the CIB: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + return rc; +} + +static void +remove_fencing_topology(xmlXPathObjectPtr xpathObj) +{ + int max = numXpathResults(xpathObj), lpc = 0; + + for (lpc = 0; lpc < max; lpc++) { + xmlNode *match = getXpathResult(xpathObj, lpc); + + CRM_LOG_ASSERT(match != NULL); + if (match && crm_element_value(match, XML_DIFF_MARKER)) { + /* Deletion */ + int index = 0; + char *target = stonith_level_key(match, fenced_target_by_unknown); + + crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); + if (target == NULL) { + crm_err("Invalid fencing target in element %s", ID(match)); + + } else if (index <= 0) { + crm_err("Invalid level for %s in element %s", target, ID(match)); + + } else { + topology_remove_helper(target, index); + } + /* } else { Deal with modifications during the 'addition' stage */ + } + } +} + +static void +update_fencing_topology(const char *event, xmlNode * msg) +{ + int format = 1; + const char *xpath; + xmlXPathObjectPtr xpathObj = NULL; + xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + + CRM_ASSERT(patchset); + crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); + + if(format == 1) { + /* Process deletions (only) */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + remove_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + /* Process additions and changes */ + xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; + xpathObj = xpath_search(msg, xpath); + + register_fencing_topology(xpathObj); + freeXpathObject(xpathObj); + + } else if(format == 2) { + xmlNode *change = NULL; + int add[] = { 0, 0, 0 }; + int del[] = { 0, 0, 0 }; + + xml_patch_versions(patchset, add, del); + + for (change = pcmk__xml_first_child(patchset); change != NULL; + change = pcmk__xml_next(change)) { + const char *op = crm_element_value(change, XML_DIFF_OP); + const char *xpath = crm_element_value(change, XML_DIFF_PATH); + + if(op == NULL) { + continue; + + } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { + /* Change to a specific entry */ + + crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); + if(strcmp(op, "move") == 0) { + continue; + + } else if(strcmp(op, "create") == 0) { + add_topology_level(change->children); + + } else if(strcmp(op, "modify") == 0) { + xmlNode *match = first_named_child(change, XML_DIFF_RESULT); + + if(match) { + remove_topology_level(match->children); + add_topology_level(match->children); + } + + } else if(strcmp(op, "delete") == 0) { + /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ + crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { + /* Change to the topology in general */ + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + + } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { + /* Changes to the whole config section, possibly including the topology as a whild */ + if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + + } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { + crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", + op, add[0], add[1], add[2], xpath); + fencing_topology_init(); + return; + } + + } else { + crm_trace("Nothing for us in %s operation %d.%d.%d for %s", + op, add[0], add[1], add[2], xpath); + } + } + + } else { + crm_warn("Unknown patch format: %d", format); + } +} + +static void +update_cib_cache_cb(const char *event, xmlNode * msg) +{ + long timeout_ms_saved = stonith_watchdog_timeout_ms; + bool need_full_refresh = false; + + if(!have_cib_devices) { + crm_trace("Skipping updates until we get a full dump"); + return; + + } else if(msg == NULL) { + crm_trace("Missing %s update", event); + return; + } + + /* Maintain a local copy of the CIB so that we have full access + * to device definitions, location constraints, and node attributes + */ + if (local_cib != NULL) { + int rc = pcmk_ok; + xmlNode *patchset = NULL; + + crm_element_value_int(msg, F_CIB_RC, &rc); + if (rc != pcmk_ok) { + return; + } + + patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); + rc = xml_apply_patchset(local_cib, patchset, TRUE); + switch (rc) { + case pcmk_ok: + case -pcmk_err_old_data: + break; + case -pcmk_err_diff_resync: + case -pcmk_err_diff_failed: + crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + break; + default: + crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); + free_xml(local_cib); + local_cib = NULL; + } + } + + if (local_cib == NULL) { + if (fenced_query_cib() != pcmk_rc_ok) { + return; + } + need_full_refresh = true; + } + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + if (timeout_ms_saved != stonith_watchdog_timeout_ms) { + need_full_refresh = true; + } + + if (need_full_refresh) { + fencing_topology_init(); + cib_devices_update(); + } else { + // Partial refresh + update_fencing_topology(event, msg); + update_cib_stonith_devices(event, msg); + } + + watchdog_device_update(); +} + +static void +init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) +{ + crm_info("Updating device list from CIB"); + have_cib_devices = TRUE; + local_cib = copy_xml(output); + + pcmk__refresh_node_caches_from_cib(local_cib); + update_stonith_watchdog_timeout_ms(local_cib); + + fencing_topology_init(); + cib_devices_update(); + watchdog_device_update(); +} + +static void +cib_connection_destroy(gpointer user_data) +{ + if (stonith_shutdown_flag) { + crm_info("Connection to the CIB manager closed"); + return; + } else { + crm_crit("Lost connection to the CIB manager, shutting down"); + } + if (cib_api) { + cib_api->cmds->signoff(cib_api); + } + stonith_shutdown(0); +} + +/*! + * \internal + * \brief Disconnect from CIB manager + */ +void +fenced_cib_cleanup(void) +{ + if (cib_api != NULL) { + cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, + update_cib_cache_cb); + cib__clean_up_connection(&cib_api); + } + free_xml(local_cib); + local_cib = NULL; +} + +void +setup_cib(void) +{ + int rc, retries = 0; + + cib_api = cib_new(); + if (cib_api == NULL) { + crm_err("No connection to the CIB manager"); + return; + } + + do { + sleep(retries); + rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); + } while (rc == -ENOTCONN && ++retries < 5); + + if (rc != pcmk_ok) { + crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); + + } else if (pcmk_ok != + cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { + crm_err("Could not set CIB notification callback"); + + } else { + rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); + cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", + init_cib_cache_cb); + cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); + crm_info("Watching for fencing topology changes"); + } +} diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 6f5c3d2ede..d352555162 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1782 +1,915 @@ /* * Copyright 2009-2023 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" +extern pcmk_scheduler_t *fenced_data_set; + char *stonith_our_uname = NULL; long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; -static gboolean stonith_shutdown_flag = FALSE; +gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; -static xmlNode *local_cib = NULL; -static pcmk_scheduler_t *fenced_data_set = NULL; -static const unsigned long long data_set_flags = pcmk_sched_location_only - |pcmk_sched_no_compat - |pcmk_sched_no_counts; - -static cib_t *cib_api = NULL; - static pcmk__output_t *logger_out = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { bool no_cib_connect; gchar **log_files; } options; -static crm_exit_t exit_code = CRM_EX_OK; +crm_exit_t exit_code = CRM_EX_OK; -static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack", NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, F_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (pcmk__str_eq(op, "poke", pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (STONITH_OP_DEVICE_ADD, * STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, or * STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed * \param[in] active Current number of devices or topologies in use */ static void send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); fenced_send_notification(op, result, notify_data); free_xml(notify_data); } /*! * \internal * \brief Send notifications for a device change to subscribed clients * * \param[in] op Notification type (STONITH_OP_DEVICE_ADD or * STONITH_OP_DEVICE_DEL) * \param[in] result Operation result * \param[in] desc ID of device that changed */ void fenced_send_device_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { send_config_notification(op, result, desc, g_hash_table_size(device_list)); } /*! * \internal * \brief Send notifications for a topology level change to subscribed clients * * \param[in] op Notification type (STONITH_OP_LEVEL_ADD or * STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc String representation of level ([]) */ void fenced_send_level_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { send_config_notification(op, result, desc, g_hash_table_size(topology)); } -static void -topology_remove_helper(const char *node, int level) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); - - crm_xml_add(data, F_STONITH_ORIGIN, __func__); - crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); - crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); - - fenced_unregister_level(data, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); - pcmk__reset_result(&result); - free_xml(data); - free(desc); -} - -static void -remove_cib_device(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if(match != NULL) { - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - } - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - rsc_id = crm_element_value(match, XML_ATTR_ID); - - stonith_device_remove(rsc_id, true); - } -} - -static void -remove_topology_level(xmlNode *match) -{ - int index = 0; - char *key = NULL; - - CRM_CHECK(match != NULL, return); - - key = stonith_level_key(match, fenced_target_by_unknown); - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - topology_remove_helper(key, index); - free(key); -} - -static void -add_topology_level(xmlNode *match) -{ - char *desc = NULL; - pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; - - CRM_CHECK(match != NULL, return); - - fenced_register_level(match, &desc, &result); - fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); - pcmk__reset_result(&result); - free(desc); -} - -static void -remove_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - CRM_LOG_ASSERT(match != NULL); - if (match && crm_element_value(match, XML_DIFF_MARKER)) { - /* Deletion */ - int index = 0; - char *target = stonith_level_key(match, fenced_target_by_unknown); - - crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); - if (target == NULL) { - crm_err("Invalid fencing target in element %s", ID(match)); - - } else if (index <= 0) { - crm_err("Invalid level for %s in element %s", target, ID(match)); - - } else { - topology_remove_helper(target, index); - } - /* } else { Deal with modifications during the 'addition' stage */ - } - } -} - -static void -register_fencing_topology(xmlXPathObjectPtr xpathObj) -{ - int max = numXpathResults(xpathObj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpathObj, lpc); - - remove_topology_level(match); - add_topology_level(match); - } -} - -/* Fencing - - - - - - - - - - - - - - - - -*/ - -static void -fencing_topology_init(void) -{ - xmlXPathObjectPtr xpathObj = NULL; - const char *xpath = "//" XML_TAG_FENCING_LEVEL; - - crm_trace("Full topology refresh"); - free_topology_list(); - init_topology_list(); - - /* Grab everything */ - xpathObj = xpath_search(local_cib, xpath); - register_fencing_topology(xpathObj); - - freeXpathObject(xpathObj); -} - -#define rsc_name(x) x->clone_name?x->clone_name:x->id - -/*! - * \internal - * \brief Check whether our uname is in a resource's allowed node list - * - * \param[in] rsc Resource to check - * - * \return Pointer to node object if found, NULL otherwise - */ -static pcmk_node_t * -our_node_allowed_for(const pcmk_resource_t *rsc) -{ - GHashTableIter iter; - pcmk_node_t *node = NULL; - - if (rsc && stonith_our_uname) { - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { - break; - } - node = NULL; - } - } - return node; -} - -static void -watchdog_device_update(void) -{ - if (stonith_watchdog_timeout_ms > 0) { - if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && - !stonith_watchdog_targets) { - /* getting here watchdog-fencing enabled, no device there yet - and reason isn't stonith_watchdog_targets preventing that - */ - int rc; - xmlNode *xml; - - xml = create_device_registration_xml( - STONITH_WATCHDOG_ID, - st_namespace_internal, - STONITH_WATCHDOG_AGENT, - NULL, /* stonith_device_register will add our - own name as PCMK_STONITH_HOST_LIST param - so we can skip that here - */ - NULL); - rc = stonith_device_register(xml, TRUE); - free_xml(xml); - if (rc != pcmk_ok) { - rc = pcmk_legacy2rc(rc); - exit_code = CRM_EX_FATAL; - crm_crit("Cannot register watchdog pseudo fence agent: %s", - pcmk_rc_str(rc)); - stonith_shutdown(0); - } - } - - } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { - /* be silent if no device - todo parameter to stonith_device_remove */ - stonith_device_remove(STONITH_WATCHDOG_ID, true); - } -} - -static void -update_stonith_watchdog_timeout_ms(xmlNode *cib) -{ - long timeout_ms = 0; - xmlNode *stonith_watchdog_xml = NULL; - const char *value = NULL; - - stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", - cib, LOG_NEVER); - if (stonith_watchdog_xml) { - value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); - } - if (value) { - timeout_ms = crm_get_msec(value); - } - - if (timeout_ms < 0) { - timeout_ms = pcmk__auto_watchdog_timeout(); - } - - stonith_watchdog_timeout_ms = timeout_ms; -} - -/*! - * \internal - * \brief If a resource or any of its children are STONITH devices, update their - * definitions given a cluster working set. - * - * \param[in,out] rsc Resource to check - * \param[in,out] data_set Cluster working set with device information - */ -static void -cib_device_update(pcmk_resource_t *rsc, pcmk_scheduler_t *data_set) -{ - pcmk_node_t *node = NULL; - const char *value = NULL; - const char *rclass = NULL; - pcmk_node_t *parent = NULL; - - /* If this is a complex resource, check children rather than this resource itself. */ - if(rsc->children) { - GList *gIter = NULL; - for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { - cib_device_update(gIter->data, data_set); - if(pe_rsc_is_clone(rsc)) { - crm_trace("Only processing one copy of the clone %s", rsc->id); - break; - } - } - return; - } - - /* We only care about STONITH resources. */ - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); - if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - return; - } - - /* If this STONITH resource is disabled, remove it. */ - if (pe__resource_is_disabled(rsc)) { - crm_info("Device %s has been disabled", rsc->id); - return; - } - - /* if watchdog-fencing is disabled handle any watchdog-fence - resource as if it was disabled - */ - if ((stonith_watchdog_timeout_ms <= 0) && - pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { - crm_info("Watchdog-fencing disabled thus handling " - "device %s as disabled", rsc->id); - return; - } - - /* Check whether our node is allowed for this resource (and its parent if in a group) */ - node = our_node_allowed_for(rsc); - if (rsc->parent && (rsc->parent->variant == pcmk_rsc_variant_group)) { - parent = our_node_allowed_for(rsc->parent); - } - - if(node == NULL) { - /* Our node is disallowed, so remove the device */ - GHashTableIter iter; - - crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); - g_hash_table_iter_init(&iter, rsc->allowed_nodes); - while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { - crm_trace("Available: %s = %d", pe__node_name(node), node->weight); - } - - return; - - } else if(node->weight < 0 || (parent && parent->weight < 0)) { - /* Our node (or its group) is disallowed by score, so remove the device */ - int score = (node->weight < 0)? node->weight : parent->weight; - - crm_info("Device %s has been disabled on %s: score=%s", - rsc->id, stonith_our_uname, pcmk_readable_score(score)); - return; - - } else { - /* Our node is allowed, so update the device information */ - int rc; - xmlNode *data; - GHashTable *rsc_params = NULL; - GHashTableIter gIter; - stonith_key_value_t *params = NULL; - - const char *name = NULL; - const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); - const char *rsc_provides = NULL; - - crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); - rsc_params = pe_rsc_params(rsc, node, data_set); - get_meta_attributes(rsc->meta, rsc, node, data_set); - - rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); - - g_hash_table_iter_init(&gIter, rsc_params); - while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { - if (!name || !value) { - continue; - } - params = stonith_key_value_add(params, name, value); - crm_trace(" %s=%s", name, value); - } - - data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, - agent, params, rsc_provides); - stonith_key_value_freeall(params, 1, 1); - rc = stonith_device_register(data, TRUE); - CRM_ASSERT(rc == pcmk_ok); - free_xml(data); - } -} - -/*! - * \internal - * \brief Update all STONITH device definitions based on current CIB - */ -static void -cib_devices_update(void) -{ - GHashTableIter iter; - stonith_device_t *device = NULL; - - crm_info("Updating devices to version %s.%s.%s", - crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), - crm_element_value(local_cib, XML_ATTR_GENERATION), - crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); - - if (fenced_data_set->now != NULL) { - crm_time_free(fenced_data_set->now); - fenced_data_set->now = NULL; - } - fenced_data_set->localhost = stonith_our_uname; - pcmk__schedule_actions(local_cib, data_set_flags, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->cib_registered) { - device->dirty = TRUE; - } - } - - /* have list repopulated if cib has a watchdog-fencing-resource - TODO: keep a cached list for queries happening while we are refreshing - */ - g_list_free_full(stonith_watchdog_targets, free); - stonith_watchdog_targets = NULL; - g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); - - g_hash_table_iter_init(&iter, device_list); - while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { - if (device->dirty) { - g_hash_table_iter_remove(&iter); - } - } - - fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it - pe_reset_working_set(fenced_data_set); -} - -static void -update_cib_stonith_devices_v2(const char *event, xmlNode * msg) -{ - xmlNode *change = NULL; - char *reason = NULL; - bool needs_update = FALSE; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - const char *shortpath = NULL; - - if ((op == NULL) || - (strcmp(op, "move") == 0) || - strstr(xpath, "/"XML_CIB_TAG_STATUS)) { - continue; - } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { - const char *rsc_id = NULL; - char *search = NULL; - char *mutable = NULL; - - if (strstr(xpath, XML_TAG_ATTR_SETS) || - strstr(xpath, XML_TAG_META_SETS)) { - needs_update = TRUE; - pcmk__str_update(&reason, - "(meta) attribute deleted from resource"); - break; - } - pcmk__str_update(&mutable, xpath); - rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); - if (rsc_id != NULL) { - rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); - search = strchr(rsc_id, '\''); - } - if (search != NULL) { - *search = 0; - stonith_device_remove(rsc_id, true); - /* watchdog_device_update called afterwards - to fall back to implicit definition if needed */ - } else { - crm_warn("Ignoring malformed CIB update (resource deletion)"); - } - free(mutable); - - } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || - strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || - strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { - shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); - reason = crm_strdup_printf("%s %s", op, shortpath+1); - needs_update = TRUE; - break; - } - } - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } else { - crm_trace("No updates for device list found in CIB"); - } - free(reason); -} - - -static void -update_cib_stonith_devices_v1(const char *event, xmlNode * msg) -{ - const char *reason = "none"; - gboolean needs_update = FALSE; - xmlXPathObjectPtr xpath_obj = NULL; - - /* process new constraints */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - /* Safest and simplest to always recompute */ - needs_update = TRUE; - reason = "new location constraint"; - - for (lpc = 0; lpc < max; lpc++) { - xmlNode *match = getXpathResult(xpath_obj, lpc); - - crm_log_xml_trace(match, "new constraint"); - } - } - freeXpathObject(xpath_obj); - - /* process deletions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - remove_cib_device(xpath_obj); - } - freeXpathObject(xpath_obj); - - /* process additions */ - xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); - if (numXpathResults(xpath_obj) > 0) { - int max = numXpathResults(xpath_obj), lpc = 0; - - for (lpc = 0; lpc < max; lpc++) { - const char *rsc_id = NULL; - const char *standard = NULL; - xmlNode *match = getXpathResult(xpath_obj, lpc); - - rsc_id = crm_element_value(match, XML_ATTR_ID); - standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); - - if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - continue; - } - - crm_trace("Fencing resource %s was added or modified", rsc_id); - reason = "new resource"; - needs_update = TRUE; - } - } - freeXpathObject(xpath_obj); - - if(needs_update) { - crm_info("Updating device list from CIB: %s", reason); - cib_devices_update(); - } -} - -static void -update_cib_stonith_devices(const char *event, xmlNode * msg) -{ - int format = 1; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); - switch(format) { - case 1: - update_cib_stonith_devices_v1(event, msg); - break; - case 2: - update_cib_stonith_devices_v2(event, msg); - break; - default: - crm_warn("Unknown patch format: %d", format); - } -} - -/*! - * \internal - * \brief Check whether a node has a specific attribute name/value - * - * \param[in] node Name of node to check - * \param[in] name Name of an attribute to look for - * \param[in] value The value the named attribute needs to be set to in order to be considered a match - * - * \return TRUE if the locally cached CIB has the specified node attribute - */ -gboolean -node_has_attr(const char *node, const char *name, const char *value) -{ - GString *xpath = NULL; - xmlNode *match; - - CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) - && (value != NULL), return FALSE); - - /* Search for the node's attributes in the CIB. While the schema allows - * multiple sets of instance attributes, and allows instance attributes to - * use id-ref to reference values elsewhere, that is intended for resources, - * so we ignore that here. - */ - xpath = g_string_sized_new(256); - pcmk__g_strcat(xpath, - "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE - "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS - "/" XML_CIB_TAG_NVPAIR - "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " - "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); - - match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); - - g_string_free(xpath, TRUE); - return (match != NULL); -} - /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } - -static void -update_fencing_topology(const char *event, xmlNode * msg) -{ - int format = 1; - const char *xpath; - xmlXPathObjectPtr xpathObj = NULL; - xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - - CRM_ASSERT(patchset); - crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); - - if(format == 1) { - /* Process deletions (only) */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - remove_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - /* Process additions and changes */ - xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; - xpathObj = xpath_search(msg, xpath); - - register_fencing_topology(xpathObj); - freeXpathObject(xpathObj); - - } else if(format == 2) { - xmlNode *change = NULL; - int add[] = { 0, 0, 0 }; - int del[] = { 0, 0, 0 }; - - xml_patch_versions(patchset, add, del); - - for (change = pcmk__xml_first_child(patchset); change != NULL; - change = pcmk__xml_next(change)) { - const char *op = crm_element_value(change, XML_DIFF_OP); - const char *xpath = crm_element_value(change, XML_DIFF_PATH); - - if(op == NULL) { - continue; - - } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { - /* Change to a specific entry */ - - crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); - if(strcmp(op, "move") == 0) { - continue; - - } else if(strcmp(op, "create") == 0) { - add_topology_level(change->children); - - } else if(strcmp(op, "modify") == 0) { - xmlNode *match = first_named_child(change, XML_DIFF_RESULT); - - if(match) { - remove_topology_level(match->children); - add_topology_level(match->children); - } - - } else if(strcmp(op, "delete") == 0) { - /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ - crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { - /* Change to the topology in general */ - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - - } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { - /* Changes to the whole config section, possibly including the topology as a whild */ - if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - - } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { - crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", - op, add[0], add[1], add[2], xpath); - fencing_topology_init(); - return; - } - - } else { - crm_trace("Nothing for us in %s operation %d.%d.%d for %s", - op, add[0], add[1], add[2], xpath); - } - } - - } else { - crm_warn("Unknown patch format: %d", format); - } -} -static bool have_cib_devices = FALSE; - -/*! - * \internal - * \brief Query the full CIB - * - * \return Standard Pacemaker return code - */ -static int -fenced_query_cib(void) -{ - int rc = pcmk_ok; - - crm_trace("Re-requesting full CIB"); - rc = cib_api->cmds->query(cib_api, NULL, &local_cib, - cib_scope_local|cib_sync_call); - rc = pcmk_legacy2rc(rc); - if (rc == pcmk_rc_ok) { - CRM_ASSERT(local_cib != NULL); - } else { - crm_err("Couldn't retrieve the CIB: %s " CRM_XS " rc=%d", - pcmk_rc_str(rc), rc); - } - return rc; -} - -static void -update_cib_cache_cb(const char *event, xmlNode * msg) -{ - int rc = pcmk_ok; - long timeout_ms_saved = stonith_watchdog_timeout_ms; - bool need_full_refresh = false; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a full dump"); - return; - - } else if(msg == NULL) { - crm_trace("Missing %s update", event); - return; - } - - /* Maintain a local copy of the CIB so that we have full access - * to device definitions, location constraints, and node attributes - */ - if (local_cib != NULL) { - int rc = pcmk_ok; - xmlNode *patchset = NULL; - - crm_element_value_int(msg, F_CIB_RC, &rc); - if (rc != pcmk_ok) { - return; - } - - patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); - rc = xml_apply_patchset(local_cib, patchset, TRUE); - switch (rc) { - case pcmk_ok: - case -pcmk_err_old_data: - break; - case -pcmk_err_diff_resync: - case -pcmk_err_diff_failed: - crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - break; - default: - crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); - free_xml(local_cib); - local_cib = NULL; - } - } - - if (local_cib == NULL) { - if (fenced_query_cib() != pcmk_rc_ok) { - return; - } - need_full_refresh = true; - } - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - if (timeout_ms_saved != stonith_watchdog_timeout_ms) { - need_full_refresh = true; - } - - if (need_full_refresh) { - fencing_topology_init(); - cib_devices_update(); - } else { - // Partial refresh - update_fencing_topology(event, msg); - update_cib_stonith_devices(event, msg); - } - - watchdog_device_update(); -} - -static void -init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) -{ - crm_info("Updating device list from CIB"); - have_cib_devices = TRUE; - local_cib = copy_xml(output); - - pcmk__refresh_node_caches_from_cib(local_cib); - update_stonith_watchdog_timeout_ms(local_cib); - - fencing_topology_init(); - cib_devices_update(); - watchdog_device_update(); -} - -static void +void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } -static void -cib_connection_destroy(gpointer user_data) -{ - if (stonith_shutdown_flag) { - crm_info("Connection to the CIB manager closed"); - return; - } else { - crm_crit("Lost connection to the CIB manager, shutting down"); - } - if (cib_api) { - cib_api->cmds->signoff(cib_api); - } - stonith_shutdown(0); -} - -/*! - * \internal - * \brief Disconnect from CIB manager - */ -static void -fenced_cib_cleanup(void) -{ - if (cib_api != NULL) { - cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, - update_cib_cache_cb); - cib__clean_up_connection(&cib_api); - } - free_xml(local_cib); - local_cib = NULL; -} - static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); free(stonith_our_uname); stonith_our_uname = NULL; } static gboolean stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { stand_alone = FALSE; options.no_cib_connect = true; return TRUE; } -static void -setup_cib(void) -{ - int rc, retries = 0; - - cib_api = cib_new(); - if (cib_api == NULL) { - crm_err("No connection to the CIB manager"); - return; - } - - do { - sleep(retries); - rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); - } while (rc == -ENOTCONN && ++retries < 5); - - if (rc != pcmk_ok) { - crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); - - } else if (pcmk_ok != - cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { - crm_err("Could not set CIB notification callback"); - - } else { - rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); - cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", - init_cib_cache_cb); - cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); - crm_info("Watching for fencing topology changes"); - } -} - struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } static pcmk__cluster_option_t fencer_options[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, N_("Advanced use only: An alternate parameter to supply instead of 'port'"), N_("some devices do not support the " "standard 'port' parameter or may provide additional ones. Use " "this to specify an alternate, device-specific, parameter " "that should indicate the machine to be fenced. A value of " "none can be used to tell the cluster not to supply any " "additional parameters.") }, { PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL, N_("A mapping of host names to ports numbers for devices that do not support host names."), N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2") }, { PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL, N_("Eg. node1,node2,node3"), N_("A list of machines controlled by " "this device (Optional unless pcmk_host_list=static-list)") }, { PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL, N_("How to determine which machines are controlled by the device."), N_("Allowed values: dynamic-list " "(query the device via the 'list' command), static-list " "(check the pcmk_host_list attribute), status " "(query the device via the 'status' command), " "none (assume every device can fence every " "machine)") }, { PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL, N_("Enable a base delay for fencing actions and specify base delay value."), N_("Enable a delay of no more than the " "time specified before executing fencing actions. Pacemaker " "derives the overall delay by taking the value of " "pcmk_delay_base and adding a random delay value such " "that the sum is kept below this maximum.") }, { PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL, N_("Enable a base delay for " "fencing actions and specify base delay value."), N_("This enables a static delay for " "fencing actions, which can help avoid \"death matches\" where " "two nodes try to fence each other at the same time. If " "pcmk_delay_max is also used, a random delay will be " "added such that the total delay is kept below that value." "This can be set to a single time value to apply to any node " "targeted by this device (useful if a separate device is " "configured for each target), or to a node map (for example, " "\"node1:1s;node2:5\") to set a different value per target.") }, { PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL, N_("The maximum number of actions can be performed in parallel on this device"), N_("Cluster property concurrent-fencing=true needs to be configured first." "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.") }, { "pcmk_reboot_action", NULL, "string", NULL, PCMK_ACTION_REBOOT, NULL, N_("Advanced use only: An alternate command to run instead of 'reboot'"), N_("Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") }, { "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal." "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.") }, { "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'reboot\' actions before giving up.") }, { "pcmk_off_action", NULL, "string", NULL, PCMK_ACTION_OFF, NULL, N_("Advanced use only: An alternate command to run instead of \'off\'"), N_("Some devices do not support the standard commands or may provide additional ones." "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") }, { "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal." "Use this to specify an alternate, device-specific, timeout for \'off\' actions.") }, { "pcmk_off_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'off\' actions before giving up.") }, { "pcmk_on_action", NULL, "string", NULL, PCMK_ACTION_ON, NULL, N_("Advanced use only: An alternate command to run instead of 'on'"), N_("Some devices do not support the standard commands or may provide additional ones." "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") }, { "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal." "Use this to specify an alternate, device-specific, timeout for \'on\' actions.") }, { "pcmk_on_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'on\' actions before giving up.") }, { "pcmk_list_action",NULL, "string", NULL, PCMK_ACTION_LIST, NULL, N_("Advanced use only: An alternate command to run instead of \'list\'"), N_("Some devices do not support the standard commands or may provide additional ones." "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") }, { "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal." "Use this to specify an alternate, device-specific, timeout for \'list\' actions.") }, { "pcmk_list_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'list\' actions before giving up.") }, { "pcmk_monitor_action", NULL, "string", NULL, PCMK_ACTION_MONITOR, NULL, N_("Advanced use only: An alternate command to run instead of \'monitor\'"), N_("Some devices do not support the standard commands or may provide additional ones." "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") }, { "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.") }, { "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'monitor\' actions before giving up.") }, { "pcmk_status_action", NULL, "string", NULL, PCMK_ACTION_STATUS, NULL, N_("Advanced use only: An alternate command to run instead of \'status\'"), N_("Some devices do not support the standard commands or may provide additional ones." "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") }, { "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"), N_("Some devices need much more/less time to complete than normal." "Use this to specify an alternate, device-specific, timeout for \'status\' actions.") }, { "pcmk_status_retries",NULL, "integer", NULL, "2", NULL, N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"), N_("Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries \'status\' actions before giving up.") }, }; void fencer_metadata(void) { const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all \"stonith\"-" "class resources and used by Pacemaker's fence " "daemon, formerly known as stonithd"); gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short, desc_long, fencer_options, PCMK__NELEM(fencer_options)); printf("%s", s); g_free(s); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, "[metadata]"); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { fencer_metadata(); goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC if (is_corosync_cluster()) { cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif // SUPPORT_COROSYNC crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(cluster) == FALSE) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } pcmk__str_update(&stonith_our_uname, cluster->uname); if (!options.no_cib_connect) { setup_cib(); } } else { pcmk__str_update(&stonith_our_uname, "localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); rc = pcmk__log_output_new(&logger_out); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format log: %s", pcmk_rc_str(rc)); goto done; } pe__register_messages(logger_out); pcmk__register_lib_messages(logger_out); pcmk__output_set_log_level(logger_out, LOG_TRACE); fenced_data_set->priv = logger_out; // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); pe_free_working_set(fenced_data_set); pcmk__output_and_clear_error(&error, out); if (logger_out != NULL) { logger_out->finish(logger_out, exit_code, true, NULL); pcmk__output_free(logger_out); } if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index 8fa2c00bda..bf0d5d37dc 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,318 +1,328 @@ /* * Copyright 2009-2023 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include // uint32_t, uint64_t #include +#include +#include +#include /*! * \internal * \brief Check whether target has already been fenced recently * * \param[in] tolerance Number of seconds to look back in time * \param[in] target Name of node to search for * \param[in] action Action we want to match * * \return TRUE if an equivalent fencing operation took place in the last * \p tolerance seconds, FALSE otherwise */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; /*! list of actions that must execute on the target node. Used for unfencing */ GString *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; // Whether target's nodeid should be passed as a parameter to the agent gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! * Fencing delay (in seconds) requested by API client (used by controller to * implement priority-fencing-delay). A value of -1 means disable all * configured delays. */ int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; /*! The (potentially intermediate) result of the operation */ pcmk__action_result_t result; } remote_fencing_op_t; void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; // How the user specified the target of a topology level enum fenced_target_by { fenced_target_by_unknown = -1, // Invalid or not yet parsed fenced_target_by_name, // By target name fenced_target_by_pattern, // By a pattern matching target names fenced_target_by_attribute, // By a node attribute/value on target }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { enum fenced_target_by kind; // How target was specified /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! Names of fencing devices at each topology level */ GList *levels[ST_LEVEL_MAX]; } stonith_topology_t; +void stonith_shutdown(int nsig); + void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); void fenced_unregister_handlers(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode *msg, gboolean from_cib); void stonith_device_remove(const char *id, bool from_cib); char *stonith_level_key(const xmlNode *msg, enum fenced_target_by); void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data); void fenced_send_device_notification(const char *op, const pcmk__action_result_t *result, const char *desc); void fenced_send_level_notification(const char *op, const pcmk__action_result_t *result, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack); void fenced_process_fencing_reply(xmlNode *msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(crm_node_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg); void fencer_metadata(void); const char *fenced_device_reboot_action(const char *device_id); bool fenced_device_supports_on(const char *device_id); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); +void fencing_topology_init(void); +void setup_cib(void); +void fenced_cib_cleanup(void); + static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! * \internal * \brief Get the device flag to use with a given action when searching devices * * \param[in] action Action to check * * \return st_device_supports_on if \p action is "on", otherwise * st_device_supports_none */ static inline uint32_t fenced_support_flag(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; } extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; - extern GHashTable *stonith_remote_op_list; +extern crm_exit_t exit_code; +extern gboolean stonith_shutdown_flag;