diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c index eb2ec29608..247afdf7d9 100644 --- a/daemons/fenced/fenced_cib.c +++ b/daemons/fenced/fenced_cib.c @@ -1,638 +1,638 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include static xmlNode *local_cib = NULL; static cib_t *cib_api = NULL; static bool have_cib_devices = FALSE; /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { GString *xpath = NULL; xmlNode *match; CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) && (value != NULL), return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, "//" PCMK_XE_NODES "/" PCMK_XE_NODE "[@" PCMK_XA_UNAME "='", node, "']" "/" PCMK_XE_INSTANCE_ATTRIBUTES "/" PCMK_XE_NVPAIR "[@" PCMK_XA_NAME "='", name, "' " "and @" PCMK_XA_VALUE "='", value, "']", NULL); match = pcmk__xpath_find_one(local_cib->doc, xpath->str, LOG_NEVER); g_string_free(xpath, TRUE); return (match != NULL); } static void remove_topology_level(xmlNode *match) { int index = 0; char *key = NULL; xmlNode *data = NULL; CRM_CHECK(match != NULL, return); key = stonith_level_key(match, fenced_target_by_unknown); pcmk__xe_get_int(match, PCMK_XA_INDEX, &index); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); pcmk__xe_set(data, PCMK__XA_ST_ORIGIN, __func__); pcmk__xe_set(data, PCMK_XA_TARGET, key); pcmk__xe_set_int(data, PCMK_XA_INDEX, index); fenced_unregister_level(data, NULL); free(key); pcmk__xml_free(data); } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = pcmk__xpath_num_results(xpathObj); for (int lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); if (match == NULL) { continue; } remove_topology_level(match); fenced_register_level(match, NULL); } } /* Fencing */ void fencing_topology_init(void) { xmlXPathObject *xpathObj = NULL; const char *xpath = "//" PCMK_XE_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = pcmk__xpath_search(local_cib->doc, xpath); register_fencing_topology(xpathObj); xmlXPathFreeObject(xpathObj); } #define XPATH_WATCHDOG_TIMEOUT "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" \ PCMK_OPT_FENCING_WATCHDOG_TIMEOUT "']" static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; int rc = pcmk_rc_ok; // @TODO An XPath search can't handle multiple instances or rules stonith_watchdog_xml = pcmk__xpath_find_one(cib->doc, XPATH_WATCHDOG_TIMEOUT, LOG_NEVER); if (stonith_watchdog_xml == NULL) { return; } value = pcmk__xe_get(stonith_watchdog_xml, PCMK_XA_VALUE); if (value == NULL) { return; } rc = pcmk__parse_ms(value, &stonith_watchdog_timeout_ms); if ((rc != pcmk_rc_ok) || (stonith_watchdog_timeout_ms < 0)) { - stonith_watchdog_timeout_ms = pcmk__auto_stonith_watchdog_timeout(); + stonith_watchdog_timeout_ms = pcmk__auto_fencing_watchdog_timeout(); } } /*! * \internal * \brief Mark a fence device dirty if its \c fenced_df_cib_registered flag is * set * * \param[in] key Ignored * \param[in,out] value Fence device (fenced_device_t *) * \param[in] user_data Ignored * * \note This function is suitable for use with \c g_hash_table_foreach(). */ static void mark_dirty_if_cib_registered(gpointer key, gpointer value, gpointer user_data) { fenced_device_t *device = value; if (pcmk_is_set(device->flags, fenced_df_cib_registered)) { fenced_device_set_flags(device, fenced_df_dirty); } } /*! * \internal * \brief Return the value of a fence device's \c dirty flag * * \param[in] key Ignored * \param[in] value Fence device (fenced_device_t *) * \param[in] user_data Ignored * * \return \c dirty flag of \p value * * \note This function is suitable for use with * \c g_hash_table_foreach_remove(). */ static gboolean device_is_dirty(gpointer key, gpointer value, gpointer user_data) { fenced_device_t *device = value; return pcmk_is_set(device->flags, fenced_df_dirty); } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { crm_info("Updating devices to version %s.%s.%s", pcmk__xe_get(local_cib, PCMK_XA_ADMIN_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_EPOCH), pcmk__xe_get(local_cib, PCMK_XA_NUM_UPDATES)); fenced_foreach_device(mark_dirty_if_cib_registered, NULL); /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; fenced_scheduler_run(local_cib); fenced_foreach_device_remove(device_is_dirty); } #define PRIMITIVE_ID_XP_FRAGMENT "/" PCMK_XE_PRIMITIVE "[@" PCMK_XA_ID "='" static void update_cib_stonith_devices(const xmlNode *patchset) { char *reason = NULL; for (const xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); const char *primitive_xpath = NULL; if (pcmk__str_eq(op, PCMK_VALUE_MOVE, pcmk__str_null_matches) || (strstr(xpath, "/" PCMK_XE_STATUS) != NULL)) { continue; } primitive_xpath = strstr(xpath, PRIMITIVE_ID_XP_FRAGMENT); if ((primitive_xpath != NULL) && pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)) { const char *rsc_id = NULL; const char *end_quote = NULL; if ((strstr(primitive_xpath, PCMK_XE_INSTANCE_ATTRIBUTES) != NULL) || (strstr(primitive_xpath, PCMK_XE_META_ATTRIBUTES) != NULL)) { reason = pcmk__str_copy("(meta) attribute deleted from " "resource"); break; } rsc_id = primitive_xpath + sizeof(PRIMITIVE_ID_XP_FRAGMENT) - 1; end_quote = strchr(rsc_id, '\''); CRM_LOG_ASSERT(end_quote != NULL); if (end_quote == NULL) { crm_err("Bug: Malformed item in Pacemaker-generated patchset"); continue; } if (strchr(end_quote, '/') == NULL) { /* The primitive element itself was deleted. If this was a * fencing resource, it's faster to remove it directly than to * run the scheduler and update all device registrations. */ char *copy = strndup(rsc_id, end_quote - rsc_id); pcmk__assert(copy != NULL); stonith_device_remove(copy, true); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ free(copy); continue; } } if (strstr(xpath, "/" PCMK_XE_RESOURCES) || strstr(xpath, "/" PCMK_XE_CONSTRAINTS) || strstr(xpath, "/" PCMK_XE_RSC_DEFAULTS)) { const char *shortpath = strrchr(xpath, '/'); reason = crm_strdup_printf("%s %s", op, shortpath + 1); break; } } if (reason != NULL) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); free(reason); } else { crm_trace("No updates for device list found in CIB"); } } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!fenced_has_watchdog_device() && (stonith_watchdog_targets == NULL)) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* fenced_device_register() will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = fenced_device_register(xml, true); pcmk__xml_free(xml); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; crm_crit("Cannot register watchdog pseudo fence agent: %s", pcmk_rc_str(rc)); stonith_shutdown(0); } } } else if (fenced_has_watchdog_device()) { /* be silent if no device - todo parameter to stonith_device_remove */ stonith_device_remove(STONITH_WATCHDOG_ID, true); } } /*! * \internal * \brief Query the full CIB * * \return Standard Pacemaker return code */ static int fenced_query_cib(void) { int rc = pcmk_ok; crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { pcmk__assert(local_cib != NULL); } else { crm_err("Couldn't retrieve the CIB: %s " QB_XS " rc=%d", pcmk_rc_str(rc), rc); } return rc; } static void update_fencing_topology(const char *event, xmlNode *msg) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); int format = 1; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; CRM_CHECK(patchset != NULL, return); pcmk__xe_get_int(patchset, PCMK_XA_FORMAT, &format); if (format != 2) { crm_warn("Unknown patch format: %d", format); return; } pcmk__xml_patchset_versions(patchset, del, add); for (xmlNode *change = pcmk__xe_first_child(patchset, NULL, NULL, NULL); change != NULL; change = pcmk__xe_next(change, NULL)) { const char *op = pcmk__xe_get(change, PCMK_XA_OPERATION); const char *xpath = pcmk__xe_get(change, PCMK_XA_PATH); if (op == NULL) { continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_LEVEL) != NULL) { // Change to a specific entry crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if (strcmp(op, PCMK_VALUE_DELETE) == 0) { /* We have only path and ID, which is not enough info to remove * a specific entry. Re-initialize the whole topology. */ crm_info("Re-initializing fencing topology after %s operation " "%d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if (strcmp(op, PCMK_VALUE_CREATE) == 0) { fenced_register_level(change->children, NULL); } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { xmlNode *match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); if (match != NULL) { remove_topology_level(match->children); fenced_register_level(match->children, NULL); } } continue; } if (strstr(xpath, "/" PCMK_XE_FENCING_TOPOLOGY) != NULL) { // Change to the topology in general crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } if ((strstr(xpath, "/" PCMK_XE_CONFIGURATION) != NULL) && (pcmk__xe_first_child(change, PCMK_XE_FENCING_TOPOLOGY, NULL, NULL) != NULL) && pcmk__str_any_of(op, PCMK_VALUE_CREATE, PCMK_VALUE_DELETE, NULL)) { // Topology was created or entire configuration section was deleted crm_info("Re-initializing fencing topology after top-level " "%s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } static void update_cib_cache_cb(const char *event, xmlNode * msg) { xmlNode *patchset = NULL; long long timeout_ms_saved = stonith_watchdog_timeout_ms; bool need_full_refresh = false; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *wrapper = NULL; pcmk__xe_get_int(msg, PCMK__XA_CIB_RC, &rc); if (rc != pcmk_ok) { return; } wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); patchset = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: /* @TODO Full refresh (with or without query) in case of * -pcmk_err_old_data? It seems wrong to call * stonith_device_remove() based on primitive deletion in an * old diff. */ break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(local_cib); local_cib = NULL; } } if (local_cib == NULL) { if (fenced_query_cib() != pcmk_rc_ok) { return; } need_full_refresh = true; } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = true; } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); } else { // Partial refresh update_fencing_topology(event, msg); update_cib_stonith_devices(patchset); } watchdog_device_update(); } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = pcmk__xml_copy(NULL, output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } /*! * \internal * \brief Disconnect from CIB manager */ void fenced_cib_cleanup(void) { if (cib_api != NULL) { cib_api->cmds->del_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib__clean_up_connection(&cib_api); } pcmk__xml_free(local_cib); local_cib = NULL; } void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, crm_system_name, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); return; } rc = cib_api->cmds->add_notify_callback(cib_api, PCMK__VALUE_CIB_DIFF_NOTIFY, update_cib_cache_cb); if (rc != pcmk_ok) { crm_err("Could not set CIB notification callback"); return; } rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_none); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index d76b0666fb..dbedc1defd 100644 --- a/include/crm/common/options_internal.h +++ b/include/crm/common/options_internal.h @@ -1,247 +1,247 @@ /* * Copyright 2006-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_OPTIONS_INTERNAL__H #define PCMK__CRM_COMMON_OPTIONS_INTERNAL__H #ifndef PCMK__CONFIG_H #define PCMK__CONFIG_H #include // _Noreturn #endif #include // GHashTable #include // bool #include // pcmk_parse_interval_spec() #include // pcmk__output_t #ifdef __cplusplus extern "C" { #endif /* * Environment variable option handling */ const char *pcmk__env_option(const char *option); void pcmk__set_env_option(const char *option, const char *value, bool compat); bool pcmk__env_option_enabled(const char *daemon, const char *option); /* * Cluster option handling */ /*! * \internal * \brief Option flags */ enum pcmk__opt_flags { pcmk__opt_none = 0U, //!< No additional information /*! * \brief In CIB manager metadata * * \deprecated This flag will be removed with CIB manager metadata */ pcmk__opt_based = (1U << 0), /*! * \brief In controller metadata * * \deprecated This flag will be removed with controller metadata */ pcmk__opt_controld = (1U << 1), /*! * \brief In scheduler metadata * * \deprecated This flag will be removed with scheduler metadata */ pcmk__opt_schedulerd = (1U << 2), pcmk__opt_advanced = (1U << 3), //!< Advanced use only pcmk__opt_generated = (1U << 4), //!< Generated by Pacemaker pcmk__opt_deprecated = (1U << 5), //!< Option is deprecated pcmk__opt_fencing = (1U << 6), //!< Common fencing resource parameter pcmk__opt_primitive = (1U << 7), //!< Primitive resource meta-attribute }; typedef struct pcmk__cluster_option_s { const char *name; const char *alt_name; const char *type; const char *values; const char *default_value; bool (*is_valid)(const char *); uint32_t flags; //!< Group of enum pcmk__opt_flags const char *description_short; const char *description_long; } pcmk__cluster_option_t; const char *pcmk__cluster_option(GHashTable *options, const char *name); int pcmk__output_cluster_options(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, uint32_t filter, bool all); int pcmk__output_fencing_params(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, bool all); int pcmk__output_primitive_meta(pcmk__output_t *out, const char *name, const char *desc_short, const char *desc_long, bool all); int pcmk__daemon_metadata(pcmk__output_t *out, const char *name, const char *short_desc, const char *long_desc, enum pcmk__opt_flags filter); void pcmk__validate_cluster_options(GHashTable *options); bool pcmk__valid_interval_spec(const char *value); bool pcmk__valid_boolean(const char *value); bool pcmk__valid_int(const char *value); bool pcmk__valid_positive_int(const char *value); bool pcmk__valid_no_quorum_policy(const char *value); bool pcmk__valid_percentage(const char *value); bool pcmk__valid_placement_strategy(const char *value); // from watchdog.c long pcmk__get_sbd_watchdog_timeout(void); bool pcmk__get_sbd_sync_resource_startup(void); -long pcmk__auto_stonith_watchdog_timeout(void); +long pcmk__auto_fencing_watchdog_timeout(void); bool pcmk__valid_stonith_watchdog_timeout(const char *value); // Constants for environment variable names #define PCMK__ENV_AUTHKEY_LOCATION "authkey_location" #define PCMK__ENV_BLACKBOX "blackbox" #define PCMK__ENV_CA_FILE "ca_file" #define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled" #define PCMK__ENV_CERT_FILE "cert_file" #define PCMK__ENV_CLUSTER_TYPE "cluster_type" #define PCMK__ENV_CRL_FILE "crl_file" #define PCMK__ENV_DEBUG "debug" #define PCMK__ENV_DH_MAX_BITS "dh_max_bits" #define PCMK__ENV_FAIL_FAST "fail_fast" #define PCMK__ENV_IPC_TYPE "ipc_type" #define PCMK__ENV_KEY_FILE "key_file" #define PCMK__ENV_LOGFACILITY "logfacility" #define PCMK__ENV_LOGFILE "logfile" #define PCMK__ENV_LOGFILE_MODE "logfile_mode" #define PCMK__ENV_LOGPRIORITY "logpriority" #define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit" #define PCMK__ENV_NODE_START_STATE "node_start_state" #define PCMK__ENV_PANIC_ACTION "panic_action" #define PCMK__ENV_REMOTE_ADDRESS "remote_address" #define PCMK__ENV_REMOTE_SCHEMA_DIRECTORY "remote_schema_directory" #define PCMK__ENV_REMOTE_PID1 "remote_pid1" #define PCMK__ENV_REMOTE_PORT "remote_port" #define PCMK__ENV_RESPAWNED "respawned" #define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory" #define PCMK__ENV_SERVICE "service" #define PCMK__ENV_STDERR "stderr" #define PCMK__ENV_TLS_PRIORITIES "tls_priorities" #define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox" #define PCMK__ENV_TRACE_FILES "trace_files" #define PCMK__ENV_TRACE_FORMATS "trace_formats" #define PCMK__ENV_TRACE_FUNCTIONS "trace_functions" #define PCMK__ENV_TRACE_TAGS "trace_tags" #define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled" // Constants for meta-attribute names #define PCMK__META_CLONE "clone" #define PCMK__META_CONTAINER "container" #define PCMK__META_DIGESTS_ALL "digests-all" #define PCMK__META_DIGESTS_SECURE "digests-secure" #define PCMK__META_INTERNAL_RSC "internal_rsc" #define PCMK__META_MIGRATE_SOURCE "migrate_source" #define PCMK__META_MIGRATE_TARGET "migrate_target" #define PCMK__META_ON_NODE "on_node" #define PCMK__META_ON_NODE_UUID "on_node_uuid" #define PCMK__META_OP_NO_WAIT "op_no_wait" #define PCMK__META_OP_TARGET_RC "op_target_rc" #define PCMK__META_PHYSICAL_HOST "physical-host" #define PCMK__META_STONITH_ACTION "stonith_action" /* @TODO Plug these in. Currently, they're never set. These are op attrs for use * with https://projects.clusterlabs.org/T382. */ #define PCMK__META_CLEAR_FAILURE_OP "clear_failure_op" #define PCMK__META_CLEAR_FAILURE_INTERVAL "clear_failure_interval" // @COMPAT Deprecated alias for PCMK__META_PROMOTED_MAX since 2.0.0 #define PCMK__META_PROMOTED_MAX_LEGACY "master-max" // @COMPAT Deprecated alias for PCMK__META_PROMOTED_NODE_MAX since 2.0.0 #define PCMK__META_PROMOTED_NODE_MAX_LEGACY "master-node-max" // Constants for enumerated values #define PCMK__VALUE_ATTRD "attrd" #define PCMK__VALUE_BOLD "bold" #define PCMK__VALUE_BROADCAST "broadcast" #define PCMK__VALUE_CIB "cib" #define PCMK__VALUE_CIB_DIFF_NOTIFY "cib_diff_notify" #define PCMK__VALUE_CIB_NOTIFY "cib_notify" #define PCMK__VALUE_CIB_POST_NOTIFY "cib_post_notify" #define PCMK__VALUE_CIB_PRE_NOTIFY "cib_pre_notify" #define PCMK__VALUE_CIB_UPDATE_CONFIRMATION "cib_update_confirmation" #define PCMK__VALUE_CLUSTER "cluster" #define PCMK__VALUE_CRMD "crmd" #define PCMK__VALUE_EN "en" #define PCMK__VALUE_EPOCH "epoch" #define PCMK__VALUE_HEALTH_RED "health_red" #define PCMK__VALUE_HEALTH_YELLOW "health_yellow" #define PCMK__VALUE_INIT "init" #define PCMK__VALUE_LOCAL "local" #define PCMK__VALUE_LOST "lost" #define PCMK__VALUE_LRMD "lrmd" #define PCMK__VALUE_MAINT "maint" #define PCMK__VALUE_OUTPUT "output" #define PCMK__VALUE_PASSWORD "password" #define PCMK__VALUE_PRIMITIVE "primitive" #define PCMK__VALUE_REFRESH "refresh" #define PCMK__VALUE_REQUEST "request" #define PCMK__VALUE_RESPONSE "response" #define PCMK__VALUE_RSC_FAILED "rsc-failed" #define PCMK__VALUE_RSC_FAILURE_IGNORED "rsc-failure-ignored" #define PCMK__VALUE_RSC_MANAGED "rsc-managed" #define PCMK__VALUE_RSC_MULTIPLE "rsc-multiple" #define PCMK__VALUE_RSC_OK "rsc-ok" #define PCMK__VALUE_RUNNING "running" #define PCMK__VALUE_SCHEDULER "scheduler" #define PCMK__VALUE_SHUTDOWN_COMPLETE "shutdown_complete" #define PCMK__VALUE_SHUTTING_DOWN "shutting_down" #define PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE "st-async-timeout-value" #define PCMK__VALUE_ST_NOTIFY "st_notify" #define PCMK__VALUE_ST_NOTIFY_DISCONNECT "st_notify_disconnect" #define PCMK__VALUE_ST_NOTIFY_FENCE "st_notify_fence" #define PCMK__VALUE_ST_NOTIFY_HISTORY "st_notify_history" #define PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED "st_notify_history_synced" #define PCMK__VALUE_STARTING_DAEMONS "starting_daemons" #define PCMK__VALUE_STONITH_NG "stonith-ng" #define PCMK__VALUE_WAIT_FOR_PING "wait_for_ping" #define PCMK__VALUE_WARNING "warning" /* @COMPAT Deprecated since 2.1.7 (used with PCMK__XA_ORDERING attribute of * resource sets) */ #define PCMK__VALUE_GROUP "group" // @COMPAT Drop when daemon metadata commands are dropped #define PCMK__VALUE_TIME "time" #ifdef __cplusplus } #endif #endif // PCMK__OPTIONS_INTERNAL__H diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index a49fe6993d..862a77c776 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,323 +1,323 @@ /* * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // g_str_has_prefix() #include // QB_MIN(), QB_MAX() static pid_t sbd_pid = 0; /*! * \internal * \brief Tell pacemakerd to panic the local host * * \param[in] ppid Process ID of parent process */ static void panic_local_nonroot(pid_t ppid) { if (ppid > 1) { // pacemakerd is still our parent crm_emerg("Escalating panic to " PCMK__SERVER_PACEMAKERD "[%lld]", (long long) ppid); } else { // Signal (non-parent) pacemakerd if possible ppid = pcmk__procfs_pid_of(PCMK__SERVER_PACEMAKERD); if (ppid > 0) { union sigval signal_value; crm_emerg("Signaling " PCMK__SERVER_PACEMAKERD "[%lld] to panic", (long long) ppid); memset(&signal_value, 0, sizeof(signal_value)); if (sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_emerg("Exiting after signal failure: %s", strerror(errno)); } } else { crm_emerg("Exiting with no known " PCMK__SERVER_PACEMAKERD "process"); } } crm_exit(CRM_EX_PANIC); } /*! * \internal * \brief Panic the local host (if root) or tell pacemakerd to do so */ static void panic_local(void) { const char *full_panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION); const char *panic_action = full_panic_action; int reboot_cmd = RB_AUTOBOOT; // Default panic action is reboot if (geteuid() != 0) { // Non-root caller such as the controller panic_local_nonroot(getppid()); return; } if ((full_panic_action != NULL) && g_str_has_prefix(full_panic_action, "sync-")) { panic_action += sizeof("sync-") - 1; sync(); } if (pcmk__str_empty(full_panic_action) || pcmk__str_eq(panic_action, PCMK_VALUE_REBOOT, pcmk__str_none)) { pcmk__sysrq_trigger('b'); } else if (pcmk__str_eq(panic_action, PCMK_VALUE_CRASH, pcmk__str_none)) { pcmk__sysrq_trigger('c'); } else if (pcmk__str_eq(panic_action, PCMK_VALUE_OFF, pcmk__str_none)) { pcmk__sysrq_trigger('o'); #ifdef RB_POWER_OFF reboot_cmd = RB_POWER_OFF; #elif defined(RB_POWEROFF) reboot_cmd = RB_POWEROFF; #endif } else { crm_warn("Using default '" PCMK_VALUE_REBOOT "' for local option PCMK_" PCMK__ENV_PANIC_ACTION " because '%s' is not a valid value", full_panic_action); pcmk__sysrq_trigger('b'); } // sysrq failed or is not supported on this platform, so fall back to reboot reboot(reboot_cmd); // Even reboot failed, nothing left to do but exit crm_emerg("Exiting after reboot failed: %s", strerror(errno)); if (getppid() > 1) { // pacemakerd is parent process crm_exit(CRM_EX_PANIC); } else { // This is pacemakerd, or an orphaned subdaemon crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Tell sbd to kill the local host, then exit */ static void panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_emerg("Panicking directly because couldn't signal sbd"); panic_local(); } if(ppid > 1) { /* child daemon */ crm_exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Panic the local host * * Panic the local host either by sbd (if running), directly, or by asking * pacemakerd. If trace logging this function, exit instead. * * \param[in] reason Why panic is needed (for logging only) */ void pcmk__panic(const char *reason) { if (pcmk__locate_sbd() > 1) { crm_emerg("Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, reason); panic_sbd(); } else { crm_emerg("Panicking the system directly: %s", reason); panic_local(); } } #define PIDFILE PCMK__RUN_DIR "/sbd.pid" /*! * \internal * \brief Return the process ID of sbd (or 0 if it is not running) */ pid_t pcmk__locate_sbd(void) { gchar *contents = NULL; long long pid_read = 0; if (sbd_pid > 1) { return sbd_pid; } if (g_file_get_contents(PIDFILE, &contents, NULL, NULL) && (pcmk__scan_ll(contents, &pid_read, 0) == pcmk_rc_ok) && (pcmk__pid_active((pid_t) pid_read, SBIN_DIR "/sbd") != ESRCH)) { /* If the pcmk__pid_active() return code is neither pcmk__rc_ok nor * ESRCH, then we couldn't determine whether the PID belongs to * SBIN_DIR "/sbd". In that case, we assume that it does. * * @TODO Make sure that's what we want to do. */ crm_trace("SBD detected at pid %lld (via PID file " PIDFILE ")", pid_read); sbd_pid = (pid_t) pid_read; } else { unlink(PIDFILE); // Fall back to /proc for systems that support it sbd_pid = pcmk__procfs_pid_of("sbd"); if (sbd_pid != 0) { crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); } } if (sbd_pid <= 0) { sbd_pid = 0; crm_trace("SBD not detected"); } g_free(contents); return sbd_pid; } // 0 <= return value <= LONG_MAX long pcmk__get_sbd_watchdog_timeout(void) { static long sbd_timeout = -1; if (sbd_timeout == -1) { const char *timeout = getenv("SBD_WATCHDOG_TIMEOUT"); long long timeout_ms = 0; if ((timeout != NULL) && (pcmk__parse_ms(timeout, &timeout_ms) == pcmk_rc_ok) && (timeout_ms >= 0)) { sbd_timeout = (long) QB_MIN(timeout_ms, LONG_MAX); } else { sbd_timeout = 0; } } return sbd_timeout; } bool pcmk__get_sbd_sync_resource_startup(void) { static bool sync_resource_startup = PCMK__SBD_SYNC_DEFAULT; static bool checked_sync_resource_startup = false; if (!checked_sync_resource_startup) { const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP"); if (sync_env == NULL) { crm_trace("Defaulting to %sstart-up synchronization with sbd", (PCMK__SBD_SYNC_DEFAULT? "" : "no ")); } else if (pcmk__parse_bool(sync_env, &sync_resource_startup) != pcmk_rc_ok) { crm_warn("Defaulting to %sstart-up synchronization with sbd " "because environment value '%s' is invalid", (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env); } checked_sync_resource_startup = true; } return sync_resource_startup; } // 0 <= return value <= min(LONG_MAX, (2 * SBD timeout)) long -pcmk__auto_stonith_watchdog_timeout(void) +pcmk__auto_fencing_watchdog_timeout(void) { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); long long st_timeout = 2 * (long long) sbd_timeout; return (long) QB_MIN(st_timeout, LONG_MAX); } bool pcmk__valid_stonith_watchdog_timeout(const char *value) { /* @COMPAT At a compatibility break, accept either negative values or a * specific string like "auto" (but not both) to mean "auto-calculate the * timeout." Reject other values that aren't parsable as timeouts. */ long long st_timeout = 0; if ((value != NULL) && (pcmk__parse_ms(value, &st_timeout) == pcmk_rc_ok)) { st_timeout = QB_MIN(st_timeout, LONG_MAX); } if (st_timeout < 0) { - st_timeout = pcmk__auto_stonith_watchdog_timeout(); + st_timeout = pcmk__auto_fencing_watchdog_timeout(); // At this point, 0 <= sbd_timeout <= st_timeout crm_debug("Using calculated value %lld for " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " (%s)", st_timeout, value); } if (st_timeout == 0) { crm_debug("Watchdog may be enabled but " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " is disabled (%s)", value? value : "default"); } else if (pcmk__locate_sbd() == 0) { crm_emerg("Shutting down: " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " configured (%s) but SBD not active", pcmk__s(value, "auto")); crm_exit(CRM_EX_FATAL); return false; } else { long sbd_timeout = pcmk__get_sbd_watchdog_timeout(); if (st_timeout < sbd_timeout) { /* Passed-in value for PCMK_OPT_FENCING_WATCHDOG_TIMEOUT was * parsable, positive, and less than the SBD_WATCHDOG_TIMEOUT */ crm_emerg("Shutting down: " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " (%s) too short (must be >%ldms)", value, sbd_timeout); crm_exit(CRM_EX_FATAL); return false; } crm_info("Watchdog configured with " PCMK_OPT_FENCING_WATCHDOG_TIMEOUT " %s and SBD timeout %ldms", value, sbd_timeout); } return true; }