diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 5f71285dff..cf2ff22cf8 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,981 +1,676 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" char *stonith_our_uname = NULL; long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { bool no_cib_connect; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC); const char *op = crm_element_value(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { add_message_xml(update_msg, PCMK__XA_ST_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); free_xml(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); free(stonith_our_uname); stonith_our_uname = NULL; } static gboolean stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { stand_alone = FALSE; options.no_cib_connect = true; return TRUE; } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } -static pcmk__cluster_option_t fencer_options[] = { - /* name, old name, type, allowed values, - * default value, validator, - * flags, - * short description, - * long description - */ - { - PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, - "port", NULL, - pcmk__opt_advanced, - N_("An alternate parameter to supply instead of 'port'"), - N_("Some devices do not support the standard 'port' parameter or may " - "provide additional ones. Use this to specify an alternate, device-" - "specific, parameter that should indicate the machine to be " - "fenced. A value of \"none\" can be used to tell the cluster not " - "to supply any additional parameters."), - }, - { - PCMK_STONITH_HOST_MAP, NULL, "string", NULL, - NULL, NULL, - pcmk__opt_none, - N_("A mapping of node names to port numbers for devices that do not " - "support node names."), - N_("For example, \"node1:1;node2:2,3\" would tell the cluster to use " - "port 1 for node1 and ports 2 and 3 for node2."), - }, - { - PCMK_STONITH_HOST_LIST, NULL, "string", NULL, - NULL, NULL, - pcmk__opt_none, - N_("Nodes targeted by this device"), - N_("Comma-separated list of nodes that can be targeted by this device " - "(for example, \"node1,node2,node3\"). If pcmk_host_check is " - "\"static-list\", either this or pcmk_host_map must be set."), - }, - { - PCMK_STONITH_HOST_CHECK, NULL, "select", - "dynamic-list, static-list, status, none", - NULL, NULL, - pcmk__opt_none, - N_("How to determine which nodes can be targeted by the device"), - N_("Use \"dynamic-list\" to query the device via the 'list' command; " - "\"static-list\" to check the pcmk_host_list attribute; " - "\"status\" to query the device via the 'status' command; or " - "\"none\" to assume every device can fence every node. " - "The default value is \"static-list\" if pcmk_host_map or " - "pcmk_host_list is set; otherwise \"dynamic-list\" if the device " - "supports the list operation; otherwise \"status\" if the device " - "supports the status operation; otherwise \"none\""), - }, - { - PCMK_STONITH_DELAY_MAX, NULL, "time", NULL, - "0s", NULL, - pcmk__opt_none, - N_("Enable a delay of no more than the time specified before executing " - "fencing actions."), - N_("Enable a delay of no more than the time specified before executing " - "fencing actions. Pacemaker derives the overall delay by taking " - "the value of pcmk_delay_base and adding a random delay value such " - "that the sum is kept below this maximum."), - }, - { - PCMK_STONITH_DELAY_BASE, NULL, "string", NULL, - "0s", NULL, - pcmk__opt_none, - N_("Enable a base delay for fencing actions and specify base delay " - "value."), - N_("This enables a static delay for fencing actions, which can help " - "avoid \"death matches\" where two nodes try to fence each other " - "at the same time. If pcmk_delay_max is also used, a random delay " - "will be added such that the total delay is kept below that value. " - "This can be set to a single time value to apply to any node " - "targeted by this device (useful if a separate device is " - "configured for each target), or to a node map (for example, " - "\"node1:1s;node2:5\") to set a different value for each target."), - }, - { - PCMK_STONITH_ACTION_LIMIT, NULL, "integer", NULL, - "1", NULL, - pcmk__opt_none, - N_("The maximum number of actions can be performed in parallel on this " - "device"), - N_("Cluster property concurrent-fencing=\"true\" needs to be " - "configured first. Then use this to specify the maximum number of " - "actions can be performed in parallel on this device. A value of " - "-1 means an unlimited number of actions can be performed in " - "parallel."), - }, - { - "pcmk_reboot_action", NULL, "string", NULL, - PCMK_ACTION_REBOOT, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'reboot'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'reboot' action."), - }, - { - "pcmk_reboot_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'reboot' actions instead " - "of stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'reboot' actions."), - }, - { - "pcmk_reboot_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'reboot' command within the " - "timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'reboot' action before giving up."), - }, - { - "pcmk_off_action", NULL, "string", NULL, - PCMK_ACTION_OFF, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'off'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'off' action."), - }, - { - "pcmk_off_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'off' actions instead of " - "stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'off' actions."), - }, - { - "pcmk_off_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'off' command within the " - "timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'off' action before giving up."), - }, - { - "pcmk_on_action", NULL, "string", NULL, - PCMK_ACTION_ON, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'on'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'on' action."), - }, - { - "pcmk_on_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'on' actions instead of " - "stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'on' actions."), - }, - { - "pcmk_on_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'on' command within the " - "timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'on' action before giving up."), - }, - { - "pcmk_list_action", NULL, "string", NULL, - PCMK_ACTION_LIST, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'list'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'list' action."), - }, - { - "pcmk_list_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'list' actions instead of " - "stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'list' actions."), - }, - { - "pcmk_list_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'list' command within the " - "timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'list' action before giving up."), - }, - { - "pcmk_monitor_action", NULL, "string", NULL, - PCMK_ACTION_MONITOR, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'monitor'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'monitor' action."), - }, - { - "pcmk_monitor_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'monitor' actions instead " - "of stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'monitor' actions."), - }, - { - "pcmk_monitor_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'monitor' command within " - "the timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'monitor' action before giving up."), - }, - { - "pcmk_status_action", NULL, "string", NULL, - PCMK_ACTION_STATUS, NULL, - pcmk__opt_advanced, - N_("An alternate command to run instead of 'status'"), - N_("Some devices do not support the standard commands or may provide " - "additional ones. Use this to specify an alternate, device-" - "specific, command that implements the 'status' action."), - }, - { - "pcmk_status_timeout", NULL, "time", NULL, - "60s", NULL, - pcmk__opt_advanced, - N_("Specify an alternate timeout to use for 'status' actions instead " - "of stonith-timeout"), - N_("Some devices need much more/less time to complete than normal. " - "Use this to specify an alternate, device-specific, timeout for " - "'status' actions."), - }, - { - "pcmk_status_retries", NULL, "integer", NULL, - "2", NULL, - pcmk__opt_advanced, - N_("The maximum number of times to try the 'status' command within " - "the timeout period"), - N_("Some devices do not support multiple connections. Operations may " - "\"fail\" if the device is busy with another task. In that case, " - "Pacemaker will automatically retry the operation if there is time " - "remaining. Use this option to alter the number of times Pacemaker " - "tries a 'status' action before giving up."), - }, - - { NULL, }, -}; - static int fencer_metadata(void) { - // @TODO Use pcmk__daemon_metadata when fencer_options moves to options.c const char *name = "pacemaker-fenced"; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon, formerly known as " "stonithd"); - pcmk__output_t *tmp_out = NULL; - xmlNode *top = NULL; - const xmlNode *metadata = NULL; - GString *metadata_s = NULL; - - int rc = pcmk__output_new(&tmp_out, "xml", "/dev/null", NULL); - if (rc != pcmk_rc_ok) { - return rc; - } - - pcmk__output_set_legacy_xml(tmp_out); - out->message(tmp_out, "option-list", name, desc_short, desc_long, - pcmk__opt_none, fencer_options, true); - - tmp_out->finish(tmp_out, CRM_EX_OK, false, (void **) &top); - metadata = pcmk__xe_first_child(top, PCMK_XE_RESOURCE_AGENT, NULL, NULL); - - metadata_s = g_string_sized_new(16384); - pcmk__xml_string(metadata, pcmk__xml_fmt_pretty|pcmk__xml_fmt_text, - metadata_s, 0); - - out->output_xml(out, PCMK_XE_METADATA, metadata_s->str); - - pcmk__output_free(tmp_out); - free_xml(top); - g_string_free(metadata_s, TRUE); - return pcmk_rc_ok; + return pcmk__daemon_metadata(out, name, desc_short, desc_long, + pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, "[metadata]"); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; crm_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC if (is_corosync_cluster()) { cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; } #endif // SUPPORT_COROSYNC crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(cluster) == FALSE) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } pcmk__str_update(&stonith_our_uname, cluster->uname); if (!options.no_cib_connect) { setup_cib(); } } else { pcmk__str_update(&stonith_our_uname, "localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/lib/common/options.c b/lib/common/options.c index 3dcb83f45c..1ddcf0249f 100644 --- a/lib/common/options.c +++ b/lib/common/options.c @@ -1,1320 +1,1326 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef _GNU_SOURCE # define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include void pcmk__cli_help(char cmd) { if (cmd == 'v' || cmd == '$') { printf("Pacemaker %s\n", PACEMAKER_VERSION); printf("Written by Andrew Beekhof and " "the Pacemaker project contributors\n"); } else if (cmd == '!') { printf("Pacemaker %s (Build: %s): %s\n", PACEMAKER_VERSION, BUILD_VERSION, CRM_FEATURES); } crm_exit(CRM_EX_OK); while(1); // above does not return } /* * Option metadata */ static pcmk__cluster_option_t cluster_options[] = { /* name, old name, type, allowed values, * default value, validator, * flags, * short description, * long description */ { PCMK_OPT_DC_VERSION, NULL, "string", NULL, NULL, NULL, pcmk__opt_controld|pcmk__opt_generated, N_("Pacemaker version on cluster node elected Designated Controller " "(DC)"), N_("Includes a hash which identifies the exact revision the code was " "built from. Used for diagnostic purposes."), }, { PCMK_OPT_CLUSTER_INFRASTRUCTURE, NULL, "string", NULL, NULL, NULL, pcmk__opt_controld|pcmk__opt_generated, N_("The messaging layer on which Pacemaker is currently running"), N_("Used for informational and diagnostic purposes."), }, { PCMK_OPT_CLUSTER_NAME, NULL, "string", NULL, NULL, NULL, pcmk__opt_controld, N_("An arbitrary name for the cluster"), N_("This optional value is mostly for users' convenience as desired " "in administration, but may also be used in Pacemaker " "configuration rules via the #cluster-name node attribute, and " "by higher-level tools and resource agents."), }, { PCMK_OPT_DC_DEADTIME, NULL, "time", NULL, "20s", pcmk__valid_interval_spec, pcmk__opt_controld, N_("How long to wait for a response from other nodes during start-up"), N_("The optimal value will depend on the speed and load of your " "network and the type of switches used."), }, { PCMK_OPT_CLUSTER_RECHECK_INTERVAL, NULL, "time", NULL, "15min", pcmk__valid_interval_spec, pcmk__opt_controld, N_("Polling interval to recheck cluster state and evaluate rules " "with date specifications"), N_("Pacemaker is primarily event-driven, and looks ahead to know when " "to recheck cluster state for failure-timeout settings and most " "time-based rules. However, it will also recheck the cluster after " "this amount of inactivity, to evaluate rules with date " "specifications and serve as a fail-safe for certain types of " "scheduler bugs. A value of 0 disables polling. A positive value " "sets an interval in seconds, unless other units are specified " "(for example, \"5min\")."), }, { PCMK_OPT_FENCE_REACTION, NULL, "select", PCMK_VALUE_STOP ", " PCMK_VALUE_PANIC, PCMK_VALUE_STOP, NULL, pcmk__opt_controld, N_("How a cluster node should react if notified of its own fencing"), N_("A cluster node may receive notification of a \"succeeded\" " "fencing that targeted it if fencing is misconfigured, or if " "fabric fencing is in use that doesn't cut cluster communication. " "Use \"stop\" to attempt to immediately stop Pacemaker and stay " "stopped, or \"panic\" to attempt to immediately reboot the local " "node, falling back to stop on failure."), }, { PCMK_OPT_ELECTION_TIMEOUT, NULL, "time", NULL, "2min", pcmk__valid_interval_spec, pcmk__opt_controld|pcmk__opt_advanced, N_("Declare an election failed if it is not decided within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug."), NULL, }, { PCMK_OPT_SHUTDOWN_ESCALATION, NULL, "time", NULL, "20min", pcmk__valid_interval_spec, pcmk__opt_controld|pcmk__opt_advanced, N_("Exit immediately if shutdown does not complete within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug."), NULL, }, { PCMK_OPT_JOIN_INTEGRATION_TIMEOUT, "crmd-integration-timeout", "time", NULL, "3min", pcmk__valid_interval_spec, pcmk__opt_controld|pcmk__opt_advanced, N_("If you need to adjust this value, it probably indicates " "the presence of a bug."), NULL, }, { PCMK_OPT_JOIN_FINALIZATION_TIMEOUT, "crmd-finalization-timeout", "time", NULL, "30min", pcmk__valid_interval_spec, pcmk__opt_controld|pcmk__opt_advanced, N_("If you need to adjust this value, it probably indicates " "the presence of a bug."), NULL, }, { PCMK_OPT_TRANSITION_DELAY, "crmd-transition-delay", "time", NULL, "0s", pcmk__valid_interval_spec, pcmk__opt_controld|pcmk__opt_advanced, N_("Enabling this option will slow down cluster recovery under all " "conditions"), N_("Delay cluster recovery for this much time to allow for additional " "events to occur. Useful if your configuration is sensitive to " "the order in which ping updates arrive."), }, { PCMK_OPT_NO_QUORUM_POLICY, NULL, "select", PCMK_VALUE_STOP ", " PCMK_VALUE_FREEZE ", " PCMK_VALUE_IGNORE ", " PCMK_VALUE_DEMOTE ", " PCMK_VALUE_FENCE_LEGACY, PCMK_VALUE_STOP, pcmk__valid_no_quorum_policy, pcmk__opt_schedulerd, N_("What to do when the cluster does not have quorum"), NULL, }, { PCMK_OPT_SHUTDOWN_LOCK, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether to lock resources to a cleanly shut down node"), N_("When true, resources active on a node when it is cleanly shut down " "are kept \"locked\" to that node (not allowed to run elsewhere) " "until they start again on that node after it rejoins (or for at " "most shutdown-lock-limit, if set). Stonith resources and " "Pacemaker Remote connections are never locked. Clone and bundle " "instances and the promoted role of promotable clones are " "currently never locked, though support could be added in a future " "release."), }, { PCMK_OPT_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, pcmk__opt_schedulerd, N_("Do not lock resources to a cleanly shut down node longer than " "this"), N_("If shutdown-lock is true and this is set to a nonzero time " "duration, shutdown locks will expire after this much time has " "passed since the shutdown was initiated, even if the node has not " "rejoined."), }, { PCMK_OPT_ENABLE_ACL, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_based, N_("Enable Access Control Lists (ACLs) for the CIB"), NULL, }, { PCMK_OPT_SYMMETRIC_CLUSTER, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether resources can run on any node by default"), NULL, }, { PCMK_OPT_MAINTENANCE_MODE, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether the cluster should refrain from monitoring, starting, and " "stopping resources"), NULL, }, { PCMK_OPT_START_FAILURE_IS_FATAL, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether a start failure should prevent a resource from being " "recovered on the same node"), N_("When true, the cluster will immediately ban a resource from a node " "if it fails to start there. When false, the cluster will instead " "check the resource's fail count against its migration-threshold.") }, { PCMK_OPT_ENABLE_STARTUP_PROBES, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether the cluster should check for active resources during " "start-up"), NULL, }, // Fencing-related options { PCMK_OPT_STONITH_ENABLED, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd|pcmk__opt_advanced, N_("Whether nodes may be fenced as part of recovery"), N_("If false, unresponsive nodes are immediately assumed to be " "harmless, and resources that were active on them may be recovered " "elsewhere. This can result in a \"split-brain\" situation, " "potentially leading to data loss and/or service unavailability."), }, { PCMK_OPT_STONITH_ACTION, NULL, "select", "reboot, off, poweroff", PCMK_ACTION_REBOOT, pcmk__is_fencing_action, pcmk__opt_schedulerd, N_("Action to send to fence device when a node needs to be fenced " "(\"poweroff\" is a deprecated alias for \"off\")"), NULL, }, { PCMK_OPT_STONITH_TIMEOUT, NULL, "time", NULL, "60s", pcmk__valid_interval_spec, pcmk__opt_schedulerd, N_("How long to wait for on, off, and reboot fence actions to complete " "by default"), NULL, }, { PCMK_OPT_HAVE_WATCHDOG, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_schedulerd|pcmk__opt_generated, N_("Whether watchdog integration is enabled"), N_("This is set automatically by the cluster according to whether SBD " "is detected to be in use. User-configured values are ignored. " "The value `true` is meaningful if diskless SBD is used and " "`stonith-watchdog-timeout` is nonzero. In that case, if fencing " "is required, watchdog-based self-fencing will be performed via " "SBD without requiring a fencing resource explicitly configured."), }, { /* @COMPAT Currently, unparsable values default to -1 (auto-calculate), * while missing values default to 0 (disable). All values are accepted * (unless the controller finds that the value conflicts with the * SBD_WATCHDOG_TIMEOUT). * * At a compatibility break: properly validate as a timeout, let * either negative values or a particular string like "auto" mean auto- * calculate, and use 0 as the single default for when the option either * is unset or fails to validate. */ PCMK_OPT_STONITH_WATCHDOG_TIMEOUT, NULL, "time", NULL, "0", NULL, pcmk__opt_controld, N_("How long before nodes can be assumed to be safely down when " "watchdog-based self-fencing via SBD is in use"), N_("If this is set to a positive value, lost nodes are assumed to " "achieve self-fencing using watchdog-based SBD within this much " "time. This does not require a fencing resource to be explicitly " "configured, though a fence_watchdog resource can be configured, to " "limit use to specific nodes. If this is set to 0 (the default), " "the cluster will never assume watchdog-based self-fencing. If this " "is set to a negative value, the cluster will use twice the local " "value of the `SBD_WATCHDOG_TIMEOUT` environment variable if that " "is positive, or otherwise treat this as 0. WARNING: When used, " "this timeout must be larger than `SBD_WATCHDOG_TIMEOUT` on all " "nodes that use watchdog-based SBD, and Pacemaker will refuse to " "start on any of those nodes where this is not true for the local " "value or SBD is not active. When this is set to a negative value, " "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " "that use SBD, otherwise data corruption or loss could occur."), }, { PCMK_OPT_STONITH_MAX_ATTEMPTS, NULL, "integer", NULL, "10", pcmk__valid_positive_int, pcmk__opt_controld, N_("How many times fencing can fail before it will no longer be " "immediately re-attempted on a target"), NULL, }, { PCMK_OPT_CONCURRENT_FENCING, NULL, "boolean", NULL, PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Allow performing fencing operations in parallel"), NULL, }, { PCMK_OPT_STARTUP_FENCING, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd|pcmk__opt_advanced, N_("Whether to fence unseen nodes at start-up"), N_("Setting this to false may lead to a \"split-brain\" situation, " "potentially leading to data loss and/or service unavailability."), }, { PCMK_OPT_PRIORITY_FENCING_DELAY, NULL, "time", NULL, "0", pcmk__valid_interval_spec, pcmk__opt_schedulerd, N_("Apply fencing delay targeting the lost nodes with the highest " "total resource priority"), N_("Apply specified delay for the fencings that are targeting the lost " "nodes with the highest total resource priority in case we don't " "have the majority of the nodes in our cluster partition, so that " "the more significant nodes potentially win any fencing match, " "which is especially meaningful under split-brain of 2-node " "cluster. A promoted resource instance takes the base priority + 1 " "on calculation if the base priority is not 0. Any static/random " "delays that are introduced by `pcmk_delay_base/max` configured " "for the corresponding fencing resources will be added to this " "delay. This delay should be significantly greater than, safely " "twice, the maximum `pcmk_delay_base/max`. By default, priority " "fencing delay is disabled."), }, { PCMK_OPT_NODE_PENDING_TIMEOUT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, pcmk__opt_schedulerd, N_("How long to wait for a node that has joined the cluster to join " "the controller process group"), N_("Fence nodes that do not join the controller process group within " "this much time after joining the cluster, to allow the cluster " "to continue managing resources. A value of 0 means never fence " "pending nodes. Setting the value to 2h means fence nodes after " "2 hours."), }, { PCMK_OPT_CLUSTER_DELAY, NULL, "time", NULL, "60s", pcmk__valid_interval_spec, pcmk__opt_schedulerd, N_("Maximum time for node-to-node communication"), N_("The node elected Designated Controller (DC) will consider an action " "failed if it does not get a response from the node executing the " "action within this time (after considering the action's own " "timeout). The \"correct\" value will depend on the speed and " "load of your network and cluster nodes.") }, // Limits { PCMK_OPT_LOAD_THRESHOLD, NULL, "percentage", NULL, "80%", pcmk__valid_percentage, pcmk__opt_controld, N_("Maximum amount of system load that should be used by cluster " "nodes"), N_("The cluster will slow down its recovery process when the amount of " "system resources used (currently CPU) approaches this limit"), }, { PCMK_OPT_NODE_ACTION_LIMIT, NULL, "integer", NULL, "0", pcmk__valid_int, pcmk__opt_controld, N_("Maximum number of jobs that can be scheduled per node (defaults to " "2x cores)"), NULL, }, { PCMK_OPT_BATCH_LIMIT, NULL, "integer", NULL, "0", pcmk__valid_int, pcmk__opt_schedulerd, N_("Maximum number of jobs that the cluster may execute in parallel " "across all nodes"), N_("The \"correct\" value will depend on the speed and load of your " "network and cluster nodes. If set to 0, the cluster will " "impose a dynamically calculated limit when any node has a " "high load."), }, { PCMK_OPT_MIGRATION_LIMIT, NULL, "integer", NULL, "-1", pcmk__valid_int, pcmk__opt_schedulerd, N_("The number of live migration actions that the cluster is allowed " "to execute in parallel on a node (-1 means no limit)"), NULL, }, { PCMK_OPT_CLUSTER_IPC_LIMIT, NULL, "integer", NULL, "500", pcmk__valid_positive_int, pcmk__opt_based, N_("Maximum IPC message backlog before disconnecting a cluster daemon"), N_("Raise this if log has \"Evicting client\" messages for cluster " "daemon PIDs (a good value is the number of resources in the " "cluster multiplied by the number of nodes)."), }, // Orphans and stopping { PCMK_OPT_STOP_ALL_RESOURCES, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether the cluster should stop all active resources"), NULL, }, { PCMK_OPT_STOP_ORPHAN_RESOURCES, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether to stop resources that were removed from the " "configuration"), NULL, }, { PCMK_OPT_STOP_ORPHAN_ACTIONS, NULL, "boolean", NULL, PCMK_VALUE_TRUE, pcmk__valid_boolean, pcmk__opt_schedulerd, N_("Whether to cancel recurring actions removed from the " "configuration"), NULL, }, { PCMK__OPT_REMOVE_AFTER_STOP, NULL, "boolean", NULL, PCMK_VALUE_FALSE, pcmk__valid_boolean, pcmk__opt_schedulerd|pcmk__opt_deprecated, N_("Whether to remove stopped resources from the executor"), N_("Values other than default are poorly tested and potentially " "dangerous."), }, // Storing inputs { PCMK_OPT_PE_ERROR_SERIES_MAX, NULL, "integer", NULL, "-1", pcmk__valid_int, pcmk__opt_schedulerd, N_("The number of scheduler inputs resulting in errors to save"), N_("Zero to disable, -1 to store unlimited."), }, { PCMK_OPT_PE_WARN_SERIES_MAX, NULL, "integer", NULL, "5000", pcmk__valid_int, pcmk__opt_schedulerd, N_("The number of scheduler inputs resulting in warnings to save"), N_("Zero to disable, -1 to store unlimited."), }, { PCMK_OPT_PE_INPUT_SERIES_MAX, NULL, "integer", NULL, "4000", pcmk__valid_int, pcmk__opt_schedulerd, N_("The number of scheduler inputs without errors or warnings to save"), N_("Zero to disable, -1 to store unlimited."), }, // Node health { PCMK_OPT_NODE_HEALTH_STRATEGY, NULL, "select", PCMK_VALUE_NONE ", " PCMK_VALUE_MIGRATE_ON_RED ", " PCMK_VALUE_ONLY_GREEN ", " PCMK_VALUE_PROGRESSIVE ", " PCMK_VALUE_CUSTOM, PCMK_VALUE_NONE, pcmk__validate_health_strategy, pcmk__opt_schedulerd, N_("How cluster should react to node health attributes"), N_("Requires external entities to create node attributes (named with " "the prefix \"#health\") with values \"red\", \"yellow\", or " "\"green\".") }, { PCMK_OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, "0", pcmk__valid_int, pcmk__opt_schedulerd, N_("Base health score assigned to a node"), N_("Only used when \"node-health-strategy\" is set to " "\"progressive\"."), }, { PCMK_OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, "0", pcmk__valid_int, pcmk__opt_schedulerd, N_("The score to use for a node health attribute whose value is " "\"green\""), N_("Only used when \"node-health-strategy\" is set to \"custom\" or " "\"progressive\"."), }, { PCMK_OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, "0", pcmk__valid_int, pcmk__opt_schedulerd, N_("The score to use for a node health attribute whose value is " "\"yellow\""), N_("Only used when \"node-health-strategy\" is set to \"custom\" or " "\"progressive\"."), }, { PCMK_OPT_NODE_HEALTH_RED, NULL, "integer", NULL, "-INFINITY", pcmk__valid_int, pcmk__opt_schedulerd, N_("The score to use for a node health attribute whose value is " "\"red\""), N_("Only used when \"node-health-strategy\" is set to \"custom\" or " "\"progressive\".") }, // Placement strategy { PCMK_OPT_PLACEMENT_STRATEGY, NULL, "select", PCMK_VALUE_DEFAULT ", " PCMK_VALUE_UTILIZATION ", " PCMK_VALUE_MINIMAL ", " PCMK_VALUE_BALANCED, PCMK_VALUE_DEFAULT, pcmk__valid_placement_strategy, pcmk__opt_schedulerd, N_("How the cluster should allocate resources to nodes"), NULL, }, { NULL, }, }; static pcmk__cluster_option_t fencing_params[] = { /* name, old name, type, allowed values, * default value, validator, * flags, * short description, * long description */ { PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, pcmk__opt_advanced, N_("An alternate parameter to supply instead of 'port'"), N_("Some devices do not support the standard 'port' parameter or may " "provide additional ones. Use this to specify an alternate, device-" "specific, parameter that should indicate the machine to be " "fenced. A value of \"none\" can be used to tell the cluster not " "to supply any additional parameters."), }, { PCMK_STONITH_HOST_MAP, NULL, "string", NULL, NULL, NULL, pcmk__opt_none, N_("A mapping of node names to port numbers for devices that do not " "support node names."), N_("For example, \"node1:1;node2:2,3\" would tell the cluster to use " "port 1 for node1 and ports 2 and 3 for node2."), }, { PCMK_STONITH_HOST_LIST, NULL, "string", NULL, NULL, NULL, pcmk__opt_none, N_("Nodes targeted by this device"), N_("Comma-separated list of nodes that can be targeted by this device " "(for example, \"node1,node2,node3\"). If pcmk_host_check is " "\"static-list\", either this or pcmk_host_map must be set."), }, { PCMK_STONITH_HOST_CHECK, NULL, "select", "dynamic-list, static-list, status, none", NULL, NULL, pcmk__opt_none, N_("How to determine which nodes can be targeted by the device"), N_("Use \"dynamic-list\" to query the device via the 'list' command; " "\"static-list\" to check the pcmk_host_list attribute; " "\"status\" to query the device via the 'status' command; or " "\"none\" to assume every device can fence every node. " "The default value is \"static-list\" if pcmk_host_map or " "pcmk_host_list is set; otherwise \"dynamic-list\" if the device " "supports the list operation; otherwise \"status\" if the device " "supports the status operation; otherwise \"none\""), }, { PCMK_STONITH_DELAY_MAX, NULL, "time", NULL, "0s", NULL, pcmk__opt_none, N_("Enable a delay of no more than the time specified before executing " "fencing actions."), N_("Enable a delay of no more than the time specified before executing " "fencing actions. Pacemaker derives the overall delay by taking " "the value of pcmk_delay_base and adding a random delay value such " "that the sum is kept below this maximum."), }, { PCMK_STONITH_DELAY_BASE, NULL, "string", NULL, "0s", NULL, pcmk__opt_none, N_("Enable a base delay for fencing actions and specify base delay " "value."), N_("This enables a static delay for fencing actions, which can help " "avoid \"death matches\" where two nodes try to fence each other " "at the same time. If pcmk_delay_max is also used, a random delay " "will be added such that the total delay is kept below that value. " "This can be set to a single time value to apply to any node " "targeted by this device (useful if a separate device is " "configured for each target), or to a node map (for example, " "\"node1:1s;node2:5\") to set a different value for each target."), }, { PCMK_STONITH_ACTION_LIMIT, NULL, "integer", NULL, "1", NULL, pcmk__opt_none, N_("The maximum number of actions can be performed in parallel on this " "device"), N_("Cluster property concurrent-fencing=\"true\" needs to be " "configured first. Then use this to specify the maximum number of " "actions can be performed in parallel on this device. A value of " "-1 means an unlimited number of actions can be performed in " "parallel."), }, { "pcmk_reboot_action", NULL, "string", NULL, PCMK_ACTION_REBOOT, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'reboot'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'reboot' action."), }, { "pcmk_reboot_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'reboot' actions instead " "of stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'reboot' actions."), }, { "pcmk_reboot_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'reboot' command within the " "timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'reboot' action before giving up."), }, { "pcmk_off_action", NULL, "string", NULL, PCMK_ACTION_OFF, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'off'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'off' action."), }, { "pcmk_off_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'off' actions instead of " "stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'off' actions."), }, { "pcmk_off_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'off' command within the " "timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'off' action before giving up."), }, { "pcmk_on_action", NULL, "string", NULL, PCMK_ACTION_ON, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'on'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'on' action."), }, { "pcmk_on_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'on' actions instead of " "stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'on' actions."), }, { "pcmk_on_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'on' command within the " "timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'on' action before giving up."), }, { "pcmk_list_action", NULL, "string", NULL, PCMK_ACTION_LIST, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'list'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'list' action."), }, { "pcmk_list_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'list' actions instead of " "stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'list' actions."), }, { "pcmk_list_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'list' command within the " "timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'list' action before giving up."), }, { "pcmk_monitor_action", NULL, "string", NULL, PCMK_ACTION_MONITOR, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'monitor'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'monitor' action."), }, { "pcmk_monitor_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'monitor' actions instead " "of stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'monitor' actions."), }, { "pcmk_monitor_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'monitor' command within " "the timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'monitor' action before giving up."), }, { "pcmk_status_action", NULL, "string", NULL, PCMK_ACTION_STATUS, NULL, pcmk__opt_advanced, N_("An alternate command to run instead of 'status'"), N_("Some devices do not support the standard commands or may provide " "additional ones. Use this to specify an alternate, device-" "specific, command that implements the 'status' action."), }, { "pcmk_status_timeout", NULL, "time", NULL, "60s", NULL, pcmk__opt_advanced, N_("Specify an alternate timeout to use for 'status' actions instead " "of stonith-timeout"), N_("Some devices need much more/less time to complete than normal. " "Use this to specify an alternate, device-specific, timeout for " "'status' actions."), }, { "pcmk_status_retries", NULL, "integer", NULL, "2", NULL, pcmk__opt_advanced, N_("The maximum number of times to try the 'status' command within " "the timeout period"), N_("Some devices do not support multiple connections. Operations may " "\"fail\" if the device is busy with another task. In that case, " "Pacemaker will automatically retry the operation if there is time " "remaining. Use this option to alter the number of times Pacemaker " "tries a 'status' action before giving up."), }, { NULL, }, }; /* * Environment variable option handling */ /*! * \internal * \brief Get the value of a Pacemaker environment variable option * * If an environment variable option is set, with either a PCMK_ or (for * backward compatibility) HA_ prefix, log and return the value. * * \param[in] option Environment variable name (without prefix) * * \return Value of environment variable option, or NULL in case of * option name too long or value not found */ const char * pcmk__env_option(const char *option) { const char *const prefixes[] = {"PCMK_", "HA_"}; char env_name[NAME_MAX]; const char *value = NULL; CRM_CHECK(!pcmk__str_empty(option), return NULL); for (int i = 0; i < PCMK__NELEM(prefixes); i++) { int rv = snprintf(env_name, NAME_MAX, "%s%s", prefixes[i], option); if (rv < 0) { crm_err("Failed to write %s%s to buffer: %s", prefixes[i], option, strerror(errno)); return NULL; } if (rv >= sizeof(env_name)) { crm_trace("\"%s%s\" is too long", prefixes[i], option); continue; } value = getenv(env_name); if (value != NULL) { crm_trace("Found %s = %s", env_name, value); return value; } } crm_trace("Nothing found for %s", option); return NULL; } /*! * \brief Set or unset a Pacemaker environment variable option * * Set an environment variable option with a \c "PCMK_" prefix and optionally * an \c "HA_" prefix for backward compatibility. * * \param[in] option Environment variable name (without prefix) * \param[in] value New value (or NULL to unset) * \param[in] compat If false and \p value is not \c NULL, set only * \c "PCMK_