diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c index bd92795cf0..2e0a67dba2 100644 --- a/daemons/controld/controld_alerts.c +++ b/daemons/controld/controld_alerts.c @@ -1,92 +1,88 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include +#include #include #include #include #include #include #include static GList *crmd_alert_list = NULL; void crmd_unpack_alerts(xmlNode *alerts) { pe_free_alert_list(crmd_alert_list); crmd_alert_list = pe_unpack_alerts(alerts); } void crmd_alert_node_event(crm_node_t *node) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node->uname, node->id, node->state); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc; lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } - desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)", - e->action, e->target, - (e->executioner? e->executioner : ""), - e->client_origin, e->origin, - pcmk_strerror(e->result), e->id); - + desc = stonith__event_description(e); lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, e->target, e->operation, desc, e->result); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node, op); } diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index acc16d05e9..d2b49f831a 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -1,216 +1,217 @@ /* * Copyright 2011-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef STONITH_NG_INTERNAL__H # define STONITH_NG_INTERNAL__H # include # include # include # include # include enum st_device_flags { st_device_supports_list = 0x0001, st_device_supports_status = 0x0002, st_device_supports_reboot = 0x0004, st_device_supports_parameter_plug = 0x0008, st_device_supports_parameter_port = 0x0010, }; #define stonith__set_device_flags(device_flags, device_id, flags_to_set) do { \ device_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fence device", device_id, \ (device_flags), (flags_to_set), \ #flags_to_set); \ } while (0) #define stonith__set_call_options(st_call_opts, call_for, flags_to_set) do { \ st_call_opts = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_set), \ #flags_to_set); \ } while (0) #define stonith__clear_call_options(st_call_opts, call_for, flags_to_clear) do { \ st_call_opts = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_clear), \ #flags_to_clear); \ } while (0) struct stonith_action_s; typedef struct stonith_action_s stonith_action_t; stonith_action_t *stonith_action_create(const char *agent, const char *_action, const char *victim, uint32_t victim_nodeid, int timeout, GHashTable * device_args, GHashTable * port_map, const char * host_arg); void stonith__destroy_action(stonith_action_t *action); pcmk__action_result_t *stonith__action_result(stonith_action_t *action); int stonith__result2rc(const pcmk__action_result_t *result); void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result); xmlNode *stonith__find_xe_with_result(xmlNode *xml); int stonith_action_execute_async(stonith_action_t * action, void *userdata, void (*done) (int pid, const pcmk__action_result_t *result, void *user_data), void (*fork_cb) (int pid, void *user_data)); xmlNode *create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list); xmlNode *create_device_registration_xml(const char *id, enum stonith_namespace namespace, const char *agent, stonith_key_value_t *params, const char *rsc_provides); void stonith__register_messages(pcmk__output_t *out); GList *stonith__parse_targets(const char *hosts); gboolean stonith__later_succeeded(stonith_history_t *event, stonith_history_t *top_history); stonith_history_t *stonith__sort_history(stonith_history_t *history); void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata); # define ST_LEVEL_MAX 10 # define F_STONITH_CLIENTID "st_clientid" # define F_STONITH_CALLOPTS "st_callopt" # define F_STONITH_CALLID "st_callid" # define F_STONITH_CALLDATA "st_calldata" # define F_STONITH_OPERATION "st_op" # define F_STONITH_TARGET "st_target" # define F_STONITH_REMOTE_OP_ID "st_remote_op" # define F_STONITH_REMOTE_OP_ID_RELAY "st_remote_op_relay" # define F_STONITH_RC "st_rc" # define F_STONITH_OUTPUT "st_output" /*! Timeout period per a device execution */ # define F_STONITH_TIMEOUT "st_timeout" # define F_STONITH_TOLERANCE "st_tolerance" # define F_STONITH_DELAY "st_delay" /*! Action specific timeout period returned in query of fencing devices. */ # define F_STONITH_ACTION_TIMEOUT "st_action_timeout" /*! Host in query result is not allowed to run this action */ # define F_STONITH_ACTION_DISALLOWED "st_action_disallowed" /*! Maximum of random fencing delay for a device */ # define F_STONITH_DELAY_MAX "st_delay_max" /*! Base delay used for a fencing delay */ # define F_STONITH_DELAY_BASE "st_delay_base" /*! Has this device been verified using a monitor type * operation (monitor, list, status) */ # define F_STONITH_DEVICE_VERIFIED "st_monitor_verified" /*! device is required for this action */ # define F_STONITH_DEVICE_REQUIRED "st_required" /*! number of available devices in query result */ # define F_STONITH_AVAILABLE_DEVICES "st-available-devices" # define F_STONITH_CALLBACK_TOKEN "st_async_id" # define F_STONITH_CLIENTNAME "st_clientname" # define F_STONITH_CLIENTNODE "st_clientnode" # define F_STONITH_NOTIFY_ACTIVATE "st_notify_activate" # define F_STONITH_NOTIFY_DEACTIVATE "st_notify_deactivate" # define F_STONITH_DELEGATE "st_delegate" /*! The node initiating the stonith operation. If an operation * is relayed, this is the last node the operation lands on. When * in standalone mode, origin is the client's id that originated the * operation. */ # define F_STONITH_ORIGIN "st_origin" # define F_STONITH_HISTORY_LIST "st_history" # define F_STONITH_DATE "st_date" # define F_STONITH_DATE_NSEC "st_date_nsec" # define F_STONITH_STATE "st_state" # define F_STONITH_ACTIVE "st_active" # define F_STONITH_DIFFERENTIAL "st_differential" # define F_STONITH_DEVICE "st_device_id" # define F_STONITH_ACTION "st_device_action" # define F_STONITH_MERGED "st_op_merged" # define T_STONITH_NG "stonith-ng" # define T_STONITH_REPLY "st-reply" /*! For async operations, an event from the server containing * the total amount of time the server is allowing for the operation * to take place is returned to the client. */ # define T_STONITH_TIMEOUT_VALUE "st-async-timeout-value" # define T_STONITH_NOTIFY "st_notify" # define STONITH_ATTR_ACTION_OP "action" # define STONITH_OP_EXEC "st_execute" # define STONITH_OP_TIMEOUT_UPDATE "st_timeout_update" # define STONITH_OP_QUERY "st_query" # define STONITH_OP_FENCE "st_fence" # define STONITH_OP_RELAY "st_relay" # define STONITH_OP_DEVICE_ADD "st_device_register" # define STONITH_OP_DEVICE_DEL "st_device_remove" # define STONITH_OP_FENCE_HISTORY "st_fence_history" # define STONITH_OP_LEVEL_ADD "st_level_add" # define STONITH_OP_LEVEL_DEL "st_level_remove" # define STONITH_WATCHDOG_AGENT "fence_watchdog" /* Don't change 2 below as it would break rolling upgrade */ # define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog" # define STONITH_WATCHDOG_ID "watchdog" /* Exported for crm_mon to reference */ int stonith__failed_history(pcmk__output_t *out, va_list args); int stonith__history(pcmk__output_t *out, va_list args); int stonith__full_history(pcmk__output_t *out, va_list args); int stonith__pending_actions(pcmk__output_t *out, va_list args); stonith_history_t *stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data); bool stonith__event_state_pending(stonith_history_t *history, void *user_data); bool stonith__event_state_eq(stonith_history_t *history, void *user_data); bool stonith__event_state_neq(stonith_history_t *history, void *user_data); int stonith__legacy2status(int rc); int stonith__exit_status(stonith_callback_data_t *data); int stonith__execution_status(stonith_callback_data_t *data); const char *stonith__exit_reason(stonith_callback_data_t *data); int stonith__event_exit_status(stonith_event_t *event); int stonith__event_execution_status(stonith_event_t *event); const char *stonith__event_exit_reason(stonith_event_t *event); +char *stonith__event_description(stonith_event_t *event); /*! * \internal * \brief Is a fencing operation in pending state? * * \param[in] state State as enum op_state value * * \return A boolean */ static inline bool stonith__op_state_pending(enum op_state state) { return state != st_failed && state != st_done; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node); gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node); #endif diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index 5fec7529e3..b1de912b2a 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2445 +1,2483 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fencing_private.h" CRM_TRACE_INIT_DATA(stonith); typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! * \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_casei)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif crm_err("Unknown fence agent: %s", agent); return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = st?st:stonith_api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ crm_warn("watchdog-fencing-query failed"); } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { stonith_api_delete(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = create_xml_node(NULL, "notify"); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(blob.xml, F_SUBTYPE, T_STONITH_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); free_xml(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace namespace, const char *agent, stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = create_xml_node(NULL, F_STONITH_DEVICE); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); #if HAVE_STONITH_STONITH_H if (namespace == st_namespace_any) { namespace = stonith_get_namespace(agent, NULL); } if (namespace == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, XML_ATTR_ID, id); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, "agent", agent); if ((namespace != st_namespace_any) && (namespace != st_namespace_invalid)) { crm_xml_add(data, "namespace", stonith_namespace2text(namespace)); } if (rsc_provides) { crm_xml_add(data, "rsc_provides", rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t * st, int call_options, const char *id, const char *namespace, const char *agent, stonith_key_value_t * params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, XML_ATTR_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); free_xml(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { size_t len = 0; char *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); CRM_CHECK(data, return NULL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_ID, level); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); if (node) { crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); } else if (pattern) { crm_xml_add(data, XML_ATTR_STONITH_TARGET_PATTERN, pattern); } else { crm_xml_add(data, XML_ATTR_STONITH_TARGET_ATTRIBUTE, attr); crm_xml_add(data, XML_ATTR_STONITH_TARGET_VALUE, value); } // cppcheck seems not to understand the abort logic behind pcmk__realloc // cppcheck-suppress memleak for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, &len, device_list->value, ","); } crm_xml_add(data, XML_ATTR_STONITH_DEVICES, list); free(list); return data; } static int stonith_api_register_level_full(stonith_t * st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); free_xml(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static int stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, stonith_key_value_t ** devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } static int stonith_api_device_metadata(stonith_t * stonith, int call_options, const char *agent, const char *namespace, char **output, int timeout) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace); crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObjectPtr xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_TARGET, target); crm_xml_add(data, F_STONITH_ACTION, "off"); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = xpath_search(output, "//@agent"); if (xpathObj) { max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("%s[%d] = %s", "//@agent", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, XML_ATTR_ID)); } } freeXpathObject(xpathObj); } free_xml(output); free_xml(data); return max; } static int stonith_api_call(stonith_t * stonith, int call_options, const char *id, const char *action, const char *victim, int timeout, xmlNode ** output) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, F_STONITH_DEVICE); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add(data, F_STONITH_DEVICE, id); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add(data, F_STONITH_TARGET, victim); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout); free_xml(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, "list", NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, F_STONITH_OUTPUT); if (list_str) { *list_info = strdup(list_str); } } if (output) { free_xml(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, "monitor", NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, "status", port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); crm_xml_add(data, F_STONITH_ACTION, action); crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance); crm_xml_add_int(data, F_STONITH_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); free_xml(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, "off", 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = create_xml_node(NULL, __func__); crm_xml_add(data, F_STONITH_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); free_xml(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = get_xpath_object("//" F_STONITH_HISTORY_LIST, output, LOG_NEVER); for (op = pcmk__xml_first_child(reply); op != NULL; op = pcmk__xml_next(op)) { stonith_history_t *kvp; long long completed; long long completed_nsec = 0L; kvp = calloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, F_STONITH_TARGET); kvp->action = crm_element_value_copy(op, F_STONITH_ACTION); kvp->origin = crm_element_value_copy(op, F_STONITH_ORIGIN); kvp->delegate = crm_element_value_copy(op, F_STONITH_DELEGATE); kvp->client = crm_element_value_copy(op, F_STONITH_CLIENTNAME); crm_element_value_ll(op, F_STONITH_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_ll(op, F_STONITH_DATE_NSEC, &completed_nsec); kvp->completed_nsec = completed_nsec; crm_element_value_int(op, F_STONITH_STATE, &kvp->state); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } free_xml(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = create_xml_node(NULL, "stonith_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "stonith_command"); crm_xml_add(op_msg, F_TYPE, T_STONITH_NG); crm_xml_add(op_msg, F_STONITH_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_STONITH_OPERATION, op); crm_xml_add_int(op_msg, F_STONITH_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, F_STONITH_CALLOPTS, call_options); if (data != NULL) { add_message_xml(op_msg, F_STONITH_CALLDATA, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } /*! * \internal * \brief Invoke a (single) specified fence action callback * * \param[in] st Fencer API connection * \param[in] call_id If positive, call ID of completed fence action, otherwise * legacy return code for early action failure * \param[in] result Full result for action * \param[in] userdata User data to pass to callback * \param[in] callback Fence action callback to invoke */ static void invoke_fence_action_callback(stonith_t *st, int call_id, pcmk__action_result_t *result, void *userdata, void (*callback) (stonith_t *st, stonith_callback_data_t *data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = pcmk_rc2legacy(stonith__result2rc(result)); data.userdata = userdata; data.opaque = (void *) result; callback(st, &data); } /*! * \internal * \brief Invoke any callbacks registered for a specified fence action result * * Given a fence action result from the fencer, invoke any callback registered * for that action, as well as any global callback registered. * * \param[in] st Fencer API connection * \param[in] msg If non-NULL, fencer reply * \param[in] call_id If \p msg is NULL, call ID of action that timed out */ static void invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) { stonith_private_t *private = NULL; stonith_callback_client_t *cb_info = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; if (msg == NULL) { // Fencer didn't reply in time pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Timeout waiting for reply from fencer"); CRM_LOG_ASSERT(call_id > 0); } else { // We have the fencer reply if ((crm_element_value_int(msg, F_STONITH_CALLID, &call_id) != 0) || (call_id <= 0)) { crm_log_xml_warn(msg, "Bad fencer reply"); } stonith__xe_get_result(msg, &result); } if (call_id > 0) { cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); } if ((cb_info != NULL) && (cb_info->callback != NULL) && (pcmk__result_ok(&result) || !(cb_info->only_success))) { crm_trace("Invoking callback %s for call %d", crm_str(cb_info->id), call_id); invoke_fence_action_callback(stonith, call_id, &result, cb_info->user_data, cb_info->callback); } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) { crm_warn("Fencing action without registered callback failed: %d (%s%s%s)", result.exit_status, pcmk_exec_status_str(result.execution_status), ((result.exit_reason == NULL)? "" : ": "), ((result.exit_reason == NULL)? "" : result.exit_reason)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_fence_action_callback(stonith, call_id, &result, NULL, private->op_callback); } if (cb_info != NULL) { stonith_api_del_callback(stonith, call_id, FALSE); } pcmk__reset_result(&result); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); invoke_registered_callbacks(timer->stonith, NULL, timer->call_id); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = calloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = g_timeout_add(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = string2xml(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, F_TYPE); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { invoke_registered_callbacks(st, blob.xml, 0); } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, T_STONITH_TIMEOUT_VALUE, pcmk__str_casei)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, F_STONITH_TIMEOUT, &timeout); crm_element_value_int(blob.xml, F_STONITH_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } free_xml(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; CRM_ASSERT(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc && crm_ipc_connect(native->ipc)) { *stonith_fd = crm_ipc_get_fd(native->ipc); } else if (native->ipc) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "stonith_command"); crm_xml_add(hello, F_TYPE, T_STONITH_NG); crm_xml_add(hello, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_STONITH_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, F_STONITH_OPERATION); native->token = crm_element_value_copy(reply, F_STONITH_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { #if HAVE_MSGFROMIPC_TIMEOUT stonith->call_timeout = PCMK__IPC_TIMEOUT; #endif crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } free_xml(reply); free_xml(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " CRM_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = create_xml_node(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, F_STONITH_OPERATION, T_STONITH_NOTIFY); if (enabled) { crm_xml_add(notify_msg, F_STONITH_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, F_STONITH_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } free_xml(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; crm_debug("Removing callback for %s events", event); private = stonith->st_private; new_client = calloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { // Add global callback private->op_callback = callback; } else if (call_id < 0) { // Call failed immediately, so call callback now if (!(options & st_opt_report_only_success)) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); pcmk__set_result(&result, CRM_EX_ERROR, stonith__legacy2status(call_id), NULL); invoke_fence_action_callback(stonith, call_id, &result, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = calloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, crm_str(blob->id)); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /*! * \internal * \brief Get the data section of a fencer notification * * \param[in] msg Notification XML * \param[in] ntype Notification type */ static xmlNode * get_event_data_xml(xmlNode *msg, const char *ntype) { char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG); free(data_addr); return data; } /* */ static stonith_event_t * xml_to_event(xmlNode *msg, pcmk__action_result_t *result) { stonith_event_t *event = calloc(1, sizeof(stonith_event_t)); const char *ntype = crm_element_value(msg, F_SUBTYPE); CRM_ASSERT((event != NULL) && (result != NULL)); crm_log_xml_trace(msg, "stonith_notify"); // All notification types have the operation result event->opaque = result; stonith__xe_get_result(msg, result); // @COMPAT The API originally provided the result as a legacy return code event->result = pcmk_rc2legacy(stonith__result2rc(result)); // Fence notifications have additional information if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { xmlNode *data = get_event_data_xml(msg, ntype); if (data == NULL) { crm_err("No data for %s event", ntype); crm_log_xml_notice(msg, "BadEvent"); } else { event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN); event->action = crm_element_value_copy(data, F_STONITH_ACTION); event->target = crm_element_value_copy(data, F_STONITH_TARGET); event->executioner = crm_element_value_copy(data, F_STONITH_DELEGATE); event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID); event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME); event->device = crm_element_value_copy(data, F_STONITH_DEVICE); } event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION); } return event; } static void event_free(stonith_event_t * event) { free(event->id); free(event->type); free(event->message); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); pcmk__reset_result((pcmk__action_result_t *) (event->opaque)); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, F_SUBTYPE); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_casei)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml, &result); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; CRM_ASSERT(stonith && stonith->st_private && op); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, F_STONITH_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, F_STONITH_DELAY); if (delay_s) { crm_xml_add(op_msg, F_STONITH_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } free_xml(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); free_xml(op_reply); return stonith->call_id; } rc = pcmk_ok; crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id); if (reply_id == stonith->call_id) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Synchronous reply %d received", reply_id); stonith__xe_get_result(op_reply, &result); rc = pcmk_rc2legacy(stonith__result2rc(&result)); pcmk__reset_result(&result); if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); free_xml(op_reply); rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); free_xml(op_reply); rc = -ENOMSG; } done: if (crm_ipc_connected(native->ipc) == FALSE) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } free_xml(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; CRM_ASSERT(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (crm_ipc_connected(private->ipc) == FALSE) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int stonith_api_free(stonith_t * stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Disconnecting %p first", stonith); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { crm_trace("Destroying %p", stonith); if(stonith) { stonith->cmds->free(stonith); } } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, stonith_key_value_t *params, int timeout, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; const char *host_arg = NULL; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (pcmk__str_eq(params->key, PCMK_STONITH_HOST_ARGUMENT, pcmk__str_casei)) { host_arg = params->value; } if (!pcmk_stonith_param(params->key)) { g_hash_table_insert(params_table, strdup(params->key), strdup(params->value)); } } #if SUPPORT_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params_table); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params_table, host_arg, timeout, output, error_output); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params_table, timeout, output, error_output); break; #endif default: rc = -EINVAL; errno = EINVAL; crm_perror(LOG_ERR, "Agent %s not found or does not support validation", agent); break; } g_hash_table_destroy(params_table); return rc; } stonith_t * stonith_api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } /* *INDENT-OFF* */ new_stonith->cmds->free = stonith_api_free; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; /* *INDENT-ON* */ return new_stonith; } /*! * \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " CRM_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = calloc(1, sizeof(stonith_key_value_t)); if (key) { p->key = strdup(key); } if (value) { p->value = strdup(value); } end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith_api_new(); const char *action = off? "off" : "reboot"; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_suicide); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } stonith_api_delete(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith_api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } stonith_api_delete(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith_api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); stonith_api_delete(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (!strcmp(action, "on")) { return "unfencing"; } else if (!strcmp(action, "off")) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = calloc(i - entry_start + 1, sizeof(char)); CRM_ASSERT(entry != NULL); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " CRM_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, "on", "off", NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Determine if a later stonith event succeeded. * * \note Before calling this function, use stonith__sort_history() to sort the * top_history argument. */ gboolean stonith__later_succeeded(stonith_history_t *event, stonith_history_t *top_history) { gboolean ret = FALSE; for (stonith_history_t *prev_hp = top_history; prev_hp; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_casei) && pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { ret = TRUE; break; } } return ret; } /*! * \internal * \brief Sort the stonith-history * sort by competed most current on the top * pending actions lacking a completed-stamp are gathered at the top * * \param[in] history List of stonith actions * */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } void stonith__device_parameter_flags(uint32_t *device_flags, const char *device_name, xmlNode *metadata) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; CRM_CHECK((device_flags != NULL) && (metadata != NULL), return); xpath = xpath_search(metadata, "//parameter"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *parameter = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if (match == NULL) { continue; } parameter = crm_element_value(match, "name"); if (pcmk__str_eq(parameter, "plug", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_plug); } else if (pcmk__str_eq(parameter, "port", pcmk__str_casei)) { stonith__set_device_flags(*device_flags, device_name, st_device_supports_parameter_port); } } freeXpathObject(xpath); } /*! * \internal * \brief Return the exit status from an async action callback * * \param[in] data Callback data * * \return Exit status from callback data */ int stonith__exit_status(stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return CRM_EX_ERROR; } return ((pcmk__action_result_t *) data->opaque)->exit_status; } /*! * \internal * \brief Return the execution status from an async action callback * * \param[in] data Callback data * * \return Execution status from callback data */ int stonith__execution_status(stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } return ((pcmk__action_result_t *) data->opaque)->execution_status; } /*! * \internal * \brief Return the exit reason from an async action callback * * \param[in] data Callback data * * \return Exit reason from callback data */ const char * stonith__exit_reason(stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return NULL; } return ((pcmk__action_result_t *) data->opaque)->exit_reason; } /*! * \internal * \brief Return the exit status from an event notification * * \param[in] event Event * * \return Exit status from event */ int stonith__event_exit_status(stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return CRM_EX_ERROR; } return ((pcmk__action_result_t *) event->opaque)->exit_status; } /*! * \internal * \brief Return the execution status from an event notification * * \param[in] event Event * * \return Execution status from event */ int stonith__event_execution_status(stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } return ((pcmk__action_result_t *) event->opaque)->execution_status; } /*! * \internal * \brief Return the exit reason from an event notification * * \param[in] event Event * * \return Exit reason from event */ const char * stonith__event_exit_reason(stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return NULL; } return ((pcmk__action_result_t *) event->opaque)->exit_reason; } +/*! + * \internal + * \brief Return a human-friendly description of a fencing event + * + * \param[in] event Event to describe + * + * \return Newly allocated string with description of \p event + * \note The caller is responsible for freeing the return value. + * This function asserts on memory errors and never returns NULL. + * \note This currently is useful only for events of type + * T_STONITH_NOTIFY_FENCE. + */ +char * +stonith__event_description(stonith_event_t *event) +{ + const char *reason; + const char *status; + + if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) { + status = pcmk_exec_status_str(stonith__event_execution_status(event)); + } else if (stonith__event_exit_status(event) != CRM_EX_OK) { + status = pcmk_exec_status_str(PCMK_EXEC_ERROR); + } else { + status = crm_exit_str(CRM_EX_OK); + } + reason = stonith__event_exit_reason(event); + + return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)", + event->action, event->target, + (event->executioner? event->executioner : "the cluster"), + (event->client_origin? event->client_origin : "a client"), + event->origin, status, + ((reason == NULL)? "" : " ("), + ((reason == NULL)? "" : reason), + ((reason == NULL)? "" : ")"), + event->id); +} + // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START const char *get_stonith_provider(const char *agent, const char *provider); const char * get_stonith_provider(const char *agent, const char *provider) { return stonith_namespace2text(stonith_get_namespace(agent, provider)); } // LCOV_EXCL_STOP // End deprecated API diff --git a/tools/crm_mon.c b/tools/crm_mon.c index a6c459aaf7..e7b4fe2847 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2409 +1,2408 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static unsigned int show; static unsigned int show_opts = pcmk_show_pending; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint reconnect_timer = 0; static mainloop_timer_t *refresh_timer = NULL; static pe_working_set_t *mon_data_set = NULL; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; volatile crm_trigger_t *refresh_trigger = NULL; static gboolean fence_history = FALSE; static gboolean has_warnings = FALSE; static gboolean on_remote_node = FALSE; static gboolean use_cib_native = FALSE; int interactive_fence_level = 0; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; /* Define exit codes for monitoring-compatible output * For nagios plugins, the possibilities are * OK=0, WARN=1, CRIT=2, and UNKNOWN=3 */ #define MON_STATUS_WARN CRM_EX_ERROR #define MON_STATUS_CRIT CRM_EX_INVALID_PARAM #define MON_STATUS_UNKNOWN CRM_EX_UNIMPLEMENT_FEATURE #define RECONNECT_MSECS 5000 struct { guint reconnect_ms; gboolean daemonize; gboolean fence_connect; gboolean one_shot; gboolean print_pending; gboolean show_bans; gboolean watch_fencing; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .fence_connect = TRUE, .reconnect_ms = RECONNECT_MSECS }; static void clean_up_fencing_connection(void); static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static void handle_connection_failures(int rc); static int mon_refresh_display(gpointer user_data); static int cib_connect(gboolean full); static int fencing_connect(void); static int pacemakerd_status(void); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void refresh_after_event(gboolean data_updated, gboolean enforce); static unsigned int all_includes(mon_output_format_t fmt) { if (fmt == mon_output_monitor || fmt == mon_output_plain || fmt == mon_output_console) { return ~pcmk_section_options; } else { return pcmk_section_all; } } static unsigned int default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_monitor: case mon_output_plain: case mon_output_console: return pcmk_section_summary | pcmk_section_nodes | pcmk_section_resources | pcmk_section_failures; case mon_output_xml: case mon_output_legacy_xml: return all_includes(fmt); case mon_output_html: case mon_output_cgi: return pcmk_section_summary | pcmk_section_nodes | pcmk_section_resources | pcmk_section_failures; default: return 0; } } struct { const char *name; unsigned int bit; } sections[] = { { "attributes", pcmk_section_attributes }, { "bans", pcmk_section_bans }, { "counts", pcmk_section_counts }, { "dc", pcmk_section_dc }, { "failcounts", pcmk_section_failcounts }, { "failures", pcmk_section_failures }, { "fencing", pcmk_section_fencing_all }, { "fencing-failed", pcmk_section_fence_failed }, { "fencing-pending", pcmk_section_fence_pending }, { "fencing-succeeded", pcmk_section_fence_worked }, { "maint-mode", pcmk_section_maint_mode }, { "nodes", pcmk_section_nodes }, { "operations", pcmk_section_operations }, { "options", pcmk_section_options }, { "resources", pcmk_section_resources }, { "stack", pcmk_section_stack }, { "summary", pcmk_section_summary }, { "tickets", pcmk_section_tickets }, { "times", pcmk_section_times }, { NULL } }; static unsigned int find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, " "none, operations, options, resources, stack, summary, " "tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { unsigned int bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= pcmk_section_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (pcmk__str_any_of(*s, "default", "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, "none", pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " "default, failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, none, " "operations, options, resources, stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include_exclude(GSList *lst, GError **error) { gboolean rc = TRUE; GSList *node = lst; while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_cgi_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("html"); output_format = mon_output_cgi; options.one_shot = TRUE; return TRUE; } static gboolean as_html_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } if (args->output_dest != NULL) { free(args->output_dest); args->output_dest = NULL; } if (optarg != NULL) { args->output_dest = strdup(optarg); } args->output_ty = strdup("html"); output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); return TRUE; } static gboolean as_simple_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_monitor; options.one_shot = TRUE; return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_legacy_xml; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg == NULL) { interactive_fence_level = 2; } else { pcmk__scan_min_int(optarg, &interactive_fence_level, 0); } switch (interactive_fence_level) { case 3: options.fence_connect = TRUE; fence_history = TRUE; return include_exclude_cb("--include", "fencing", data, err); case 2: options.fence_connect = TRUE; fence_history = TRUE; return include_exclude_cb("--include", "fencing", data, err); case 1: options.fence_connect = TRUE; fence_history = TRUE; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.fence_connect = FALSE; fence_history = FALSE; return include_exclude_cb("--exclude", "fencing", data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_rscs_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_inactive_rscs; return TRUE; } static gboolean no_curses_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { output_format = mon_output_plain; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_brief; return TRUE; } static gboolean print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_details; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_timing; return user_include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { options.reconnect_ms = crm_parse_interval_spec(optarg); } return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = user_include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return user_include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.one_shot = TRUE; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', 0, G_OPTION_ARG_NONE, &options.one_shot, "Display the cluster status once on the console and exit", NULL }, { "daemonize", 'd', 0, G_OPTION_ARG_NONE, &options.daemonize, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if 'record-pending' is enabled", NULL }, { "simple-status", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_simple_cb, "Display the cluster status once as a simple one line output (suitable for nagios)", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "as-html", 'h', G_OPTION_FLAG_FILENAME, G_OPTION_ARG_CALLBACK, as_html_cb, "Write cluster status to the named HTML file.\n" INDENT "Use --output-as=html --output-to=FILE instead.", "FILE" }, { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { "disable-ncurses", 'N', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, no_curses_cb, "Disable the use of ncurses.\n" INDENT "Use --output-as=text instead.", NULL }, { "web-cgi", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_cgi_cb, "Web mode with output suitable for CGI (preselected when run as *.cgi).\n" INDENT "Use --output-as=html --html-cgi instead.", NULL }, { NULL } }; /* *INDENT-ON* */ /* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds * like it would be more broadly useful, but only ever happens after a disconnect via * mon_cib_connection_destroy. */ static gboolean reconnect_after_timeout(gpointer data) { #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif out->info(out, "Reconnecting..."); if (pacemakerd_status() == pcmk_rc_ok) { fencing_connect(); if (cib_connect(TRUE) == pcmk_rc_ok) { /* trigger redrawing the screen (needs reconnect_timer == 0) */ reconnect_timer = 0; refresh_after_event(FALSE, TRUE); return G_SOURCE_REMOVE; } } reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); return G_SOURCE_REMOVE; } /* Called from various places when we are disconnected from the CIB or from the * fencing agent. If the CIB connection is still valid, this function will also * attempt to sign off and reconnect. */ static void mon_cib_connection_destroy(gpointer user_data) { out->info(out, "Connection to the cluster-daemons terminated"); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(reconnect_timer); reconnect_timer = 0; } if (st) { /* the client API won't properly reconnect notifications * if they are still in the table - so remove them */ clean_up_fencing_connection(); } if (cib) { cib->cmds->signoff(cib); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); } return; } /* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static volatile sighandler_t ncurses_winch_handler; /* Signal handler installed the regular way (not into the main loop) for when * the screen is resized. Commonly, this happens when running in an xterm and * the user changes its size. */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); /* Alert the mainloop code we'd like the refresh_trigger to run next * time the mainloop gets around to checking. */ mainloop_set_trigger((crm_trigger_t *) refresh_trigger); } not_done--; } #endif static int fencing_connect(void) { int rc = pcmk_ok; if (options.fence_connect && st == NULL) { st = stonith_api_new(); } if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) { return rc; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (options.watch_fencing) { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, T_STONITH_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, T_STONITH_NOTIFY_HISTORY, mon_st_callback_display); } } else { clean_up_fencing_connection(); } return rc; } static int cib_connect(gboolean full) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state == cib_connected_query || cib->state == cib_connected_command) { return rc; } crm_trace("Connecting to the CIB"); rc = pcmk_legacy2rc(cib->cmds->signon(cib, crm_system_name, cib_query)); if (rc != pcmk_rc_ok) { out->err(out, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); return rc; } #if CURSES_ENABLED /* just show this if refresh is gonna remove all traces */ if (output_format == mon_output_console) { out->info(out,"Waiting for CIB ..."); } #endif rc = pcmk_legacy2rc(cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call)); if (rc == pcmk_rc_ok && full) { rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy)); if (rc == EPROTONOSUPPORT) { out->err(out, "Notification setup not supported, won't be " "able to reconnect after failure"); if (output_format == mon_output_console) { sleep(2); } rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { cib->cmds->del_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update); rc = pcmk_legacy2rc(cib->cmds->add_notify_callback(cib, T_CIB_DIFF_NOTIFY, crm_diff_update)); } if (rc != pcmk_rc_ok) { out->err(out, "Notification setup failed, could not monitor CIB actions"); cib__clean_up_connection(&cib); clean_up_fencing_connection(); } } return rc; } /* This is used to set up the fencing options after the interactive UI has been stared. * fence_history_cb can't be used because it builds up a list of includes/excludes that * then have to be processed with apply_include_exclude and that could affect other * things. */ static void set_fencing_options(int level) { switch (level) { case 3: options.fence_connect = TRUE; fence_history = TRUE; show |= pcmk_section_fencing_all; break; case 2: options.fence_connect = TRUE; fence_history = TRUE; show |= pcmk_section_fencing_all; break; case 1: options.fence_connect = TRUE; fence_history = TRUE; show |= pcmk_section_fence_failed | pcmk_section_fence_pending; break; default: interactive_fence_level = 0; options.fence_connect = FALSE; fence_history = FALSE; show &= ~pcmk_section_fencing_all; break; } } /* Before trying to connect to fencer or cib check for state of pacemakerd - just no sense in trying till pacemakerd has taken care of starting all the sub-processes Only noteworthy thing to show here is when pacemakerd is waiting for startup-trigger from SBD. */ static void pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { pcmk_pacemakerd_api_reply_t *reply = event_data; enum pcmk_pacemakerd_state *state = (enum pcmk_pacemakerd_state *) user_data; /* we are just interested in the latest reply */ *state = pcmk_pacemakerd_state_invalid; switch (event_type) { case pcmk_ipc_event_reply: break; default: return; } if (status != CRM_EX_OK) { out->err(out, "Bad reply from pacemakerd: %s", crm_exit_str(status)); return; } if (reply->reply_type != pcmk_pacemakerd_reply_ping) { out->err(out, "Unknown reply type %d from pacemakerd", reply->reply_type); } else { if ((reply->data.ping.last_good != (time_t) 0) && (reply->data.ping.status == pcmk_rc_ok)) { *state = reply->data.ping.state; } } } static int pacemakerd_status(void) { int rc = pcmk_rc_ok; pcmk_ipc_api_t *pacemakerd_api = NULL; enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; if (!use_cib_native) { /* we don't need fully functional pacemakerd otherwise */ return rc; } if (cib != NULL && (cib->state == cib_connected_query || cib->state == cib_connected_command)) { /* As long as we have a cib-connection let's go with * that to fetch further cluster-status and avoid * unnecessary pings to pacemakerd. * If cluster is going down and fencer is down already * this will lead to a silently failing fencer reconnect. * On cluster startup we shouldn't see this situation * as first we do is wait for pacemakerd to report all * daemons running. */ return rc; } rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); if (pacemakerd_api == NULL) { out->err(out, "Could not connect to pacemakerd: %s", pcmk_rc_str(rc)); /* this is unrecoverable so return with rc we have */ return rc; } pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, (void *) &state); rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_poll); switch (rc) { case pcmk_rc_ok: rc = pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); if (rc == pcmk_rc_ok) { rc = pcmk_poll_ipc(pacemakerd_api, options.reconnect_ms/2); if (rc == pcmk_rc_ok) { pcmk_dispatch_ipc(pacemakerd_api); rc = ENOTCONN; if ((output_format == mon_output_console) || (output_format == mon_output_plain)) { switch (state) { case pcmk_pacemakerd_state_running: rc = pcmk_rc_ok; break; case pcmk_pacemakerd_state_starting_daemons: out->info(out,"Pacemaker daemons starting ..."); break; case pcmk_pacemakerd_state_wait_for_ping: out->info(out,"Waiting for startup-trigger from SBD ..."); break; case pcmk_pacemakerd_state_shutting_down: out->info(out,"Pacemaker daemons shutting down ..."); /* try our luck maybe CIB is still accessible */ rc = pcmk_rc_ok; break; case pcmk_pacemakerd_state_shutdown_complete: /* assuming pacemakerd doesn't dispatch any pings after entering * that state unless it is waiting for SBD */ out->info(out,"Pacemaker daemons shut down - reporting to SBD ..."); break; default: break; } } else { switch (state) { case pcmk_pacemakerd_state_running: rc = pcmk_rc_ok; break; case pcmk_pacemakerd_state_shutting_down: /* try our luck maybe CIB is still accessible */ rc = pcmk_rc_ok; break; default: break; } } } } break; case EREMOTEIO: rc = pcmk_rc_ok; on_remote_node = TRUE; #if CURSES_ENABLED /* just show this if refresh is gonna remove all traces */ if (output_format == mon_output_console) { out->info(out, "Running on remote-node waiting to be connected by cluster ..."); } #endif break; default: break; } pcmk_free_ipc_api(pacemakerd_api); /* returning with ENOTCONN triggers a retry */ return (rc == pcmk_rc_ok)?rc:ENOTCONN; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(out, option, condition) \ curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option)); /* This function is called from the main loop when there is something to be read * on stdin, like an interactive user's keystroke. All it does is read the keystroke, * set flags (or show the page showing which keystrokes are valid), and redraw the * screen. It does not do anything with connections to the CIB or fencing agent * agent what would happen in mon_refresh_display. */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': interactive_fence_level++; if (interactive_fence_level > 3) { interactive_fence_level = 0; } set_fencing_options(interactive_fence_level); break; case 'c': show ^= pcmk_section_tickets; break; case 'f': show ^= pcmk_section_failcounts; break; case 'n': show_opts ^= pcmk_show_rscs_by_node; break; case 'o': show ^= pcmk_section_operations; if (!pcmk_is_set(show, pcmk_section_operations)) { show_opts &= ~pcmk_show_timing; } break; case 'r': show_opts ^= pcmk_show_inactive_rscs; break; case 'R': show_opts ^= pcmk_show_details; #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif break; case 't': show_opts ^= pcmk_show_timing; if (pcmk_is_set(show_opts, pcmk_show_timing)) { show |= pcmk_section_operations; } break; case 'A': show ^= pcmk_section_attributes; break; case 'L': show ^= pcmk_section_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, pcmk_section_summary)) { show &= ~pcmk_section_summary; } else { show |= pcmk_section_summary; } /* Regardless, we don't show options in console mode. */ show &= ~pcmk_section_options; break; case 'b': show_opts ^= pcmk_show_brief; break; case 'j': show_opts ^= pcmk_show_pending; break; case '?': config_mode = TRUE; break; default: /* All other keys just redraw the screen. */ goto refresh; } if (!config_mode) goto refresh; blank_screen(); curses_formatted_printf(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets)); print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts)); print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node)); print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations)); print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs)); print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing)); print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes)); print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans)); print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary)); print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details)); print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief)); print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending)); curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m')); curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n"); } refresh: refresh_after_event(FALSE, TRUE); return TRUE; } #endif // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_strerror(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; const char *description = "Notes:\n\n" "If this program is called as crm_mon.cgi, --output-as=html --html-cgi will\n" "automatically be added to the command line arguments.\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many different\n" "formats. It can be just an integer number of seconds, a number plus units\n" "(ms/msec/us/usec/s/sec/m/min/h/hr), or an ISO 8601 period specification.\n\n" "Output Control:\n\n" "By default, a certain list of sections are written to the output destination.\n" "The default varies based on the output format - XML includes everything, while\n" "other output formats will display less. This list can be modified with the\n" "--include and --exclude command line options. Each option may be given multiple\n" "times on the command line, and each can give a comma-separated list of sections.\n" "The options are applied to the default set, from left to right as seen on the\n" "command line. For a list of valid sections, pass --include=list or --exclude=list.\n\n" "Interactive Use:\n\n" "When run interactively, crm_mon can be told to hide and display various sections\n" "of output. To see a help screen explaining the options, hit '?'. Any key stroke\n" "aside from those listed will cause the screen to refresh.\n\n" "Examples:\n\n" "Display the cluster status on the console with updates as they occur:\n\n" "\tcrm_mon\n\n" "Display the cluster status on the console just once then exit:\n\n" "\tcrm_mon -1\n\n" "Display your cluster status, group resources by node, and include inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html --output-to /path/to/docroot/filename.html\n\n" "Start crm_mon and export the current cluster status as XML to stdout, then exit:\n\n" "\tcrm_mon --output-as xml\n\n"; #if CURSES_ENABLED context = pcmk__build_arg_context(args, "console (default), html, text, xml", group, NULL); #else context = pcmk__build_arg_context(args, "text (default), html, xml", group, NULL); #endif pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /* If certain format options were specified, we want to set some extra * options. We can just process these like they were given on the * command line. */ static void add_output_args(void) { GError *err = NULL; if (output_format == mon_output_plain) { if (!pcmk__force_args(context, &err, "%s --text-fancy", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_cgi) { if (!pcmk__force_args(context, &err, "%s --html-cgi", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_xml) { if (!pcmk__force_args(context, &err, "%s --xml-simple-list --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } else if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; if (!pcmk__force_args(context, &err, "%s --xml-legacy --xml-substitute", g_get_prgname())) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } } /* Which output format to use could come from two places: The --as-xml * style arguments we gave in deprecated_entries above, or the formatted output * arguments added by pcmk__register_formats. If the latter were used, * output_format will be mon_output_unset. * * Call the callbacks as if those older style arguments were provided so * the various things they do get done. */ static void reconcile_output_format(pcmk__common_args_t *args) { gboolean retval = TRUE; GError *err = NULL; if (output_format != mon_output_unset) { return; } if (pcmk__str_eq(args->output_ty, "html", pcmk__str_casei)) { char *dest = NULL; if (args->output_dest != NULL) { dest = strdup(args->output_dest); } retval = as_html_cb("h", dest, NULL, &err); free(dest); } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_casei)) { retval = no_curses_cb("N", NULL, NULL, &err); } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_casei)) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("xml"); output_format = mon_output_xml; } else if (options.one_shot) { if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("text"); output_format = mon_output_plain; } else { /* Neither old nor new arguments were given, so set the default. */ if (args->output_ty != NULL) { free(args->output_ty); } args->output_ty = strdup("console"); output_format = mon_output_console; } if (!retval) { g_propagate_error(&error, err); clean_up(CRM_EX_USAGE); } } static void handle_connection_failures(int rc) { if (rc == pcmk_rc_ok) { return; } if (output_format == mon_output_monitor) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "CLUSTER CRIT: Connection to cluster failed: %s", pcmk_rc_str(rc)); rc = MON_STATUS_CRIT; } else if (rc == ENOTCONN) { if (on_remote_node) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } rc = pcmk_rc2exitc(rc); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); rc = pcmk_rc2exitc(rc); } clean_up(rc); } static void one_shot(void) { int rc; rc = pacemakerd_status(); if (rc == pcmk_rc_ok) { fencing_connect(); rc = cib_connect(FALSE); } if (rc == pcmk_rc_ok) { mon_refresh_display(NULL); } else { handle_connection_failures(rc); } clean_up(CRM_EX_OK); } int main(int argc, char **argv) { int rc = pcmk_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); pcmk__cli_init_logging("crm_mon", 0); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); if (pcmk__ends_with_ext(argv[0], ".cgi")) { output_format = mon_output_cgi; options.one_shot = TRUE; } processed_args = pcmk__cmdline_preproc(argv, "ehimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. This can't * be done in add_output_args. That function is called after command line * arguments are processed in the next block, which means it'll override whatever * title the user provides. Doing this here means the user can give their own * title on the command line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (options.watch_fencing) { fence_history_cb("--fence-history", "0", NULL, NULL); options.fence_connect = TRUE; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } else { switch (cib->variant) { case cib_native: /* cib & fencing - everything available */ use_cib_native = TRUE; break; case cib_file: /* Don't try to connect to fencing as we * either don't have a running cluster or * the fencing-information would possibly * not match the cib data from a file. * As we don't expect cib-updates coming * in enforce one-shot. */ fence_history_cb("--fence-history", "0", NULL, NULL); options.one_shot = TRUE; break; case cib_remote: /* updates coming in but no fencing */ fence_history_cb("--fence-history", "0", NULL, NULL); break; case cib_undefined: case cib_database: default: /* something is odd */ rc = -EINVAL; break; } } if (options.one_shot) { if (output_format == mon_output_console) { output_format = mon_output_plain; } } else if (options.daemonize) { if ((output_format == mon_output_console) || (args->output_dest == NULL)) { output_format = mon_output_none; } crm_enable_stderr(FALSE); if (pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches | pcmk__str_casei) && !options.external_agent) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to and --external-agent"); return clean_up(CRM_EX_USAGE); } if (cib) { /* to be on the safe side don't have cib-object around * when we are forking */ cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); if (cib == NULL) { rc = -EINVAL; } /* otherwise assume we've got the same cib-object we've just destroyed * in our parent */ } } else if (output_format == mon_output_console) { #if CURSES_ENABLED crm_enable_stderr(FALSE); #else options.one_shot = TRUE; output_format = mon_output_plain; printf("Defaulting to one-shot mode\n"); printf("You need to have curses available at compile time to enable console mode\n"); #endif } } if (rc != pcmk_ok) { // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); return clean_up(CRM_EX_ERROR); } reconcile_output_format(args); add_output_args(); if (args->version && output_format == mon_output_console) { /* Use the text output format here if we are in curses mode but were given * --version. Displaying version information uses printf, and then we * immediately exit. We don't want to initialize curses for that. */ rc = pcmk__output_new(&out, "text", args->output_dest, argv); } else { rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); } if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ show = default_includes(output_format); /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, &error)) { return clean_up(CRM_EX_USAGE); } /* Sync up the initial value of interactive_fence_level with whatever was set with * --include/--exclude= options. */ if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) { interactive_fence_level = 3; } else if (pcmk_is_set(show, pcmk_section_fence_worked)) { interactive_fence_level = 2; } else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) { interactive_fence_level = 1; } else { interactive_fence_level = 0; } pcmk__register_lib_messages(out); crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } /* Extra sanity checks when in CGI mode */ if (output_format == mon_output_cgi) { if (cib->variant == cib_file) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode used with CIB file"); return clean_up(CRM_EX_USAGE); } else if (options.external_agent != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with --external-agent"); return clean_up(CRM_EX_USAGE); } else if (options.daemonize == TRUE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "CGI mode cannot be used with -d"); return clean_up(CRM_EX_USAGE); } } if (output_format == mon_output_xml || output_format == mon_output_legacy_xml) { show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing; if (!options.daemonize) { options.one_shot = TRUE; } } if ((output_format == mon_output_html || output_format == mon_output_cgi) && out->dest != stdout) { pcmk__html_add_header("meta", "http-equiv", "refresh", "content", pcmk__itoa(options.reconnect_ms / 1000), NULL); } #ifdef PCMK__COMPAT_2_0 // Keep failed action output the same as 2.0.x show_opts |= pcmk_show_failed_detail; #endif crm_info("Starting %s", crm_system_name); cib__set_output(cib, out); if (options.one_shot) { one_shot(); } do { out->info(out,"Waiting until cluster is available on this node ..."); rc = pacemakerd_status(); if (rc == pcmk_rc_ok) { fencing_connect(); rc = cib_connect(TRUE); } if (rc != pcmk_rc_ok) { pcmk__sleep_ms(options.reconnect_ms); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } else if (output_format == mon_output_html && out->dest != stdout) { printf("Writing html to %s ...\n", args->output_dest); } } while (rc == ENOTCONN); handle_connection_failures(rc); set_fencing_options(interactive_fence_level); mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, G_IO_IN, detect_user_input, NULL); } #endif /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In * this file, that is anywhere mainloop_set_trigger is called. */ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } /*! * \internal * \brief Print one-line status suitable for use with monitoring software * * \param[in] data_set Working set of CIB state * * \note This function's output (and the return code when the program exits) * should conform to https://www.monitoring-plugins.org/doc/guidelines.html */ static void print_simple_status(pcmk__output_t *out, pe_working_set_t * data_set) { GList *gIter = NULL; int nodes_online = 0; int nodes_standby = 0; int nodes_maintenance = 0; char *offline_nodes = NULL; size_t offline_nodes_len = 0; gboolean no_dc = FALSE; gboolean offline = FALSE; if (data_set->dc_node == NULL) { has_warnings = TRUE; no_dc = TRUE; } for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) { pe_node_t *node = (pe_node_t *) gIter->data; if (node->details->standby && node->details->online) { nodes_standby++; } else if (node->details->maintenance && node->details->online) { nodes_maintenance++; } else if (node->details->online) { nodes_online++; } else { char *s = crm_strdup_printf("offline node: %s", node->details->uname); /* coverity[leaked_storage] False positive */ pcmk__add_word(&offline_nodes, &offline_nodes_len, s); free(s); has_warnings = TRUE; offline = TRUE; } } if (has_warnings) { out->info(out, "CLUSTER WARN: %s%s%s", no_dc ? "No DC" : "", no_dc && offline ? ", " : "", (offline? offline_nodes : "")); free(offline_nodes); } else { char *nodes_standby_s = NULL; char *nodes_maint_s = NULL; if (nodes_standby > 0) { nodes_standby_s = crm_strdup_printf(", %d standby node%s", nodes_standby, pcmk__plural_s(nodes_standby)); } if (nodes_maintenance > 0) { nodes_maint_s = crm_strdup_printf(", %d maintenance node%s", nodes_maintenance, pcmk__plural_s(nodes_maintenance)); } out->info(out, "CLUSTER OK: %d node%s online%s%s, " "%d resource instance%s configured", nodes_online, pcmk__plural_s(nodes_online), nodes_standby_s != NULL ? nodes_standby_s : "", nodes_maint_s != NULL ? nodes_maint_s : "", data_set->ninstances, pcmk__plural_s(data_set->ninstances)); free(nodes_standby_s); free(nodes_maint_s); } /* coverity[leaked_storage] False positive */ } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = pcmk__itoa(rc); char *status_s = pcmk__itoa(status); char *target_rc_s = pcmk__itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { crm_perror(LOG_ERR, "notification fork() failed."); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static void handle_rsc_op(xmlNode * xml, const char *node_id) { int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, XML_LRM_TAG_RSC_OP) != 0) { xmlNode *cIter; for(cIter = xml->children; cIter; cIter = cIter->next) { handle_rsc_op(cIter, node_id); } return; } id = crm_element_value(rsc_op, XML_LRM_ATTR_TASK_KEY); if (id == NULL) { /* Compatibility with <= 1.1.5 */ id = ID(rsc_op); } magic = crm_element_value(rsc_op, XML_ATTR_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, XML_LRM_ATTR_TARGET); while (n != NULL && !pcmk__str_eq(XML_CIB_TAG_STATE, TYPE(n), pcmk__str_casei)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, XML_ATTR_UNAME); } if (node == NULL && n) { node = ID(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_strerror(pcmk_ok); if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_EXEC_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = pcmk_exec_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); } /* This function is just a wrapper around mainloop_set_trigger so that it can be * called from a mainloop directly. It's simply another way of ensuring the screen * gets redrawn. */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); return FALSE; } static void crm_diff_update_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(diff); change != NULL; change = pcmk__xml_next(change)) { const char *name = NULL; const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); xmlNode *match = NULL; const char *node = NULL; if(op == NULL) { continue; } else if(strcmp(op, "create") == 0) { match = change->children; } else if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "delete") == 0) { continue; } else if(strcmp(op, "modify") == 0) { match = first_named_child(change, XML_DIFF_RESULT); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0); } else if(strcmp(name, XML_TAG_CIB) == 0) { xmlNode *state = NULL; xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS); for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) { xmlNode *state = NULL; for (state = pcmk__xe_first_child(match); state != NULL; state = pcmk__xe_next(state)) { node = crm_element_value(state, XML_ATTR_UNAME); if (node == NULL) { node = ID(state); } handle_rsc_op(state, node); } } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) { node = crm_element_value(match, XML_ATTR_UNAME); if (node == NULL) { node = ID(match); } handle_rsc_op(match, node); } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) { node = ID(match); handle_rsc_op(match, node); } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, "lrm"); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } } } static void crm_diff_update_v1(const char *event, xmlNode * msg) { /* Process operation updates */ xmlXPathObject *xpathObj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP); int lpc = 0, max = numXpathResults(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *rsc_op = getXpathResult(xpathObj, lpc); handle_rsc_op(rsc_op, NULL); } freeXpathObject(xpathObj); } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *diff = get_message_xml(msg, F_CIB_UPDATE_RESULT); out->progress(out, false); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_scope_local | cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, "format", &format); switch(format) { case 1: crm_diff_update_v1(event, msg); break; case 2: crm_diff_update_v2(event, msg); break; default: crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { out->info(out, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; refresh_after_event(cib_updated, FALSE); } static int get_fencing_history(stonith_history_t **stonith_history) { int rc = 0; while (fence_history) { if (st != NULL) { rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120); if (rc == 0) { *stonith_history = stonith__sort_history(*stonith_history); if (!pcmk_all_flags_set(show, pcmk_section_fencing_all) && (output_format != mon_output_xml)) { *stonith_history = pcmk__reduce_fence_history(*stonith_history); } break; /* all other cases are errors */ } } else { rc = ENOTCONN; break; } } return rc; } static int mon_refresh_display(gpointer user_data) { xmlNode *cib_copy = copy_xml(current_cib); stonith_history_t *stonith_history = NULL; int history_rc = 0; GList *unames = NULL; GList *resources = NULL; last_refresh = time(NULL); if (cli_config_update(&cib_copy, NULL, FALSE) == FALSE) { cib__clean_up_connection(&cib); out->err(out, "Upgrade failed: %s", pcmk_strerror(-pcmk_err_schema_validation)); clean_up(CRM_EX_CONFIG); return 0; } /* get the stonith-history if there is evidence we need it */ history_rc = get_fencing_history(&stonith_history); if (mon_data_set == NULL) { mon_data_set = pe_new_working_set(); CRM_ASSERT(mon_data_set != NULL); } pe__set_working_set_flags(mon_data_set, pe_flag_no_compat); mon_data_set->input = cib_copy; mon_data_set->priv = out; cluster_status(mon_data_set); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (pcmk_is_set(show, pcmk_section_bans) || pcmk_is_set(show, pcmk_section_tickets)) { pcmk__unpack_constraints(mon_data_set); } if (options.daemonize) { out->reset(out); } unames = pe__build_node_name_list(mon_data_set, options.only_node); resources = pe__build_rsc_list(mon_data_set, options.only_rsc); /* Always print DC if NULL. */ if (mon_data_set->dc_node == NULL) { show |= pcmk_section_dc; } switch (output_format) { case mon_output_html: case mon_output_cgi: if (out->message(out, "cluster-status", mon_data_set, crm_errno2exit(history_rc), stonith_history, fence_history, show, show_opts, options.neg_location_prefix, unames, resources) != 0) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Critical: Unable to output html file"); clean_up(CRM_EX_CANTCREAT); return 0; } break; case mon_output_monitor: print_simple_status(out, mon_data_set); if (has_warnings) { clean_up(MON_STATUS_WARN); return FALSE; } break; case mon_output_unset: case mon_output_none: break; default: out->message(out, "cluster-status", mon_data_set, crm_errno2exit(history_rc), stonith_history, fence_history, show, show_opts, options.neg_location_prefix, unames, resources); break; } if (options.daemonize) { out->finish(out, CRM_EX_OK, true, NULL); } g_list_free_full(unames, free); g_list_free_full(resources, free); stonith_history_free(stonith_history); stonith_history = NULL; pe_reset_working_set(mon_data_set); return 1; } /* This function is called for fencing events (see fencing_connect for which ones) when * --watch-fencing is used on the command line. */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else if (options.external_agent) { - char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)", - e->operation, e->origin, e->target, pcmk_strerror(e->result), - e->id); + char *desc = stonith__event_description(e); + send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } /* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: * * - If the last update occurred more than reconnect_ms ago (defaults to 5s, but * can be changed via the -i command line option), or * - After every 10 CIB updates, or * - If it's been 2s since the last update * * This function sounds like it would be more broadly useful, but it is only called when a * fencing event is received or a CIB diff occurrs. */ static void refresh_after_event(gboolean data_updated, gboolean enforce) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (reconnect_timer > 0) { /* we will receive a refresh request after successful reconnect */ mainloop_timer_stop(refresh_timer); return; } /* as we're not handling initial failure of fencer-connection as * fatal give it a retry here * not getting here if cib-reconnection is already on the way */ fencing_connect(); if (enforce || ((now - last_refresh) > (options.reconnect_ms / 1000)) || updates >= 10) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } /* This function is called for fencing events (see fencing_connect for which ones) when * --watch-fencing is NOT used on the command line. */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else { out->progress(out, false); refresh_after_event(TRUE, FALSE); } } static void clean_up_fencing_connection(void) { if (st == NULL) { return; } if (st->state != stonith_disconnected) { st->cmds->remove_notification(st, T_STONITH_NOTIFY_DISCONNECT); st->cmds->remove_notification(st, T_STONITH_NOTIFY_FENCE); st->cmds->remove_notification(st, T_STONITH_NOTIFY_HISTORY); st->cmds->disconnect(st); } stonith_api_delete(st); st = NULL; } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ cib__clean_up_connection(&cib); clean_up_fencing_connection(); free(options.neg_location_prefix); free(options.only_node); free(options.only_rsc); free(options.pid_file); g_slist_free_full(options.includes_excludes, free); pe_free_working_set(mon_data_set); mon_data_set = NULL; g_strfreev(processed_args); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if ((error != NULL || exit_code == CRM_EX_USAGE) && output_format == mon_output_console) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (!options.daemonize) { out->finish(out, exit_code, true, NULL); } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); }