diff --git a/daemons/execd/cts-exec-helper.c b/daemons/execd/cts-exec-helper.c index eff47f983e..8c47e6ed7d 100644 --- a/daemons/execd/cts-exec-helper.c +++ b/daemons/execd/cts-exec-helper.c @@ -1,627 +1,626 @@ /* * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "cts-exec-helper - inject commands into the Pacemaker executor and watch for events" static int exec_call_id = 0; static gboolean start_test(gpointer user_data); static void try_connect(void); static char *key = NULL; static char *val = NULL; static struct { int verbose; int quiet; guint interval_ms; int timeout; int start_delay; int cancel_call_id; gboolean no_wait; gboolean is_running; gboolean no_connect; int exec_call_opts; const char *api_call; const char *rsc_id; const char *provider; const char *class; const char *type; const char *action; const char *listen; gboolean use_tls; lrmd_key_value_t *params; } options; static gboolean interval_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { return pcmk_parse_interval_spec(optarg, &options.interval_ms) == pcmk_rc_ok; } static gboolean notify_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--notify-orig", "-n", NULL)) { options.exec_call_opts = lrmd_opt_notify_orig_only; } else if (pcmk__str_any_of(option_name, "--notify-changes", "-o", NULL)) { options.exec_call_opts = lrmd_opt_notify_changes_only; } return TRUE; } static gboolean param_key_val_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--param-key", "-k", NULL)) { pcmk__str_update(&key, optarg); } else if (pcmk__str_any_of(option_name, "--param-val", "-v", NULL)) { pcmk__str_update(&val, optarg); } if (key != NULL && val != NULL) { options.params = lrmd_key_value_add(options.params, key, val); pcmk__str_update(&key, NULL); pcmk__str_update(&val, NULL); } return TRUE; } static GOptionEntry basic_entries[] = { { "api-call", 'c', 0, G_OPTION_ARG_STRING, &options.api_call, "Directly relates to executor API functions", NULL }, { "is-running", 'R', 0, G_OPTION_ARG_NONE, &options.is_running, "Determine if a resource is registered and running", NULL }, { "listen", 'l', 0, G_OPTION_ARG_STRING, &options.listen, "Listen for a specific event string", NULL }, { "no-wait", 'w', 0, G_OPTION_ARG_NONE, &options.no_wait, "Make api call and do not wait for result", NULL }, { "notify-changes", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify client changes to recurring operations", NULL }, { "notify-orig", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, notify_cb, "Only notify this client of the results of an API action", NULL }, { "tls", 'S', 0, G_OPTION_ARG_NONE, &options.use_tls, "Use TLS backend for local connection", NULL }, { NULL } }; static GOptionEntry api_call_entries[] = { { "action", 'a', 0, G_OPTION_ARG_STRING, &options.action, NULL, NULL }, { "cancel-call-id", 'x', 0, G_OPTION_ARG_INT, &options.cancel_call_id, NULL, NULL }, { "class", 'C', 0, G_OPTION_ARG_STRING, &options.class, NULL, NULL }, { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, interval_cb, NULL, NULL }, { "param-key", 'k', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "param-val", 'v', 0, G_OPTION_ARG_CALLBACK, param_key_val_cb, NULL, NULL }, { "provider", 'P', 0, G_OPTION_ARG_STRING, &options.provider, NULL, NULL }, { "rsc-id", 'r', 0, G_OPTION_ARG_STRING, &options.rsc_id, NULL, NULL }, { "start-delay", 's', 0, G_OPTION_ARG_INT, &options.start_delay, NULL, NULL }, { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout, NULL, NULL }, { "type", 'T', 0, G_OPTION_ARG_STRING, &options.type, NULL, NULL }, { NULL } }; static GMainLoop *mainloop = NULL; static lrmd_t *lrmd_conn = NULL; static char event_buf_v0[1024]; static crm_exit_t test_exit(crm_exit_t exit_code) { lrmd_api_delete(lrmd_conn); return crm_exit(exit_code); } #define print_result(fmt, args...) \ if (!options.quiet) { \ printf(fmt "\n" , ##args); \ } #define report_event(event) \ snprintf(event_buf_v0, sizeof(event_buf_v0), "NEW_EVENT event_type:%s rsc_id:%s action:%s rc:%s op_status:%s", \ lrmd_event_type2str(event->type), \ event->rsc_id, \ event->op_type ? event->op_type : "none", \ services_ocf_exitcode_str(event->rc), \ pcmk_exec_status_str(event->op_status)); \ crm_info("%s", event_buf_v0); static void test_shutdown(int nsig) { lrmd_api_delete(lrmd_conn); lrmd_conn = NULL; } static void read_events(lrmd_event_data_t * event) { report_event(event); if (options.listen) { if (pcmk__str_eq(options.listen, event_buf_v0, pcmk__str_casei)) { print_result("LISTEN EVENT SUCCESSFUL"); test_exit(CRM_EX_OK); } } if (exec_call_id && (event->call_id == exec_call_id)) { if (event->op_status == 0 && event->rc == 0) { print_result("API-CALL SUCCESSFUL for 'exec'"); } else { print_result("API-CALL FAILURE for 'exec', rc:%d lrmd_op_status:%s", event->rc, pcmk_exec_status_str(event->op_status)); test_exit(CRM_EX_ERROR); } if (!options.listen) { test_exit(CRM_EX_OK); } } } static gboolean timeout_err(gpointer data) { print_result("LISTEN EVENT FAILURE - timeout occurred, never found"); test_exit(CRM_EX_TIMEOUT); return FALSE; } static void connection_events(lrmd_event_data_t * event) { int rc = event->connection_rc; if (event->type != lrmd_event_connect) { /* ignore */ return; } if (!rc) { crm_info("Executor client connection established"); start_test(NULL); return; } else { sleep(1); try_connect(); crm_notice("Executor client connection failed"); } } static void try_connect(void) { int tries = 10; static int num_tries = 0; int rc = 0; lrmd_conn->cmds->set_callback(lrmd_conn, connection_events); for (; num_tries < tries; num_tries++) { rc = lrmd_conn->cmds->connect_async(lrmd_conn, crm_system_name, 3000); if (!rc) { return; /* we'll hear back in async callback */ } sleep(1); } print_result("API CONNECTION FAILURE"); test_exit(CRM_EX_ERROR); } static gboolean start_test(gpointer user_data) { int rc = 0; if (!options.no_connect) { if (!lrmd_conn->cmds->is_connected(lrmd_conn)) { try_connect(); /* async connect -- this function will get called back into */ return 0; } } lrmd_conn->cmds->set_callback(lrmd_conn, read_events); if (options.timeout) { g_timeout_add(options.timeout, timeout_err, NULL); } if (!options.api_call) { return 0; } if (pcmk__str_eq(options.api_call, "exec", pcmk__str_casei)) { rc = lrmd_conn->cmds->exec(lrmd_conn, options.rsc_id, options.action, NULL, options.interval_ms, options.timeout, options.start_delay, options.exec_call_opts, options.params); if (rc > 0) { exec_call_id = rc; print_result("API-CALL 'exec' action pending, waiting on response"); } } else if (pcmk__str_eq(options.api_call, "register_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->register_rsc(lrmd_conn, options.rsc_id, options.class, options.provider, options.type, 0); } else if (pcmk__str_eq(options.api_call, "get_rsc_info", pcmk__str_casei)) { lrmd_rsc_info_t *rsc_info; rsc_info = lrmd_conn->cmds->get_rsc_info(lrmd_conn, options.rsc_id, 0); if (rsc_info) { print_result("RSC_INFO: id:%s class:%s provider:%s type:%s", rsc_info->id, rsc_info->standard, (rsc_info->provider? rsc_info->provider : ""), rsc_info->type); lrmd_free_rsc_info(rsc_info); rc = pcmk_ok; } else { rc = -1; } } else if (pcmk__str_eq(options.api_call, "unregister_rsc", pcmk__str_casei)) { rc = lrmd_conn->cmds->unregister_rsc(lrmd_conn, options.rsc_id, 0); } else if (pcmk__str_eq(options.api_call, "cancel", pcmk__str_casei)) { rc = lrmd_conn->cmds->cancel(lrmd_conn, options.rsc_id, options.action, options.interval_ms); } else if (pcmk__str_eq(options.api_call, "metadata", pcmk__str_casei)) { char *output = NULL; rc = lrmd_conn->cmds->get_metadata(lrmd_conn, options.class, options.provider, options.type, &output, 0); if (rc == pcmk_ok) { print_result("%s", output); free(output); } } else if (pcmk__str_eq(options.api_call, "list_agents", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_agents(lrmd_conn, &list, options.class, options.provider); if (rc > 0) { print_result("%d agents found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no agents found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_ocf_providers", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_ocf_providers(lrmd_conn, options.type, &list); if (rc > 0) { print_result("%d providers found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "list_standards", pcmk__str_casei)) { lrmd_list_t *list = NULL; lrmd_list_t *iter = NULL; rc = lrmd_conn->cmds->list_standards(lrmd_conn, &list); if (rc > 0) { print_result("%d standards found", rc); for (iter = list; iter != NULL; iter = iter->next) { print_result("%s", iter->val); } lrmd_list_freeall(list); rc = 0; } else { print_result("API_CALL FAILURE - no providers found"); rc = -1; } } else if (pcmk__str_eq(options.api_call, "get_recurring_ops", pcmk__str_casei)) { GList *op_list = NULL; GList *op_item = NULL; rc = lrmd_conn->cmds->get_recurring_ops(lrmd_conn, options.rsc_id, 0, 0, &op_list); for (op_item = op_list; op_item != NULL; op_item = op_item->next) { lrmd_op_info_t *op_info = op_item->data; print_result("RECURRING_OP: %s_%s_%s timeout=%sms", op_info->rsc_id, op_info->action, op_info->interval_ms_s, op_info->timeout_ms_s); lrmd_free_op_info(op_info); } g_list_free(op_list); } else if (options.api_call) { print_result("API-CALL FAILURE unknown action '%s'", options.action); test_exit(CRM_EX_ERROR); } if (rc < 0) { print_result("API-CALL FAILURE for '%s' api_rc:%d", options.api_call, rc); test_exit(CRM_EX_ERROR); } if (options.api_call && rc == pcmk_ok) { print_result("API-CALL SUCCESSFUL for '%s'", options.api_call); if (!options.listen) { test_exit(CRM_EX_OK); } } if (options.no_wait) { /* just make the call and exit regardless of anything else. */ test_exit(CRM_EX_OK); } return 0; } /*! * \internal * \brief Generate resource parameters from CIB if none explicitly given * * \return Standard Pacemaker return code */ static int generate_params(void) { int rc = pcmk_rc_ok; pcmk_scheduler_t *scheduler = NULL; xmlNode *cib_xml_copy = NULL; pcmk_resource_t *rsc = NULL; GHashTable *params = NULL; GHashTable *meta = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; if (options.params != NULL) { return pcmk_rc_ok; // User specified parameters explicitly } // Retrieve and update CIB rc = cib__signon_query(NULL, NULL, &cib_xml_copy); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk_update_configured_schema(&cib_xml_copy, false); if (rc != pcmk_rc_ok) { return rc; } // Calculate cluster status scheduler = pe_new_working_set(); if (scheduler == NULL) { crm_crit("Could not allocate scheduler data"); return ENOMEM; } - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); scheduler->input = cib_xml_copy; scheduler->priv->now = crm_time_new(NULL); cluster_status(scheduler); // Find resource in CIB rsc = pe_find_resource_with_flags(scheduler->priv->resources, options.rsc_id, pcmk_rsc_match_history |pcmk_rsc_match_basename); if (rsc == NULL) { crm_err("Resource does not exist in config"); pe_free_working_set(scheduler); return EINVAL; } // Add resource instance parameters to options.params params = pe_rsc_params(rsc, NULL, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { options.params = lrmd_key_value_add(options.params, key, value); } } // Add resource meta-attributes to options.params meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, NULL, scheduler); g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { char *crm_name = crm_meta_name(key); options.params = lrmd_key_value_add(options.params, crm_name, value); free(crm_name); } g_hash_table_destroy(meta); pe_free_working_set(scheduler); return rc; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, basic_entries); pcmk__add_arg_group(context, "api-call", "API Call Options:", "Parameters for api-call option", api_call_entries); return context; } int main(int argc, char **argv) { GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; crm_trigger_t *trig = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); /* Typically we'd pass all the single character options that take an argument * as the second parameter here (and there's a bunch of those in this tool). * However, we control how this program is called so we can just not call it * in a way where the preprocessing ever matters. */ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, NULL); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } /* We have to use crm_log_init here to set up the logging because there's * different handling for daemons vs. command line programs, and * pcmk__cli_init_logging is set up to only handle the latter. */ crm_log_init(NULL, LOG_INFO, TRUE, (args->verbosity? TRUE : FALSE), argc, argv, FALSE); for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!options.listen && pcmk__strcase_any_of(options.api_call, "metadata", "list_agents", "list_standards", "list_ocf_providers", NULL)) { options.no_connect = TRUE; } if (options.is_running) { int rc = pcmk_rc_ok; if (options.rsc_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "--is-running requires --rsc-id"); goto done; } options.interval_ms = 0; if (options.timeout == 0) { options.timeout = 30000; } rc = generate_params(); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Can not determine resource status: " "unable to get parameters from CIB"); goto done; } options.api_call = "exec"; options.action = PCMK_ACTION_MONITOR; options.exec_call_opts = lrmd_opt_notify_orig_only; } if (!options.api_call && !options.listen) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must specify at least one of --api-call, --listen, " "or --is-running"); goto done; } if (options.use_tls) { lrmd_conn = lrmd_remote_api_new(NULL, "localhost", 0); } else { lrmd_conn = lrmd_api_new(); } trig = mainloop_add_trigger(G_PRIORITY_HIGH, start_test, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); free(key); free(val); pcmk__output_and_clear_error(&error, NULL); return test_exit(exit_code); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 8b0245c2e7..b7c0bb64fd 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3639 +1,3642 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(timeout_ms / 1000, INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_CALLOPT, &(cmd->options)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { - if (node_does_watchdog_fencing(stonith_our_uname)) { + if (node_does_watchdog_fencing(fenced_get_local_node())) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { pcmk__node_status_t *node = pcmk__get_node(0, cmd->target, NULL, pcmk__node_search_cluster_member); cmd->target_nodeid = node->cluster_layer_id; } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(stonith_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { + const char *local_node_name = fenced_get_local_node(); + if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } - if (node_does_watchdog_fencing(stonith_our_uname)) { + if (node_does_watchdog_fencing(local_node_name)) { g_list_free_full(device->targets, free); - device->targets = stonith__parse_targets(stonith_our_uname); + device->targets = stonith__parse_targets(local_node_name); pcmk__insert_dup(device->params, - PCMK_STONITH_HOST_LIST, stonith_our_uname); + PCMK_STONITH_HOST_LIST, local_node_name); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = get_xpath_object("//" PCMK_XE_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { - gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, + gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { const pcmk__node_status_t *node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member); pcmk__cluster_send_message(node, pcmk__cluster_msg_fenced, reply); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *wrapper = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } // Pack the results into XML wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); list = pcmk__xe_create(wrapper, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; xmlNode *dev = NULL; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, device->verified); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } crm_log_xml_trace(list, "query-result"); stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: pcmk__xml_free(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " QB_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (!stand_alone && pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, reply); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); pcmk__xml_free(reply); if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_NOTIFY_FENCE); stonith__xe_set_result(notify_data, result); crm_xml_add(notify_data, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(notify_data, PCMK__XA_ST_OP, cmd->op); crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, "localhost"); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, cmd->client); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, result, notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { - if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { + if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ - if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { + if ((relay_op_id != NULL) && (target != NULL) + && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" PCMK__XE_ST_DEVICE_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if ((request->peer != NULL) || stand_alone) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); pcmk__cluster_send_message(node, pcmk__cluster_msg_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" PCMK__XE_ST_DEVICE_ID, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_config_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" PCMK__XE_ST_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, PCMK__XA_ST_CALLOPT, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c index e0b5729707..28207eb7ba 100644 --- a/daemons/fenced/fenced_history.c +++ b/daemons/fenced/fenced_history.c @@ -1,572 +1,574 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_STONITH_HISTORY 500 /*! * \internal * \brief Send a broadcast to all nodes to trigger cleanup or * history synchronisation * * \param[in] history Optional history to be attached * \param[in] callopts We control cleanup via a flag in the callopts * \param[in] target Cleanup can be limited to certain fence-targets */ static void stonith_send_broadcast_history(xmlNode *history, int callopts, const char *target) { xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); xmlNode *wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA); xmlNode *call_data = pcmk__xe_create(wrapper, __func__); crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_FENCE_HISTORY); crm_xml_add_int(bcast, PCMK__XA_ST_CALLOPT, callopts); pcmk__xml_copy(call_data, history); if (target != NULL) { crm_xml_add(call_data, PCMK__XA_ST_TARGET, target); } pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, bcast); pcmk__xml_free(bcast); } static gboolean stonith_remove_history_entry (gpointer key, gpointer value, gpointer user_data) { remote_fencing_op_t *op = value; const char *target = (const char *) user_data; if ((op->state == st_failed) || (op->state == st_done)) { if ((target) && (strcmp(op->target, target) != 0)) { return FALSE; } return TRUE; } return FALSE; /* don't clean pending operations */ } /*! * \internal * \brief Send out a cleanup broadcast or do a local history-cleanup * * \param[in] target Cleanup can be limited to certain fence-targets * \param[in] broadcast Send out a cleanup broadcast */ static void stonith_fence_history_cleanup(const char *target, gboolean broadcast) { if (broadcast) { stonith_send_broadcast_history(NULL, st_opt_cleanup | st_opt_discard_reply, target); /* we'll do the local clean when we receive back our own broadcast */ } else if (stonith_remote_op_list) { g_hash_table_foreach_remove(stonith_remote_op_list, stonith_remove_history_entry, (gpointer) target); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } } /* keeping the length of fence-history within bounds * ================================================= * * If things are really running wild a lot of fencing-attempts * might fill up the hash-map, eventually using up a lot * of memory and creating huge history-sync messages. * Before the history being synced across nodes at least * the reboot of a cluster-node helped keeping the * history within bounds even though not in a reliable * manner. * * stonith_remote_op_list isn't sorted for time-stamps * thus it would be kind of expensive to delete e.g. * the oldest entry if it would grow past MAX_STONITH_HISTORY * entries. * It is more efficient to purge MAX_STONITH_HISTORY/2 * entries whenever the list grows beyond MAX_STONITH_HISTORY. * (sort for age + purge the MAX_STONITH_HISTORY/2 oldest) * That done on a per-node-base might raise the * probability of large syncs to occur. * Things like introducing a broadcast to purge * MAX_STONITH_HISTORY/2 entries or not sync above a certain * threshold coming to mind ... * Simplest thing though is to purge the full history * throughout the cluster once MAX_STONITH_HISTORY is reached. * On the other hand this leads to purging the history in * situations where it would be handy to have it probably. */ /*! * \internal * \brief Compare two remote fencing operations by status and completion time * * A pending operation is ordered before a completed operation. If both * operations have completed, then the more recently completed operation is * ordered first. Two pending operations are considered equal. * * \param[in] a First \c remote_fencing_op_t to compare * \param[in] b Second \c remote_fencing_op_t to compare * * \return Standard comparison result (a negative integer if \p a is lesser, * 0 if the values are equal, and a positive integer if \p a is greater) */ static gint cmp_op_by_completion(gconstpointer a, gconstpointer b) { const remote_fencing_op_t *op1 = a; const remote_fencing_op_t *op2 = b; bool op1_pending = stonith__op_state_pending(op1->state); bool op2_pending = stonith__op_state_pending(op2->state); if (op1_pending && op2_pending) { return 0; } if (op1_pending) { return -1; } if (op2_pending) { return 1; } if (op1->completed > op2->completed) { return -1; } if (op1->completed < op2->completed) { return 1; } if (op1->completed_nsec > op2->completed_nsec) { return -1; } if (op1->completed_nsec < op2->completed_nsec) { return 1; } return 0; } /*! * \internal * \brief Remove a completed operation from \c stonith_remote_op_list * * \param[in] data \c remote_fencing_op_t to remove * \param[in] user_data Ignored */ static void remove_completed_remote_op(gpointer data, gpointer user_data) { const remote_fencing_op_t *op = data; if (!stonith__op_state_pending(op->state)) { g_hash_table_remove(stonith_remote_op_list, op->id); } } /*! * \internal * \brief Do a local history-trim to MAX_STONITH_HISTORY / 2 entries * once over MAX_STONITH_HISTORY */ void stonith_fence_history_trim(void) { if (stonith_remote_op_list == NULL) { return; } if (g_hash_table_size(stonith_remote_op_list) > MAX_STONITH_HISTORY) { GList *ops = g_hash_table_get_values(stonith_remote_op_list); crm_trace("More than %d entries in fencing history, purging oldest " "completed operations", MAX_STONITH_HISTORY); ops = g_list_sort(ops, cmp_op_by_completion); // Always keep pending ops regardless of number of entries g_list_foreach(g_list_nth(ops, MAX_STONITH_HISTORY / 2), remove_completed_remote_op, NULL); // No need for a notification after purging old data g_list_free(ops); } } /*! * \internal * \brief Convert xml fence-history to a hash-table like stonith_remote_op_list * * \param[in] history Fence-history in xml * * \return Fence-history as hash-table */ static GHashTable * stonith_xml_history_to_list(const xmlNode *history) { xmlNode *xml_op = NULL; GHashTable *rv = NULL; init_stonith_remote_op_hash_table(&rv); CRM_LOG_ASSERT(rv != NULL); for (xml_op = pcmk__xe_first_child(history, NULL, NULL, NULL); xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) { remote_fencing_op_t *op = NULL; char *id = crm_element_value_copy(xml_op, PCMK__XA_ST_REMOTE_OP); int state; int exit_status = CRM_EX_OK; int execution_status = PCMK_EXEC_DONE; long long completed; long long completed_nsec = 0L; if (!id) { crm_warn("Malformed fencing history received from peer"); continue; } crm_trace("Attaching op %s to hashtable", id); op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t)); op->id = id; op->target = crm_element_value_copy(xml_op, PCMK__XA_ST_TARGET); op->action = crm_element_value_copy(xml_op, PCMK__XA_ST_DEVICE_ACTION); op->originator = crm_element_value_copy(xml_op, PCMK__XA_ST_ORIGIN); op->delegate = crm_element_value_copy(xml_op, PCMK__XA_ST_DELEGATE); op->client_name = crm_element_value_copy(xml_op, PCMK__XA_ST_CLIENTNAME); crm_element_value_ll(xml_op, PCMK__XA_ST_DATE, &completed); op->completed = (time_t) completed; crm_element_value_ll(xml_op, PCMK__XA_ST_DATE_NSEC, &completed_nsec); op->completed_nsec = completed_nsec; crm_element_value_int(xml_op, PCMK__XA_ST_STATE, &state); op->state = (enum op_state) state; /* @COMPAT We can't use stonith__xe_get_result() here because * fencers <2.1.3 didn't include results, leading it to assume an error * status. Instead, set an unknown status in that case. */ if ((crm_element_value_int(xml_op, PCMK__XA_RC_CODE, &exit_status) < 0) || (crm_element_value_int(xml_op, PCMK__XA_OP_STATUS, &execution_status) < 0)) { exit_status = CRM_EX_INDETERMINATE; execution_status = PCMK_EXEC_UNKNOWN; } pcmk__set_result(&op->result, exit_status, execution_status, crm_element_value(xml_op, PCMK_XA_EXIT_REASON)); pcmk__set_result_output(&op->result, crm_element_value_copy(xml_op, PCMK__XA_ST_OUTPUT), NULL); g_hash_table_replace(rv, id, op); CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL); } return rv; } /*! * \internal * \brief Craft xml difference between local fence-history and a history * coming from remote, and merge the remote history into the local * * \param[in,out] remote_history Fence-history as hash-table (may be NULL) * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history_diff_and_merge(GHashTable *remote_history, gboolean add_id, const char *target) { xmlNode *history = NULL; GHashTableIter iter; remote_fencing_op_t *op = NULL; gboolean updated = FALSE; int cnt = 0; if (stonith_remote_op_list) { char *id = NULL; history = pcmk__xe_create(NULL, PCMK__XE_ST_HISTORY); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, (void **)&id, (void **)&op)) { xmlNode *entry = NULL; if (remote_history) { remote_fencing_op_t *remote_op = g_hash_table_lookup(remote_history, op->id); if (remote_op) { if (stonith__op_state_pending(op->state) && !stonith__op_state_pending(remote_op->state)) { crm_debug("Updating outdated pending operation %.8s " "(state=%s) according to the one (state=%s) from " "remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_steal(remote_history, op->id); op->id = remote_op->id; remote_op->id = id; g_hash_table_iter_replace(&iter, remote_op); updated = TRUE; continue; /* skip outdated entries */ } else if (!stonith__op_state_pending(op->state) && stonith__op_state_pending(remote_op->state)) { crm_debug("Broadcasting operation %.8s (state=%s) to " "update the outdated pending one " "(state=%s) in remote peer history", op->id, stonith_op_state_str(op->state), stonith_op_state_str(remote_op->state)); g_hash_table_remove(remote_history, op->id); } else { g_hash_table_remove(remote_history, op->id); continue; /* skip entries broadcasted already */ } } } if (!pcmk__str_eq(target, op->target, pcmk__str_null_matches)) { continue; } cnt++; crm_trace("Attaching op %s", op->id); entry = pcmk__xe_create(history, STONITH_OP_EXEC); if (add_id) { crm_xml_add(entry, PCMK__XA_ST_REMOTE_OP, op->id); } crm_xml_add(entry, PCMK__XA_ST_TARGET, op->target); crm_xml_add(entry, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(entry, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(entry, PCMK__XA_ST_DELEGATE, op->delegate); crm_xml_add(entry, PCMK__XA_ST_CLIENTNAME, op->client_name); crm_xml_add_ll(entry, PCMK__XA_ST_DATE, op->completed); crm_xml_add_ll(entry, PCMK__XA_ST_DATE_NSEC, op->completed_nsec); crm_xml_add_int(entry, PCMK__XA_ST_STATE, op->state); stonith__xe_set_result(entry, &op->result); } } if (remote_history) { init_stonith_remote_op_hash_table(&stonith_remote_op_list); updated |= g_hash_table_size(remote_history); g_hash_table_iter_init(&iter, remote_history); while (g_hash_table_iter_next(&iter, NULL, (void **)&op)) { if (stonith__op_state_pending(op->state) && - pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + pcmk__str_eq(op->originator, fenced_get_local_node(), + pcmk__str_casei)) { crm_warn("Failing pending operation %.8s originated by us but " "known only from peer history", op->id); op->state = st_failed; set_fencing_completed(op); /* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op() * from setting a delegate */ pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID, "Initiated by earlier fencer " "process and presumed failed"); fenced_broadcast_op_result(op, false); } g_hash_table_iter_steal(&iter); g_hash_table_replace(stonith_remote_op_list, op->id, op); /* we could trim the history here but if we bail * out after trim we might miss more recent entries * of those that might still be in the list * if we don't bail out trimming once is more * efficient and memory overhead is minimal as * we are just moving pointers from one hash to * another */ } g_hash_table_destroy(remote_history); /* remove what is left */ } if (updated) { stonith_fence_history_trim(); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } if (cnt == 0) { pcmk__xml_free(history); return NULL; } else { return history; } } /*! * \internal * \brief Craft xml from the local fence-history * * \param[in] add_id If crafting the answer for an API * history-request there is no need for the id * \param[in] target Optionally limit to certain fence-target * * \return The fence-history as xml */ static xmlNode * stonith_local_history(gboolean add_id, const char *target) { return stonith_local_history_diff_and_merge(NULL, add_id, target); } /*! * \internal * \brief Handle fence-history messages (from API or coming in as broadcasts) * * \param[in,out] msg Request XML * \param[out] output Where to set local history, if requested * \param[in] remote_peer If broadcast, peer that sent it * \param[in] options Call options from the request */ void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options) { const char *target = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_NEVER); xmlNode *out_history = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); if (target && (options & st_opt_cs_nodeid)) { int nodeid; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(target, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { target = node->name; } } } if (options & st_opt_cleanup) { const char *call_id = crm_element_value(msg, PCMK__XA_ST_CALLID); crm_trace("Cleaning up operations on %s in %p", target, stonith_remote_op_list); stonith_fence_history_cleanup(target, (call_id != NULL)); } else if (options & st_opt_broadcast) { /* there is no clear sign atm for when a history sync is done so send a notification for anything that smells like history-sync */ fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, NULL, NULL); if (crm_element_value(msg, PCMK__XA_ST_CALLID) != NULL) { /* this is coming from the stonith-API * * craft a broadcast with node's history * so that every node can merge and broadcast * what it has on top */ out_history = stonith_local_history(TRUE, NULL); crm_trace("Broadcasting history to peers"); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else if (remote_peer && - !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) { + !pcmk__str_eq(remote_peer, fenced_get_local_node(), + pcmk__str_casei)) { xmlNode *history = get_xpath_object("//" PCMK__XE_ST_HISTORY, msg, LOG_NEVER); /* either a broadcast created directly upon stonith-API request * or a diff as response to such a thing * * in both cases it may have a history or not * if we have differential data * merge in what we've received and stop * otherwise broadcast what we have on top * marking as differential and merge in afterwards */ if (!history || !pcmk__xe_attr_is_true(history, PCMK__XA_ST_DIFFERENTIAL)) { GHashTable *received_history = NULL; if (history != NULL) { received_history = stonith_xml_history_to_list(history); } out_history = stonith_local_history_diff_and_merge(received_history, TRUE, NULL); if (out_history) { crm_trace("Broadcasting history-diff to peers"); pcmk__xe_set_bool_attr(out_history, PCMK__XA_ST_DIFFERENTIAL, true); stonith_send_broadcast_history(out_history, st_opt_broadcast | st_opt_discard_reply, NULL); } else { crm_trace("History-diff is empty - skip broadcast"); } } } else { crm_trace("Skipping history-query-broadcast (%s%s)" " we sent ourselves", remote_peer?"remote-peer=":"local-ipc", remote_peer?remote_peer:""); } } else { /* plain history request */ crm_trace("Looking for operations on %s in %p", target, stonith_remote_op_list); *output = stonith_local_history(FALSE, target); } pcmk__xml_free(out_history); } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 896a779f5d..b1c0e6f2e4 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2598 +1,2600 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a peer_device_info_t, with each device's * information kept in a device_properties_t. */ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; /* Action-specific base delay for each phase */ int delay_base[st_phase_max]; /* Group of enum st_device_flags */ uint32_t device_support_flags; } device_properties_t; typedef struct { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } peer_device_info_t; GHashTable *stonith_remote_op_list = NULL; extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer); static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data != NULL) { peer_device_info_t *peer = data; g_hash_table_destroy(peer->devices); free(peer->host); free(peer); } } void free_stonith_remote_op_list(void) { if (stonith_remote_op_list != NULL) { g_hash_table_destroy(stonith_remote_op_list); stonith_remote_op_list = NULL; } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; uint32_t support_action_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in,out] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified) && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * \param[in] support_action_only Whether to count only devices that support action * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.support_action_only = support_on_action_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, uint32_t support_action_only) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) { return NULL; } return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! * \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device, fenced_support_flag(op->action)); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { pcmk__xml_free(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); g_list_free(op->duplicates); pcmk__reset_result(&op->result); free(op); } void init_stonith_remote_op_hash_table(GHashTable **table) { if (*table == NULL) { *table = pcmk__strkey_table(NULL, free_remote_op); } } /*! * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot targeting %s to 'off' " QB_XS " id=%.8s", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, PCMK_ACTION_OFF); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GList *iter = NULL; crm_info("Remapped 'off' targeting %s complete, " "remapping to 'on' for %s " QB_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GList *match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot targeting %s for %s " QB_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, PCMK_ACTION_REBOOT); } } /*! * \internal * \brief Create notification data XML for a fencing operation result * * \param[in,out] parent Parent XML element for newly created element * \param[in] op Fencer operation that completed * * \return Newly created XML to add as notification data * \note The caller is responsible for freeing the result. */ static xmlNode * fencing_result2xml(xmlNode *parent, const remote_fencing_op_t *op) { xmlNode *notify_data = pcmk__xe_create(parent, PCMK__XE_ST_NOTIFY_FENCE); crm_xml_add_int(notify_data, PCMK_XA_STATE, op->state); crm_xml_add(notify_data, PCMK__XA_ST_TARGET, op->target); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(notify_data, PCMK__XA_ST_DELEGATE, op->delegate); crm_xml_add(notify_data, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(notify_data, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(notify_data, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(notify_data, PCMK__XA_ST_CLIENTNAME, op->client_name); return notify_data; } /*! * \internal * \brief Broadcast a fence result notification to all CPG peers * * \param[in] op Fencer operation that completed * \param[in] op_merged Whether this operation is a duplicate of another */ void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) { static int count = 0; xmlNode *bcast = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); xmlNode *wrapper = NULL; xmlNode *notify_data = NULL; count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(bcast, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(bcast, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); crm_xml_add_int(bcast, PCMK_XA_COUNT, count); if (op_merged) { pcmk__xe_set_bool_attr(bcast, PCMK__XA_ST_OP_MERGED, true); } wrapper = pcmk__xe_create(bcast, PCMK__XE_ST_CALLDATA); notify_data = fencing_result2xml(wrapper, op); stonith__xe_set_result(notify_data, &op->result); pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, bcast); pcmk__xml_free(bcast); return; } /*! * \internal * \brief Reply to a local request originator and notify all subscribed clients * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; pcmk__client_t *client = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ crm_xml_add_int(data, PCMK_XA_STATE, op->state); crm_xml_add(data, PCMK__XA_ST_TARGET, op->target); crm_xml_add(data, PCMK__XA_ST_OP, op->action); reply = fenced_construct_reply(op->request, data, &op->result); crm_xml_add(reply, PCMK__XA_ST_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ client = pcmk__find_client_by_id(op->client_id); if (client == NULL) { crm_trace("Skipping reply to %s: no longer a client", op->client_id); } else { do_local_reply(reply, client, op->call_options); } /* bcast to all local clients that the fencing operation happend */ notify_data = fencing_result2xml(NULL, op); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_FENCE, &op->result, notify_data); pcmk__xml_free(notify_data); fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; pcmk__xml_free(reply); } /*! * \internal * \brief Finalize all duplicates of a given fencer operation * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) { for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " QB_XS " id=%.8s", other->client_name, other->originator, pcmk_exec_status_str(op->result.execution_status), other->id); pcmk__copy_result(&op->result, &other->result); finalize_op(other, data, true); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " QB_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = get_xpath_object("//@" PCMK__XA_ST_DELEGATE, xml, LOG_NEVER); if (match == NULL) { return crm_element_value_copy(xml, PCMK__XA_SRC); } else { return crm_element_value_copy(match, PCMK__XA_ST_DELEGATE); } } /*! * \internal * \brief Finalize a peer fencing operation * * Clean up after a fencing operation completes. This function has two code * paths: the executioner uses it to broadcast the result to CPG peers, and then * each peer (including the executioner) uses it to process that broadcast and * notify its IPC clients of the result. * * \param[in,out] op Fencer operation that completed * \param[in,out] data If not NULL, XML reply of last delegated operation * \param[in] dup Whether this operation is a duplicate of another * (in which case, do not broadcast the result) * * \note The operation result should be set before calling this function. */ static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; gboolean op_merged = FALSE; CRM_CHECK((op != NULL), return); // This is a no-op if timers have already been cleared clear_remote_op_timers(op); if (op->notify_sent) { // Most likely, this is a timed-out action that eventually completed crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " "Result arrived too late " QB_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), op->id); return; } set_fencing_completed(op); undo_op_remap(op); if (data == NULL) { data = pcmk__xe_create(NULL, "remote-op"); local_data = data; } else if (op->delegate == NULL) { switch (op->result.execution_status) { case PCMK_EXEC_NO_FENCE_DEVICE: break; case PCMK_EXEC_INVALID: if (op->result.exit_status != CRM_EX_EXPIRED) { op->delegate = delegate_from_xml(data); } break; default: op->delegate = delegate_from_xml(data); break; } } if (dup || (crm_element_value(data, PCMK__XA_ST_OP_MERGED) != NULL)) { op_merged = true; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, PCMK__XA_SUBT); if (!dup && !pcmk__str_eq(subt, PCMK__VALUE_BROADCAST, pcmk__str_none)) { /* Defer notification until the bcast message arrives */ fenced_broadcast_op_result(op, op_merged); pcmk__xml_free(local_data); return; } if (pcmk__result_ok(&op->result) || dup - || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + || !pcmk__str_eq(op->originator, fenced_get_local_node(), + pcmk__str_casei)) { level = LOG_NOTICE; } do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " QB_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), crm_exit_str(op->result.exit_status), pcmk_exec_status_str(op->result.execution_status), ((op->result.exit_reason == NULL)? "" : ": "), ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), op->id); handle_local_reply_and_notify(op, data); if (!dup) { finalize_op_duplicates(op, data); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { pcmk__xml_free(op->request); op->request = NULL; } pcmk__xml_free(local_data); } /*! * \internal * \brief Finalize a watchdog fencer op after the waiting time expires * * \param[in,out] userdata Fencer operation that completed * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Self-fencing (%s) by %s for %s assumed complete " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, NULL, false); return G_SOURCE_REMOVE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Peer's '%s' action targeting %s for client %s timed out " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } // Try another device, if appropriate request_peer_fencing(op, NULL); return G_SOURCE_REMOVE; } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] op Fencer operation that timed out * \param[in] reason Readable description of what step timed out */ static void finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) { crm_debug("Action '%s' targeting %s for client %s timed out " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. */ op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { op->state = st_failed; pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); } finalize_op(op, NULL, false); } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] userdata Fencer operation that timed out * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action '%s' targeting %s for client %s already completed " QB_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } else { finalize_timed_out_op(userdata, "Fencing did not complete within a " "total timeout based on the " "configured timeout and retries for " "any devices attempted"); } return G_SOURCE_REMOVE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %.8s targeting %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %.8s targeting %s already in progress", op->id, op->target); } else if (op->query_results) { // Query succeeded, so attempt the actual fencing crm_debug("Query %.8s targeting %s complete (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); request_peer_fencing(op, NULL); } else { crm_debug("Query %.8s targeting %s timed out (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); finalize_timed_out_op(op, "No capable peers replied to device query " "within timeout"); } return G_SOURCE_REMOVE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST__LEVEL_COUNT; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /*! * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, pcmk__str_copy(device)); } } /*! * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GList *devices) { GList *lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { const char *device = lpc->data; op->devices_list = g_list_append(op->devices_list, pcmk__str_copy(device)); } op->devices = op->devices_list; } /*! * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch (tp->kind) { case fenced_target_by_attribute: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case fenced_target_by_pattern: /* This level targets node names matching a pattern, so tp->target * (and tp->target_pattern) is a regular expression. */ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case fenced_target_by_name: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); default: break; } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? pcmk_rc_ok : ENODEV; } CRM_ASSERT(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST__LEVEL_COUNT && tp->levels[op->level] == NULL); if (op->level < ST__LEVEL_COUNT) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level if ((op->level > 1) && (op->client_delay > 0)) { op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_info("All %sfencing options targeting %s for client %s@%s failed " QB_XS " id=%.8s", (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! * \internal * \brief If fencing operation is a duplicate, merge it into the other one * * \param[in,out] op Fencing operation to check */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); pcmk__node_status_t *node = NULL; if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: suicide for %s", op->id, other->id, other->target); continue; } node = pcmk__get_node(0, other->originator, NULL, pcmk__node_search_cluster_member); if (!fencing_peer_active(node)) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " QB_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%lld vs. %lld + %ds)", op->id, other->id, (long long)now, (long long)other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %ds", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " QB_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; pcmk__node_status_t *entry = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } /*! * \internal * \brief Process a manual confirmation of a pending fence action * * \param[in] client IPC client that sent confirmation * \param[in,out] msg Request XML with manual confirmation * * \return Standard Pacemaker return code */ int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, msg, LOG_ERR); CRM_CHECK(dev != NULL, return EPROTO); crm_notice("Received manual confirmation that %s has been fenced", pcmk__s(crm_element_value(dev, PCMK__XA_ST_TARGET), "unknown target")); op = initiate_remote_stonith_op(client, msg, TRUE); if (op == NULL) { return EPROTO; } op->state = st_done; set_fencing_completed(op); op->delegate = pcmk__str_copy("a human"); // For the fencer's purposes, the fencing operation is done pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, msg, false); /* For the requester's purposes, the operation is still pending. The * actual result will be sent asynchronously via the operation's done_cb(). */ return EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_TARGET, request, LOG_NEVER); int call_options = 0; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = pcmk__assert_alloc(1, sizeof(remote_fencing_op_t)); crm_element_value_int(request, PCMK__XA_ST_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays crm_element_value_int(request, PCMK__XA_ST_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, PCMK__XA_ST_REMOTE_OP); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, PCMK__XA_ST_DEVICE_ACTION); /* The node initiating the stonith operation. If an operation is relayed, * this is the last node the operation lands on. When in standalone mode, * origin is the ID of the client that originated the operation. * * Or may be the name of the function that created the operation. */ op->originator = crm_element_value_copy(dev, PCMK__XA_ST_ORIGIN); if (op->originator == NULL) { /* Local or relayed request */ - op->originator = pcmk__str_copy(stonith_our_uname); + op->originator = pcmk__str_copy(fenced_get_local_node()); } // Delegate may not be set op->delegate = crm_element_value_copy(dev, PCMK__XA_ST_DELEGATE); op->created = time(NULL); CRM_LOG_ASSERT(client != NULL); op->client_id = pcmk__str_copy(client); /* For a RELAY operation, set fenced on the client. */ operation = crm_element_value(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = crm_element_value_copy(request, PCMK__XA_ST_CLIENTNAME); } op->target = crm_element_value_copy(dev, PCMK__XA_ST_TARGET); // @TODO Figure out how to avoid copying XML here op->request = pcmk__xml_copy(NULL, request); crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); op->call_options = call_options; crm_element_value_int(request, PCMK__XA_ST_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %ds, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if ((node != NULL) && (node->name != NULL)) { pcmk__str_update(&(op->target), node->name); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ fenced_send_notification(PCMK__VALUE_ST_NOTIFY_HISTORY, NULL, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } /*! * \internal * \brief Create a peer fencing operation from a request, and initiate it * * \param[in] client IPC client that made request (NULL to get from request) * \param[in] request Request XML * \param[in] manual_ack Whether this is a manual action confirmation * * \return Newly created operation on success, otherwise NULL */ remote_fencing_op_t * initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, PCMK__XA_ST_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: // advance_topology_level() exhausted levels pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "All topology levels failed"); crm_warn("Could not request peer fencing (%s) targeting %s " QB_XS " id=%.8s", op->action, op->target, op->id); finalize_op(op, NULL, false); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " QB_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " QB_XS " id=%.8s state=%s base_timeout=%ds", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(query, PCMK__XA_ST_TARGET, op->target); crm_xml_add(query, PCMK__XA_ST_DEVICE_ACTION, op_requested_action(op)); crm_xml_add(query, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(query, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(query, PCMK__XA_ST_CLIENTNAME, op->client_name); crm_xml_add_int(query, PCMK__XA_ST_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = crm_element_value(request, PCMK__XA_ST_OP); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); if (relay_op_id) { crm_xml_add(query, PCMK__XA_ST_REMOTE_OP_RELAY, relay_op_id); } } pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, query); pcmk__xml_free(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static bool is_watchdog_fencing(const remote_fencing_op_t *op, const char *device) { return (stonith_watchdog_timeout_ms > 0 // Only an explicit mismatch is considered not a watchdog fencing. && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches) && pcmk__is_fencing_action(op->action) && node_does_watchdog_fencing(op->target)); } static peer_device_info_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { peer_device_info_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if (!peer->tried && count_peer_devices(op, peer, verified_devices_only, fenced_support_flag(op->action))) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static peer_device_info_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; peer_device_info_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); /* With a simple watchdog fencing configuration without a topology, * "device" is NULL here. Consider it should be done with watchdog fencing. */ if (is_watchdog_fencing(op, device)) { crm_info("Couldn't contact watchdog-fencing target-node (%s)", op->target); /* check_watchdog_fencing_and_wait will log additional info */ } else { crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); } return NULL; } static int valid_fencing_timeout(int specified_timeout, bool action_specific, const remote_fencing_op_t *op, const char *device) { int timeout = specified_timeout; if (!is_watchdog_fencing(op, device)) { return timeout; } timeout = (int) QB_MIN(QB_MAX(specified_timeout, stonith_watchdog_timeout_ms / 1000), INT_MAX); if (timeout > specified_timeout) { if (action_specific) { crm_warn("pcmk_%s_timeout %ds for %s is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", op->action, specified_timeout, device? device : "watchdog", timeout, timeout); } else { crm_warn("Fencing timeout %ds is too short (must be >= " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " %ds), using %ds " "instead", specified_timeout, timeout, timeout); } } return timeout; } static int get_device_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, bool with_delay) { int timeout = op->base_timeout; device_properties_t *props; timeout = valid_fencing_timeout(op->base_timeout, false, op, device); if (!peer || !device) { return timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return timeout; } if (props->custom_action_timeout[op->phase]) { timeout = valid_fencing_timeout(props->custom_action_timeout[op->phase], true, op, device); } // op->client_delay < 0 means disable any static/random fencing delays if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max timeout += (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); } return timeout; } struct timeout_data { const remote_fencing_op_t *op; const peer_device_info_t *peer; int total_timeout; }; /*! * \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in,out] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id, true); } } static int get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer) { long long total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; GList *auto_list = NULL; if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST__LEVEL_COUNT; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { bool found = false; for (iter = op->query_results; iter != NULL; iter = iter->next) { const peer_device_info_t *peer = iter->data; if (auto_list) { GList *match = g_list_find_custom(auto_list, device_list->data, sort_strings); if (match) { auto_list = g_list_remove(auto_list, match->data); } } if (find_peer_device(op, peer, device_list->data, fenced_support_flag(op->action))) { total_timeout += get_device_timeout(op, peer, device_list->data, true); found = true; break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ /* in case of watchdog-device we add the timeout to the budget if didn't get a reply */ if (!found && is_watchdog_fencing(op, device_list->data)) { total_timeout += stonith_watchdog_timeout_ms / 1000; } } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ //Add only exists automatic_list device timeout if (auto_list) { for (iter = auto_list; iter != NULL; iter = iter->next) { GList *iter2 = NULL; for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) { peer_device_info_t *peer = iter2->data; if (find_peer_device(op, peer, iter->data, st_device_supports_on)) { total_timeout += get_device_timeout(op, peer, iter->data, true); break; } } } } g_list_free(auto_list); } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = valid_fencing_timeout(op->base_timeout, false, op, NULL); } if (total_timeout <= 0) { total_timeout = op->base_timeout; } /* Take any requested fencing delay into account to prevent it from eating * up the total timeout. */ if (op->client_delay > 0) { total_timeout += op->client_delay; } return (int) QB_MIN(total_timeout, INT_MAX); } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = crm_element_value(op->request, PCMK__XA_ST_CLIENTNODE); call_id = crm_element_value(op->request, PCMK__XA_ST_CALLID); client_id = crm_element_value(op->request, PCMK__XA_ST_CLIENTID); if (!client_node || !call_id || !client_id) { return; } - if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { + if (pcmk__str_eq(client_node, fenced_get_local_node(), pcmk__str_casei)) { // Client is connected to this node, so send update directly to them do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(update, PCMK__XA_ST_CLIENTID, client_id); crm_xml_add(update, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(update, PCMK__XA_ST_TIMEOUT, op_timeout); pcmk__cluster_send_message(pcmk__get_node(0, client_node, NULL, pcmk__node_search_cluster_member), pcmk__cluster_msg_fenced, update); pcmk__xml_free(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Fencer operation to advance * \param[in] device ID of device that just completed * \param[in,out] msg If not NULL, XML reply of last delegated operation */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. */ op_phase_on(op); } // This function is only called if the previous device succeeded pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s", op->target, op->client_name, op->originator); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; } request_peer_fencing(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; finalize_op(op, msg, false); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { guint timeout_ms = QB_MIN(stonith_watchdog_timeout_ms, UINT_MAX); crm_notice("Waiting %s for %s to self-fence (%s) for " "client %s " QB_XS " id=%.8s", pcmk__readable_interval(timeout_ms), op->target, op->action, op->client_name, op->id); if (op->op_timer_one) { g_source_remove(op->op_timer_one); } op->op_timer_one = g_timeout_add(timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } /*! * \internal * \brief Ask a peer to execute a fencing operation * * \param[in,out] op Fencing operation to be executed * \param[in,out] peer If NULL or topology is in use, choose best peer to * execute the fencing, otherwise use this peer */ static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) { const char *device = NULL; int timeout; CRM_CHECK(op != NULL, return); crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((op->phase == st_phase_on) && (op->devices != NULL)) { /* We are in the "on" phase of a remapped topology reboot. If this * device has pcmk_reboot_action="off", or doesn't support the "on" * action, skip it. * * We can't check device properties at this point because we haven't * chosen a peer for this stage yet. Instead, we check the local node's * knowledge about the device. If different versions of the fence agent * are installed on different nodes, there's a chance this could be * mistaken, but the worst that could happen is we don't try turning the * node back on when we should. */ device = op->devices->data; if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } if (!fenced_device_supports_on(device)) { crm_info("Not turning %s back on using %s because the agent " "doesn't support 'on'", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } } timeout = op->base_timeout; if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %ds for peer's fencing targeting %s for %s" QB_XS "id=%.8s", op->total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore the caller's peer preference if topology is in use, because * that peer might not have access to the required device. With * topology, stonith_choose_peer() removes the device from further * consideration, so the timeout must be calculated beforehand. * * @TODO Basing the total timeout on the caller's preferred peer (above) * is less than ideal. */ peer = stonith_choose_peer(op); device = op->devices->data; /* Fencing timeout sent to peer takes no delay into account. * The peer will add a dedicated timer for any delay upon * schedule_stonith_command(). */ timeout = get_device_timeout(op, peer, device, false); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); const pcmk__node_status_t *peer_node = pcmk__get_node(0, peer->host, NULL, pcmk__node_search_cluster_member); if (op->client_delay > 0) { /* Take requested fencing delay into account to prevent it from * eating up the timeout. */ timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; } crm_xml_add(remote_op, PCMK__XA_ST_REMOTE_OP, op->id); crm_xml_add(remote_op, PCMK__XA_ST_TARGET, op->target); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ACTION, op->action); crm_xml_add(remote_op, PCMK__XA_ST_ORIGIN, op->originator); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTID, op->client_id); crm_xml_add(remote_op, PCMK__XA_ST_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, PCMK__XA_ST_TIMEOUT, timeout); crm_xml_add_int(remote_op, PCMK__XA_ST_CALLOPT, op->call_options); crm_xml_add_int(remote_op, PCMK__XA_ST_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device, true); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " QB_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, PCMK__XA_ST_DEVICE_ID, device); } else { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " QB_XS " for client %s (%ds, %s)", peer->host, op->action, op->target, op->client_name, timeout_one, pcmk__readable_interval(stonith_watchdog_timeout_ms)); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } if (!is_watchdog_fencing(op, device) || !check_watchdog_fencing_and_wait(op)) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for suicide. Local pe-execution in sbd may detect the node as unclean and lead to timely suicide. Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT at least is questionable. */ /* coming here we're not waiting for watchdog timeout - thus engage timer with timout evaluated before */ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } pcmk__cluster_send_message(peer_node, pcmk__cluster_msg_fenced, remote_op); peer->tried = TRUE; pcmk__xml_free(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " QB_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_CHECK(op->state < st_done, return); finalize_timed_out_op(op, "All nodes failed, or are unable, to " "fence target"); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if (is_watchdog_fencing(op, device) && check_watchdog_fencing_and_wait(op)) { /* Consider a watchdog fencing targeting an offline node executing * once it starts waiting for the target to self-fence. So that when * the query timer pops, remote_op_query_timeout() considers the * fencing already in progress. */ op->state = st_exec; return; } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } /* ... else use existing result from previous failed attempt * (topology is not in use, and no devices remain to be attempted). * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would * prevent finalize_op() from setting the correct delegate if * needed. */ crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " QB_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; finalize_op(op, NULL, false); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " QB_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const peer_device_info_t *peer_a = a; const peer_device_info_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not * * \param[in] op Fencing operation with topology to check */ static gboolean all_topology_devices_found(const remote_fencing_op_t *op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__is_fencing_action(op->action)) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST__LEVEL_COUNT; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { peer_device_info_t *peer = iter->data; if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } match = find_peer_device(op, peer, device->data, st_device_supports_none); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] xml XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in,out] op Fencing operation that properties are being parsed for * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(const xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; crm_element_value_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %ds", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; crm_element_value_int(xml, PCMK__XA_ST_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum of random delay %ds for %s", peer, device, props->delay_max[phase], action); } props->delay_base[phase] = 0; crm_element_value_int(xml, PCMK__XA_ST_DELAY_BASE, &props->delay_base[phase]); if (props->delay_base[phase]) { crm_trace("Peer %s with device %s returned base delay %ds for %s", peer, device, props->delay_base[phase], action); } /* Handle devices with automatic unfencing */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; crm_element_value_int(xml, PCMK__XA_ST_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ if (pcmk__xe_attr_is_true(xml, PCMK__XA_ST_ACTION_DISALLOWED)) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] peer Peer's device information * \param[in] device ID of device being parsed */ static void add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, peer_device_info_t *peer, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = pcmk__assert_alloc(1, sizeof(device_properties_t)); int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */ /* Add a new entry to this peer's devices list */ g_hash_table_insert(peer->devices, pcmk__str_copy(device), props); /* Peers with verified (monitored) access will be preferred */ crm_element_value_int(xml, PCMK__XA_ST_MONITOR_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", peer->host, device); props->verified = TRUE; } crm_element_value_int(xml, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, &flags); props->device_support_flags = flags; /* Parse action-specific device properties */ parse_action_specific(xml, peer->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. */ if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_OFF, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); } else if (pcmk__str_eq(pcmk__xe_id(child), PCMK_ACTION_ON, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static peer_device_info_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, const xmlNode *xml) { peer_device_info_t *peer = pcmk__assert_alloc(1, sizeof(peer_device_info_t)); xmlNode *child; peer->host = pcmk__str_copy(host); peer->devices = pcmk__strkey_table(free, free); /* Each child element describes one capable device available to the peer */ for (child = pcmk__xe_first_child(xml, NULL, NULL, NULL); child != NULL; child = pcmk__xe_next(child)) { const char *device = pcmk__xe_id(child); if (device) { add_device_properties(child, op, peer, device); } } peer->ndevices = g_hash_table_size(peer->devices); CRM_CHECK(ndevices == peer->ndevices, crm_err("Query claimed to have %d device%s but %d found", ndevices, pcmk__plural_s(ndevices), peer->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); return peer; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode *msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; peer_device_info_t *peer = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" PCMK__XA_ST_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, PCMK__XA_ST_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(stonith_remote_op_list, id); if (op == NULL) { crm_debug("Received query reply for unknown or expired operation %s", id); return -EOPNOTSUPP; } replies_expected = fencing_active_peers(); if (op->replies_expected < replies_expected) { replies_expected = op->replies_expected; } if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, PCMK__XA_SRC); host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); if (ndevices > 0) { peer = add_result(op, host, ndevices, dev); } pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (pcmk_is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, peer, TRUE, fenced_support_flag(op->action)); /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if ((peer != NULL) && !host_is_target && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified device%s", nverified, pcmk__plural_s(nverified)); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if ((peer != NULL) && (op->state == st_done)) { crm_info("Discarding query result from %s (%d device%s): " "Operation is %s", peer->host, peer->ndevices, pcmk__plural_s(peer->ndevices), stonith_op_state_str(op->state)); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received */ void fenced_process_fencing_reply(xmlNode *msg) { const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" PCMK__XA_ST_REMOTE_OP, msg, LOG_ERR); pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(dev != NULL, return); id = crm_element_value(dev, PCMK__XA_ST_REMOTE_OP); CRM_CHECK(id != NULL, return); dev = stonith__find_xe_with_result(msg); CRM_CHECK(dev != NULL, return); stonith__xe_get_result(dev, &result); device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if ((op == NULL) && pcmk__result_ok(&result)) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, PCMK__XA_ST_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); pcmk__reset_result(&result); return; } pcmk__reset_result(&op->result); op->result = result; // The operation takes ownership of the result if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return; } if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT), PCMK__VALUE_BROADCAST, pcmk__str_none)) { if (pcmk__result_ok(&op->result)) { op->state = st_done; } else { op->state = st_failed; } finalize_op(op, msg, false); return; - } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + } else if (!pcmk__str_eq(op->originator, fenced_get_local_node(), + pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = NULL; const char *reason = op->result.exit_reason; /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { finalize_op(op, msg, false); return; } device = crm_element_value(msg, PCMK__XA_ST_DEVICE_ID); if ((op->phase == 2) && !pcmk__result_ok(&op->result)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s " "after successful 'off'", device, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : ": ", (reason == NULL)? "" : reason, op->target); pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: " "%s%s%s%s", op->action, op->target, ((device == NULL)? "" : " using "), ((device == NULL)? "" : device), op->client_name, op->originator, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } if (pcmk__result_ok(&op->result)) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg); return; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; finalize_op(op, msg, false); return; } } } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) { op->state = st_done; finalize_op(op, msg, false); return; } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT) && (op->devices == NULL)) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; finalize_op(op, msg, false); return; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (result was: %s)", op->target, op->originator, op->client_name, pcmk_exec_status_str(op->result.execution_status)); request_peer_fencing(op, NULL); } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/daemons/fenced/fenced_scheduler.c b/daemons/fenced/fenced_scheduler.c index b231b396e7..ffa35692a7 100644 --- a/daemons/fenced/fenced_scheduler.c +++ b/daemons/fenced/fenced_scheduler.c @@ -1,233 +1,260 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include static pcmk_scheduler_t *scheduler = NULL; /*! * \internal * \brief Initialize scheduler data for fencer purposes * * \return Standard Pacemaker return code */ int fenced_scheduler_init(void) { pcmk__output_t *logger = NULL; int rc = pcmk__log_output_new(&logger); if (rc != pcmk_rc_ok) { return rc; } scheduler = pe_new_working_set(); if (scheduler == NULL) { pcmk__output_free(logger); return ENOMEM; } pe__register_messages(logger); pcmk__register_lib_messages(logger); pcmk__output_set_log_level(logger, LOG_TRACE); scheduler->priv->out = logger; return pcmk_rc_ok; } +/*! + * \internal + * \brief Set the local node name for scheduling purposes + * + * \param[in] node_name Name to set as local node name + */ +void +fenced_set_local_node(const char *node_name) +{ + CRM_ASSERT(scheduler != NULL); + + scheduler->priv->local_node_name = pcmk__str_copy(node_name); +} + +/*! + * \internal + * \brief Get the local node name + * + * \return Local node name + */ +const char * +fenced_get_local_node(void) +{ + if (scheduler == NULL) { + return NULL; + } + return scheduler->priv->local_node_name; +} + /*! * \internal * \brief Free all scheduler-related resources */ void fenced_scheduler_cleanup(void) { if (scheduler != NULL) { pcmk__output_t *logger = scheduler->priv->out; if (logger != NULL) { logger->finish(logger, CRM_EX_OK, true, NULL); pcmk__output_free(logger); scheduler->priv->out = NULL; } pe_free_working_set(scheduler); scheduler = NULL; } } /*! * \internal * \brief Check whether the local node is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node if found, otherwise NULL */ static pcmk_node_t * local_node_allowed_for(const pcmk_resource_t *rsc) { - if ((rsc != NULL) && (stonith_our_uname != NULL)) { + if ((rsc != NULL) && (scheduler->priv->local_node_name != NULL)) { GHashTableIter iter; pcmk_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { - if (pcmk__str_eq(node->priv->name, stonith_our_uname, + if (pcmk__str_eq(node->priv->name, scheduler->priv->local_node_name, pcmk__str_casei)) { return node; } } } return NULL; } /*! * \internal * \brief If a given resource or any of its children are fencing devices, * register the devices * * \param[in,out] data Resource to check * \param[in,out] user_data Ignored */ static void register_if_fencing_device(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const char *rsc_id = pcmk__s(rsc->priv->history_id, rsc->id); xmlNode *xml = NULL; GHashTableIter hash_iter; pcmk_node_t *node = NULL; const char *name = NULL; const char *value = NULL; const char *rclass = NULL; const char *agent = NULL; const char *rsc_provides = NULL; stonith_key_value_t *params = NULL; // If this is a collective resource, check children instead if (rsc->priv->children != NULL) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { register_if_fencing_device(iter->data, NULL); if (pcmk__is_clone(rsc)) { return; // Only one instance needs to be checked for clones } } return; } rclass = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return; // Not a fencing device } if (pe__resource_is_disabled(rsc)) { crm_info("Ignoring fencing device %s because it is disabled", rsc->id); return; } if ((stonith_watchdog_timeout_ms <= 0) && pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_info("Ignoring fencing device %s " "because watchdog fencing is disabled", rsc->id); return; } // Check whether local node is allowed to run resource node = local_node_allowed_for(rsc); if (node == NULL) { crm_info("Ignoring fencing device %s " "because local node is not allowed to run it", rsc->id); return; } if (node->assign->score < 0) { crm_info("Ignoring fencing device %s " "because local node has preference %s for it", rsc->id, pcmk_readable_score(node->assign->score)); return; } // If device is in a group, check whether local node is allowed for group if (pcmk__is_group(rsc->priv->parent)) { pcmk_node_t *group_node = local_node_allowed_for(rsc->priv->parent); if ((group_node != NULL) && (group_node->assign->score < 0)) { crm_info("Ignoring fencing device %s " "because local node has preference %s for its group", rsc->id, pcmk_readable_score(group_node->assign->score)); return; } } crm_debug("Reloading configuration of fencing device %s", rsc->id); agent = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE); /* @COMPAT Support for node attribute expressions in rules for resource * meta-attributes is deprecated. When we can break behavioral backward * compatibility, replace node with NULL here. */ get_meta_attributes(rsc->priv->meta, rsc, node, scheduler); rsc_provides = g_hash_table_lookup(rsc->priv->meta, PCMK_STONITH_PROVIDES); g_hash_table_iter_init(&hash_iter, pe_rsc_params(rsc, node, scheduler)); while (g_hash_table_iter_next(&hash_iter, (gpointer *) &name, (gpointer *) &value)) { if ((name == NULL) || (value == NULL)) { continue; } params = stonith_key_value_add(params, name, value); } xml = create_device_registration_xml(rsc_id, st_namespace_any, agent, params, rsc_provides); stonith_key_value_freeall(params, 1, 1); CRM_ASSERT(stonith_device_register(xml, TRUE) == pcmk_ok); pcmk__xml_free(xml); } /*! * \internal * \brief Run the scheduler for fencer purposes * * \param[in] cib Cluster's current CIB */ void fenced_scheduler_run(xmlNode *cib) { CRM_CHECK((cib != NULL) && (scheduler != NULL), return); if (scheduler->priv->now != NULL) { crm_time_free(scheduler->priv->now); scheduler->priv->now = NULL; } - scheduler->localhost = stonith_our_uname; pcmk__schedule_actions(cib, pcmk__sched_location_only - |pcmk__sched_no_compat |pcmk__sched_no_counts, scheduler); g_list_foreach(scheduler->priv->resources, register_if_fencing_device, NULL); scheduler->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(scheduler); } diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index db974ef769..2ba0e44c94 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,678 +1,674 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "daemon for executing fencing devices in a Pacemaker cluster" -char *stonith_our_uname = NULL; long long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static struct { bool no_cib_connect; gchar **log_files; } options; crm_exit_t exit_code = CRM_EX_OK; static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -ECONNREFUSED; } if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_NACK, NULL, CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, PCMK__XA_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(request, PCMK__XA_ST_OP, op); crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); - crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); + crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, request); pcmk__xml_free(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); c->name = crm_strdup_printf("%s.%u", pcmk__s(value, "unknown"), c->pid); } crm_element_value_int(request, PCMK__XA_ST_CALLOPT, &call_options); crm_trace("Flags %#08" PRIx32 "/%#08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, PCMK__XA_ST_CLIENTID, c->id); crm_xml_add(request, PCMK__XA_ST_CLIENTNAME, pcmk__client_name(c)); - crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, stonith_our_uname); + crm_xml_add(request, PCMK__XA_ST_CLIENTNODE, fenced_get_local_node()); crm_log_xml_trace(request, "ipc-received"); stonith_command(c, id, flags, request, NULL); pcmk__xml_free(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, PCMK__XA_SRC); const char *op = crm_element_value(msg, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_POKE, pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk__cpg_message_data(handle, nodeid, pid, msg, &from); if(data == NULL) { return; } xml = pcmk__xml_parse(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, PCMK__XA_SRC, from); stonith_peer_callback(xml, NULL); pcmk__xml_free(xml); free(data); } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options) { /* send callback to originating child */ int local_rc = pcmk_rc_ok; int rid = 0; uint32_t ipc_flags = crm_ipc_server_event; if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_LOG_ASSERT(client->request_id); rid = client->request_id; client->request_id = 0; ipc_flags = crm_ipc_flags_none; } local_rc = pcmk__ipc_send_xml(client, rid, notify_src, ipc_flags); if (local_rc == pcmk_rc_ok) { crm_trace("Sent response %d to client %s", rid, pcmk__client_name(client)); } else { crm_warn("%synchronous reply to client %s failed: %s", (pcmk_is_set(call_options, st_opt_sync_call)? "S" : "As"), pcmk__client_name(client), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, PCMK__VALUE_ST_NOTIFY_HISTORY_SYNCED, pcmk__str_none)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { const xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, PCMK__XA_SUBT); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " QB_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = pcmk__xe_create(NULL, PCMK__XE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_T, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE); crm_xml_add(notify_data, PCMK__XA_ST_CALLID, call_id); crm_xml_add_int(notify_data, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } pcmk__xml_free(notify_data); } /*! * \internal * \brief Notify relevant IPC clients of a fencing operation result * * \param[in] type Notification type * \param[in] result Result of fencing operation (assume success if NULL) * \param[in] data If not NULL, add to notification as call data */ void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); CRM_LOG_ASSERT(type != NULL); crm_xml_add(update_msg, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(update_msg, PCMK__XA_SUBT, type); crm_xml_add(update_msg, PCMK__XA_ST_OP, type); stonith__xe_set_result(update_msg, result); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(update_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); pcmk__xml_free(update_msg); crm_trace("Notify complete"); } /*! * \internal * \brief Send notifications for a configuration change to subscribed clients * * \param[in] op Notification type (\c STONITH_OP_DEVICE_ADD, * \c STONITH_OP_DEVICE_DEL, \c STONITH_OP_LEVEL_ADD, or * \c STONITH_OP_LEVEL_DEL) * \param[in] result Operation result * \param[in] desc Description of what changed (either device ID or string * representation of level * ([])) */ void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc) { xmlNode *notify_data = pcmk__xe_create(NULL, op); crm_xml_add(notify_data, PCMK__XA_ST_DEVICE_ID, desc); fenced_send_notification(op, result, notify_data); pcmk__xml_free(notify_data); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } } static void stonith_cleanup(void) { fenced_cib_cleanup(); if (ipcs) { qb_ipcs_destroy(ipcs); } pcmk__cluster_destroy_node_caches(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); fenced_unregister_handlers(); - - free(stonith_our_uname); - stonith_our_uname = NULL; } static gboolean stand_alone_cpg_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { stand_alone = FALSE; options.no_cib_connect = true; return TRUE; } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node, const void *data) { if ((type != pcmk__node_update_processes) && !pcmk_is_set(node->flags, pcmk__node_status_remote)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(query, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(query, PCMK__XA_ST_OP, STONITH_OP_POKE); crm_debug("Broadcasting our uname because of node %" PRIu32, node->cluster_layer_id); pcmk__cluster_send_message(NULL, pcmk__cluster_msg_fenced, query); pcmk__xml_free(query); } } /* @COMPAT Deprecated since 2.1.8. Use pcmk_list_fence_attrs() or * crm_resource --list-options=fencing instead of querying daemon metadata. */ static int fencer_metadata(void) { const char *name = "pacemaker-fenced"; const char *desc_short = N_("Instance attributes available for all " "\"stonith\"-class resources"); const char *desc_long = N_("Instance attributes available for all " "\"stonith\"-class resources and used by " "Pacemaker's fence daemon"); return pcmk__daemon_metadata(out, name, desc_short, desc_long, pcmk__opt_fencing); } static GOptionEntry entries[] = { { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, N_("Deprecated (will be removed in a future release)"), NULL }, { "stand-alone-w-cpg", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, stand_alone_cpg_cb, N_("Intended for use in regression testing only"), NULL }, { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, N_("Send logs to the additional named logfile"), NULL }, { NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_cluster_t *cluster = NULL; crm_ipc_t *old_instance = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "l"); GOptionContext *context = build_arg_context(args, &output_group); crm_log_preinit(NULL, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } if ((g_strv_length(processed_args) >= 2) && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { rc = fencer_metadata(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unable to display metadata: %s", pcmk_rc_str(rc)); } goto done; } // Open additional log files pcmk__add_logfiles(options.log_files, out); crm_log_init(NULL, LOG_INFO + args->verbosity, TRUE, (args->verbosity > 0), argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (old_instance == NULL) { /* crm_ipc_new() will have already logged an error message with * crm_err() */ exit_code = CRM_EX_FATAL; goto done; } if (pcmk__connect_generic_ipc(old_instance) == pcmk_rc_ok) { // IPC endpoint already up crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); goto done; } else { // Not up or not authentic, we'll proceed either way crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); pcmk__cluster_init_node_caches(); rc = fenced_scheduler_init(); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error initializing scheduler data: %s", pcmk_rc_str(rc)); goto done; } cluster = pcmk_cluster_new(); if (!stand_alone) { #if SUPPORT_COROSYNC if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) { pcmk_cluster_set_destroy_fn(cluster, stonith_peer_cs_destroy); pcmk_cpg_set_deliver_fn(cluster, stonith_peer_ais_callback); pcmk_cpg_set_confchg_fn(cluster, pcmk__cpg_confchg_cb); } #endif // SUPPORT_COROSYNC pcmk__cluster_set_status_callback(&st_peer_update_callback); if (pcmk_cluster_connect(cluster) != pcmk_rc_ok) { exit_code = CRM_EX_FATAL; crm_crit("Cannot sign in to the cluster... terminating"); goto done; } - pcmk__str_update(&stonith_our_uname, cluster->priv->node_name); + fenced_set_local_node(cluster->priv->node_name); if (!options.no_cib_connect) { setup_cib(); } } else { - pcmk__str_update(&stonith_our_uname, "localhost"); + fenced_set_local_node("localhost"); crm_warn("Stand-alone mode is deprecated and will be removed " "in a future release"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); // Create the mainloop and run it... mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); done: g_strfreev(processed_args); pcmk__free_arg_context(context); g_strfreev(options.log_files); stonith_cleanup(); pcmk_cluster_free(cluster); fenced_scheduler_cleanup(); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h index 75ae55def2..9c61c200b8 100644 --- a/daemons/fenced/pacemaker-fenced.h +++ b/daemons/fenced/pacemaker-fenced.h @@ -1,330 +1,331 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include // uint32_t, uint64_t #include // xmlNode #include #include #include #include /*! * \internal * \brief Check whether target has already been fenced recently * * \param[in] tolerance Number of seconds to look back in time * \param[in] target Name of node to search for * \param[in] action Action we want to match * * \return TRUE if an equivalent fencing operation took place in the last * \p tolerance seconds, FALSE otherwise */ gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action); typedef struct stonith_device_s { char *id; char *agent; char *namespace; /*! list of actions that must execute on the target node. Used for unfencing */ GString *on_target_actions; GList *targets; time_t targets_age; gboolean has_attr_map; // Whether target's nodeid should be passed as a parameter to the agent gboolean include_nodeid; /* whether the cluster should automatically unfence nodes with the device */ gboolean automatic_unfencing; guint priority; uint32_t flags; // Group of enum st_device_flags GHashTable *params; GHashTable *aliases; GList *pending_ops; mainloop_timer_t *timer; crm_trigger_t *work; xmlNode *agent_metadata; /*! A verified device is one that has contacted the * agent successfully to perform a monitor operation */ gboolean verified; gboolean cib_registered; gboolean api_registered; gboolean dirty; } stonith_device_t; /* These values are used to index certain arrays by "phase". Usually an * operation has only one "phase", so phase is always zero. However, some * reboots are remapped to "off" then "on", in which case "reboot" will be * phase 0, "off" will be phase 1 and "on" will be phase 2. */ enum st_remap_phase { st_phase_requested = 0, st_phase_off = 1, st_phase_on = 2, st_phase_max = 3 }; typedef struct remote_fencing_op_s { /* The unique id associated with this operation */ char *id; /*! The node this operation will fence */ char *target; /*! The fencing action to perform on the target. (reboot, on, off) */ char *action; /*! When was the fencing action recorded (seconds since epoch) */ time_t created; /*! Marks if the final notifications have been sent to local stonith clients. */ gboolean notify_sent; /*! The number of query replies received */ guint replies; /*! The number of query replies expected */ guint replies_expected; /*! Does this node own control of this operation */ gboolean owner; /*! After query is complete, This the high level timer that expires the entire operation */ guint op_timer_total; /*! This timer expires the current fencing request. Many fencing * requests may exist in a single operation */ guint op_timer_one; /*! This timer expires the query request sent out to determine * what nodes are contain what devices, and who those devices can fence */ guint query_timer; /*! This is the default timeout to use for each fencing device if no * custom timeout is received in the query. */ gint base_timeout; /*! This is the calculated total timeout an operation can take before * expiring. This is calculated by adding together all the timeout * values associated with the devices this fencing operation may call */ gint total_timeout; /*! * Fencing delay (in seconds) requested by API client (used by controller to * implement \c PCMK_OPT_PRIORITY_FENCING_DELAY). A value of -1 means * disable all configured delays. */ int client_delay; /*! Delegate is the node being asked to perform a fencing action * on behalf of the node that owns the remote operation. Some operations * will involve multiple delegates. This value represents the final delegate * that is used. */ char *delegate; /*! The point at which the remote operation completed */ time_t completed; //! Group of enum stonith_call_options associated with this operation uint32_t call_options; /*! The current state of the remote operation. This indicates * what stage the op is in, query, exec, done, duplicate, failed. */ enum op_state state; /*! The node that owns the remote operation */ char *originator; /*! The local client id that initiated the fencing request */ char *client_id; /*! The client's call_id that initiated the fencing request */ int client_callid; /*! The name of client that initiated the fencing request */ char *client_name; /*! List of the received query results for all the nodes in the cpg group */ GList *query_results; /*! The original request that initiated the remote stonith operation */ xmlNode *request; /*! The current topology level being executed */ guint level; /*! The current operation phase being executed */ enum st_remap_phase phase; /*! Devices with automatic unfencing (always run if "on" requested, never if remapped) */ GList *automatic_list; /*! List of all devices at the currently executing topology level */ GList *devices_list; /*! Current entry in the topology device list */ GList *devices; /*! List of duplicate operations attached to this operation. Once this operation * completes, the duplicate operations will be closed out as well. */ GList *duplicates; /*! The point at which the remote operation completed(nsec) */ long long completed_nsec; /*! The (potentially intermediate) result of the operation */ pcmk__action_result_t result; } remote_fencing_op_t; void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged); // Fencer-specific client flags enum st_client_flags { st_callback_unknown = UINT64_C(0), st_callback_notify_fence = (UINT64_C(1) << 0), st_callback_device_add = (UINT64_C(1) << 2), st_callback_device_del = (UINT64_C(1) << 4), st_callback_notify_history = (UINT64_C(1) << 5), st_callback_notify_history_synced = (UINT64_C(1) << 6) }; // How the user specified the target of a topology level enum fenced_target_by { fenced_target_by_unknown = -1, // Invalid or not yet parsed fenced_target_by_name, // By target name fenced_target_by_pattern, // By a pattern matching target names fenced_target_by_attribute, // By a node attribute/value on target }; /* * Complex fencing requirements are specified via fencing topologies. * A topology consists of levels; each level is a list of fencing devices. * Topologies are stored in a hash table by node name. When a node needs to be * fenced, if it has an entry in the topology table, the levels are tried * sequentially, and the devices in each level are tried sequentially. * Fencing is considered successful as soon as any level succeeds; * a level is considered successful if all its devices succeed. * Essentially, all devices at a given level are "and-ed" and the * levels are "or-ed". * * This structure is used for the topology table entries. * Topology levels start from 1, so levels[0] is unused and always NULL. */ typedef struct stonith_topology_s { enum fenced_target_by kind; // How target was specified /*! Node name regex or attribute name=value for which topology applies */ char *target; char *target_value; char *target_pattern; char *target_attribute; /*! Names of fencing devices at each topology level */ GList *levels[ST__LEVEL_COUNT]; } stonith_topology_t; void stonith_shutdown(int nsig); void init_device_list(void); void free_device_list(void); void init_topology_list(void); void free_topology_list(void); void free_stonith_remote_op_list(void); void init_stonith_remote_op_hash_table(GHashTable **table); void free_metadata_cache(void); void fenced_unregister_handlers(void); uint64_t get_stonith_flag(const char *name); void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *op_request, const char *remote_peer); int stonith_device_register(xmlNode *msg, gboolean from_cib); void stonith_device_remove(const char *id, bool from_cib); char *stonith_level_key(const xmlNode *msg, enum fenced_target_by); void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result); stonith_topology_t *find_topology_for_host(const char *host); void do_local_reply(const xmlNode *notify_src, pcmk__client_t *client, int call_options); xmlNode *fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result); void do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); void fenced_send_notification(const char *type, const pcmk__action_result_t *result, xmlNode *data); void fenced_send_config_notification(const char *op, const pcmk__action_result_t *result, const char *desc); remote_fencing_op_t *initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack); void fenced_process_fencing_reply(xmlNode *msg); int process_remote_stonith_query(xmlNode * msg); void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); void stonith_fence_history(xmlNode *msg, xmlNode **output, const char *remote_peer, int options); void stonith_fence_history_trim(void); bool fencing_peer_active(pcmk__node_status_t *peer); void set_fencing_completed(remote_fencing_op_t * op); int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg); const char *fenced_device_reboot_action(const char *device_id); bool fenced_device_supports_on(const char *device_id); gboolean node_has_attr(const char *node, const char *name, const char *value); gboolean node_does_watchdog_fencing(const char *node); void fencing_topology_init(void); void setup_cib(void); void fenced_cib_cleanup(void); int fenced_scheduler_init(void); +void fenced_set_local_node(const char *node_name); +const char *fenced_get_local_node(void); void fenced_scheduler_cleanup(void); void fenced_scheduler_run(xmlNode *cib); static inline void fenced_set_protocol_error(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! * \internal * \brief Get the device flag to use with a given action when searching devices * * \param[in] action Action to check * * \return st_device_supports_on if \p action is "on", otherwise * st_device_supports_none */ static inline uint32_t fenced_support_flag(const char *action) { if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { return st_device_supports_on; } return st_device_supports_none; } -extern char *stonith_our_uname; extern gboolean stand_alone; extern GHashTable *device_list; extern GHashTable *topology; extern long long stonith_watchdog_timeout_ms; extern GList *stonith_watchdog_targets; extern GHashTable *stonith_remote_op_list; extern crm_exit_t exit_code; extern gboolean stonith_shutdown_flag; diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c index f4c8d262c6..163146664f 100644 --- a/daemons/schedulerd/schedulerd_messages.c +++ b/daemons/schedulerd/schedulerd_messages.c @@ -1,330 +1,330 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "pacemaker-schedulerd.h" static GHashTable *schedulerd_handlers = NULL; static pcmk_scheduler_t * init_working_set(void) { pcmk_scheduler_t *scheduler = pe_new_working_set(); pcmk__mem_assert(scheduler); scheduler->priv->out = logger_out; return scheduler; } static xmlNode * handle_pecalc_request(pcmk__request_t *request) { static struct series_s { const char *name; const char *param; /* Maximum number of inputs of this kind to save to disk. * If -1, save all; if 0, save none. */ int wrap; } series[] = { { "pe-error", PCMK_OPT_PE_ERROR_SERIES_MAX, -1 }, { "pe-warn", PCMK_OPT_PE_WARN_SERIES_MAX, 5000 }, { "pe-input", PCMK_OPT_PE_INPUT_SERIES_MAX, 4000 }, }; xmlNode *msg = request->xml; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL); xmlNode *xml_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); static char *last_digest = NULL; static char *filename = NULL; unsigned int seq; int series_id = 0; int series_wrap = 0; char *digest = NULL; const char *value = NULL; time_t execution_date = time(NULL); xmlNode *converted = NULL; xmlNode *reply = NULL; bool is_repoke = false; bool process = true; pcmk_scheduler_t *scheduler = init_working_set(); pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); digest = pcmk__digest_xml(xml_data, false); converted = pcmk__xml_copy(NULL, xml_data); if (pcmk_update_configured_schema(&converted, true) != pcmk_rc_ok) { - scheduler->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); - crm_xml_add_int(scheduler->graph, "transition_id", 0); - crm_xml_add_int(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, 0); + scheduler->priv->graph = pcmk__xe_create(NULL, + PCMK__XE_TRANSITION_GRAPH); + crm_xml_add_int(scheduler->priv->graph, "transition_id", 0); + crm_xml_add_int(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, 0); process = false; free(digest); } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) { is_repoke = true; free(digest); } else { free(last_digest); last_digest = digest; } if (process) { pcmk__schedule_actions(converted, pcmk__sched_no_counts - |pcmk__sched_no_compat |pcmk__sched_show_utilization, scheduler); } // Get appropriate index into series[] array if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error) || crm_config_error) { series_id = 0; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_warning) || crm_config_warning) { series_id = 1; } else { series_id = 2; } value = pcmk__cluster_option(scheduler->priv->options, series[series_id].param); if ((value == NULL) || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { series_wrap = series[series_id].wrap; } if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name, &seq) != pcmk_rc_ok) { // @TODO maybe handle errors better ... seq = 0; } crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", series[series_id].name, series_wrap, seq, value); scheduler->input = NULL; - reply = create_reply(msg, scheduler->graph); + reply = create_reply(msg, scheduler->priv->graph); if (reply == NULL) { pcmk__format_result(&request->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "Failed building ping reply for client %s", pcmk__client_name(request->ipc_client)); goto done; } if (series_wrap == 0) { // Don't save any inputs of this kind free(filename); filename = NULL; } else if (!is_repoke) { // Input changed, save to disk free(filename); filename = pcmk__series_filename(PE_STATE_DIR, series[series_id].name, seq, true); } crm_xml_add(reply, PCMK__XA_CRM_TGRAPH_IN, filename); pcmk__log_transition_summary(scheduler, filename); if (series_wrap == 0) { crm_debug("Not saving input to disk (disabled by configuration)"); } else if (is_repoke) { crm_info("Input has not changed since last time, not saving to disk"); } else { unlink(filename); crm_xml_add_ll(xml_data, PCMK_XA_EXECUTION_DATE, (long long) execution_date); pcmk__xml_write_file(xml_data, filename, true); pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name, ++seq, series_wrap); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); done: pcmk__xml_free(converted); pe_free_working_set(scheduler); return reply; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INVALID_PARAM); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", pcmk__client_name(request->ipc_client)); return NULL; } static xmlNode * handle_hello_request(pcmk__request_t *request) { pcmk__ipc_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_trace("Received IPC hello from %s", pcmk__client_name(request->ipc_client)); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static void schedulerd_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_HELLO, handle_hello_request }, { CRM_OP_PECALC, handle_pecalc_request }, { NULL, handle_unknown_request }, }; schedulerd_handlers = pcmk__register_handlers(handlers); } static int32_t pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static int32_t pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; xmlNode *msg = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *sys_to = NULL; CRM_CHECK(c != NULL, return 0); if (schedulerd_handlers == NULL) { schedulerd_register_handlers(); } msg = pcmk__client_data2xml(c, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_PROTOCOL); return 0; } sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO); if (pcmk__str_eq(crm_element_value(msg, PCMK__XA_SUBT), PCMK__VALUE_RESPONSE, pcmk__str_none)) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_info("Ignoring IPC reply from %s", pcmk__client_name(c)); } else if (!pcmk__str_eq(sys_to, CRM_SYSTEM_PENGINE, pcmk__str_none)) { pcmk__ipc_send_ack(c, id, flags, PCMK__XE_ACK, NULL, CRM_EX_INDETERMINATE); crm_info("Ignoring invalid IPC message: to '%s' not " CRM_SYSTEM_PENGINE, pcmk__s(sys_to, "")); } else { char *log_msg = NULL; const char *reason = NULL; xmlNode *reply = NULL; pcmk__request_t request = { .ipc_client = c, .ipc_id = id, .ipc_flags = flags, .peer = NULL, .xml = msg, .call_options = 0, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_CRM_TASK); CRM_CHECK(request.op != NULL, return 0); reply = pcmk__process_request(&request, schedulerd_handlers); if (reply != NULL) { pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event); pcmk__xml_free(reply); } reason = request.result.exit_reason; log_msg = crm_strdup_printf("Processed %s request from %s %s: %s%s%s%s", request.op, pcmk__request_origin_type(&request), pcmk__request_origin(&request), pcmk_exec_status_str(request.result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); if (!pcmk__result_ok(&request.result)) { crm_warn("%s", log_msg); } else { crm_debug("%s", log_msg); } free(log_msg); pcmk__reset_request(&request); } pcmk__xml_free(msg); return 0; } /* Error code means? */ static int32_t pe_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void pe_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pe_ipc_closed(c); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = pe_ipc_accept, .connection_created = NULL, .msg_process = pe_ipc_dispatch, .connection_closed = pe_ipc_closed, .connection_destroyed = pe_ipc_destroy }; diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h index 6d5a3db44c..15c72805eb 100644 --- a/include/crm/common/internal.h +++ b/include/crm/common/internal.h @@ -1,403 +1,400 @@ /* * Copyright 2015-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_INTERNAL__H #define PCMK__CRM_COMMON_INTERNAL__H #include // pid_t, getpid() #include // bool #include // uint8_t, uint64_t #include // guint, GList, GHashTable #include // xmlNode #include // crm_strdup_printf() #include // do_crm_log_unlikely(), etc. #include // mainloop_io_t, struct ipc_client_callbacks #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* This says whether the current application is a Pacemaker daemon or not, * and is used to change default logging settings such as whether to log to * stderr, etc., as well as a few other details such as whether blackbox signal * handling is enabled. * * It is set when logging is initialized, and does not need to be set directly. */ extern bool pcmk__is_daemon; -//! Node name of the local node -extern char *pcmk__our_nodename; - // Number of elements in a statically defined array #define PCMK__NELEM(a) ((int) (sizeof(a)/sizeof(a[0])) ) #if SUPPORT_CIBSECRETS /* internal CIB utilities (from cib_secrets.c) */ int pcmk__substitute_secrets(const char *rsc_id, GHashTable *params); #endif /* internal main loop utilities (from mainloop.c) */ int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata, const struct ipc_client_callbacks *callbacks, mainloop_io_t **source); guint pcmk__mainloop_timer_get_period(const mainloop_timer_t *timer); /* internal node-related XML utilities (from nodes.c) */ /*! * \internal * \brief Add local node name and ID to an XML node * * \param[in,out] request XML node to modify * \param[in] node The local node's name * \param[in] nodeid The local node's ID (can be 0) */ void pcmk__xe_add_node(xmlNode *xml, const char *node, int nodeid); /* internal name/value utilities (from nvpair.c) */ int pcmk__scan_nvpair(const char *input, char **name, char **value); char *pcmk__format_nvpair(const char *name, const char *value, const char *units); /*! * \internal * \brief Add a boolean attribute to an XML node. * * \param[in,out] node XML node to add attributes to * \param[in] name XML attribute to create * \param[in] value Value to give to the attribute */ void pcmk__xe_set_bool_attr(xmlNodePtr node, const char *name, bool value); /*! * \internal * \brief Extract a boolean attribute's value from an XML element * * \param[in] node XML node to get attribute from * \param[in] name XML attribute to get * * \return True if the given \p name is an attribute on \p node and has * the value \c PCMK_VALUE_TRUE, False in all other cases */ bool pcmk__xe_attr_is_true(const xmlNode *node, const char *name); /*! * \internal * \brief Extract a boolean attribute's value from an XML element, with * error checking * * \param[in] node XML node to get attribute from * \param[in] name XML attribute to get * \param[out] value Destination for the value of the attribute * * \return EINVAL if \p name or \p value are NULL, ENODATA if \p node is * NULL or the attribute does not exist, pcmk_rc_unknown_format * if the attribute is not a boolean, and pcmk_rc_ok otherwise. * * \note \p value only has any meaning if the return value is pcmk_rc_ok. */ int pcmk__xe_get_bool_attr(const xmlNode *node, const char *name, bool *value); /* internal procfs utilities (from procfs.c) */ pid_t pcmk__procfs_pid_of(const char *name); unsigned int pcmk__procfs_num_cores(void); int pcmk__procfs_pid2path(pid_t pid, char path[], size_t path_size); bool pcmk__procfs_has_pids(void); /* internal functions related to process IDs (from pid.c) */ /*! * \internal * \brief Check whether process exists (by PID and optionally executable path) * * \param[in] pid PID of process to check * \param[in] daemon If not NULL, path component to match with procfs entry * * \return Standard Pacemaker return code * \note Particular return codes of interest include pcmk_rc_ok for alive, * ESRCH for process is not alive (verified by kill and/or executable path * match), EACCES for caller unable or not allowed to check. A result of * "alive" is less reliable when \p daemon is not provided or procfs is * not available, since there is no guarantee that the PID has not been * recycled for another process. * \note This function cannot be used to verify \e authenticity of the process. */ int pcmk__pid_active(pid_t pid, const char *daemon); int pcmk__read_pidfile(const char *filename, pid_t *pid); int pcmk__pidfile_matches(const char *filename, pid_t expected_pid, const char *expected_name, pid_t *pid); int pcmk__lock_pidfile(const char *filename, const char *name); // bitwise arithmetic utilities /*! * \internal * \brief Set specified flags in a flag group * * \param[in] function Function name of caller * \param[in] line Line number of caller * \param[in] log_level Log a message at this level * \param[in] flag_type Label describing this flag group (for logging) * \param[in] target Name of object whose flags these are (for logging) * \param[in] flag_group Flag group being manipulated * \param[in] flags Which flags in the group should be set * \param[in] flags_str Readable equivalent of \p flags (for logging) * * \return Possibly modified flag group */ static inline uint64_t pcmk__set_flags_as(const char *function, int line, uint8_t log_level, const char *flag_type, const char *target, uint64_t flag_group, uint64_t flags, const char *flags_str) { uint64_t result = flag_group | flags; if (result != flag_group) { do_crm_log_unlikely(log_level, "%s flags %#.8llx (%s) for %s set by %s:%d", ((flag_type == NULL)? "Group of" : flag_type), (unsigned long long) flags, ((flags_str == NULL)? "flags" : flags_str), ((target == NULL)? "target" : target), function, line); } return result; } /*! * \internal * \brief Clear specified flags in a flag group * * \param[in] function Function name of caller * \param[in] line Line number of caller * \param[in] log_level Log a message at this level * \param[in] flag_type Label describing this flag group (for logging) * \param[in] target Name of object whose flags these are (for logging) * \param[in] flag_group Flag group being manipulated * \param[in] flags Which flags in the group should be cleared * \param[in] flags_str Readable equivalent of \p flags (for logging) * * \return Possibly modified flag group */ static inline uint64_t pcmk__clear_flags_as(const char *function, int line, uint8_t log_level, const char *flag_type, const char *target, uint64_t flag_group, uint64_t flags, const char *flags_str) { uint64_t result = flag_group & ~flags; if (result != flag_group) { do_crm_log_unlikely(log_level, "%s flags %#.8llx (%s) for %s cleared by %s:%d", ((flag_type == NULL)? "Group of" : flag_type), (unsigned long long) flags, ((flags_str == NULL)? "flags" : flags_str), ((target == NULL)? "target" : target), function, line); } return result; } /*! * \internal * \brief Get readable string for whether specified flags are set * * \param[in] flag_group Group of flags to check * \param[in] flags Which flags in \p flag_group should be checked * * \return "true" if all \p flags are set in \p flag_group, otherwise "false" */ static inline const char * pcmk__flag_text(uint64_t flag_group, uint64_t flags) { return pcmk__btoa(pcmk_all_flags_set(flag_group, flags)); } // miscellaneous utilities (from utils.c) void pcmk__daemonize(const char *name, const char *pidfile); void pcmk__panic(const char *origin); pid_t pcmk__locate_sbd(void); void pcmk__sleep_ms(unsigned int ms); extern int pcmk__score_red; extern int pcmk__score_green; extern int pcmk__score_yellow; /*! * \internal * \brief Allocate new zero-initialized memory, asserting on failure * * \param[in] file File where \p function is located * \param[in] function Calling function * \param[in] line Line within \p file * \param[in] nmemb Number of elements to allocate memory for * \param[in] size Size of each element * * \return Newly allocated memory of of size nmemb * size (guaranteed * not to be \c NULL) * * \note The caller is responsible for freeing the return value using \c free(). */ static inline void * pcmk__assert_alloc_as(const char *file, const char *function, uint32_t line, size_t nmemb, size_t size) { void *ptr = calloc(nmemb, size); if (ptr == NULL) { crm_abort(file, function, line, "Out of memory", FALSE, TRUE); crm_exit(CRM_EX_OSERR); } return ptr; } /*! * \internal * \brief Allocate new zero-initialized memory, asserting on failure * * \param[in] nmemb Number of elements to allocate memory for * \param[in] size Size of each element * * \return Newly allocated memory of of size nmemb * size (guaranteed * not to be \c NULL) * * \note The caller is responsible for freeing the return value using \c free(). */ #define pcmk__assert_alloc(nmemb, size) \ pcmk__assert_alloc_as(__FILE__, __func__, __LINE__, nmemb, size) /*! * \internal * \brief Resize a dynamically allocated memory block * * \param[in] ptr Memory block to resize (or NULL to allocate new memory) * \param[in] size New size of memory block in bytes (must be > 0) * * \return Pointer to resized memory block * * \note This asserts on error, so the result is guaranteed to be non-NULL * (which is the main advantage of this over directly using realloc()). */ static inline void * pcmk__realloc(void *ptr, size_t size) { void *new_ptr; // realloc(p, 0) can replace free(p) but this wrapper can't CRM_ASSERT(size > 0); new_ptr = realloc(ptr, size); if (new_ptr == NULL) { free(ptr); abort(); } return new_ptr; } static inline char * pcmk__getpid_s(void) { return crm_strdup_printf("%lu", (unsigned long) getpid()); } // More efficient than g_list_length(list) == 1 static inline bool pcmk__list_of_1(GList *list) { return list && (list->next == NULL); } // More efficient than g_list_length(list) > 1 static inline bool pcmk__list_of_multiple(GList *list) { return list && (list->next != NULL); } /* convenience functions for failure-related node attributes */ #define PCMK__FAIL_COUNT_PREFIX "fail-count" #define PCMK__LAST_FAILURE_PREFIX "last-failure" /*! * \internal * \brief Generate a failure-related node attribute name for a resource * * \param[in] prefix Start of attribute name * \param[in] rsc_id Resource name * \param[in] op Operation name * \param[in] interval_ms Operation interval * * \return Newly allocated string with attribute name * * \note Failure attributes are named like PREFIX-RSC#OP_INTERVAL (for example, * "fail-count-myrsc#monitor_30000"). The '#' is used because it is not * a valid character in a resource ID, to reliably distinguish where the * operation name begins. The '_' is used simply to be more comparable to * action labels like "myrsc_monitor_30000". */ static inline char * pcmk__fail_attr_name(const char *prefix, const char *rsc_id, const char *op, guint interval_ms) { CRM_CHECK(prefix && rsc_id && op, return NULL); return crm_strdup_printf("%s-%s#%s_%u", prefix, rsc_id, op, interval_ms); } static inline char * pcmk__failcount_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__FAIL_COUNT_PREFIX, rsc_id, op, interval_ms); } static inline char * pcmk__lastfailure_name(const char *rsc_id, const char *op, guint interval_ms) { return pcmk__fail_attr_name(PCMK__LAST_FAILURE_PREFIX, rsc_id, op, interval_ms); } // internal resource agent functions (from agents.c) int pcmk__effective_rc(int rc); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_INTERNAL__H diff --git a/include/crm/common/scheduler.h b/include/crm/common/scheduler.h index ed44ee4cf1..2644e9d983 100644 --- a/include/crm/common/scheduler.h +++ b/include/crm/common/scheduler.h @@ -1,138 +1,130 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_SCHEDULER__H #define PCMK__CRM_COMMON_SCHEDULER__H #include // time_t #include // xmlNode #include // guint, GList, GHashTable #include // crm_time_t #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /*! * \file * \brief Scheduler API * \ingroup core */ // NOTE: sbd (as of at least 1.5.2) uses this enum //! Possible responses to loss of quorum enum pe_quorum_policy { pcmk_no_quorum_freeze, //!< Do not recover resources from outside partition pcmk_no_quorum_stop, //!< Stop all resources in partition pcmk_no_quorum_ignore, //!< Act as if partition still holds quorum pcmk_no_quorum_fence, //!< Fence all nodes in partition pcmk_no_quorum_demote, //!< Demote promotable resources and stop all others #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) // NOTE: sbd (as of at least 1.5.2) uses this value //! \deprecated Use pcmk_no_quorum_freeze instead no_quorum_freeze = pcmk_no_quorum_freeze, // NOTE: sbd (as of at least 1.5.2) uses this value //! \deprecated Use pcmk_no_quorum_stop instead no_quorum_stop = pcmk_no_quorum_stop, // NOTE: sbd (as of at least 1.5.2) uses this value //! \deprecated Use pcmk_no_quorum_ignore instead no_quorum_ignore = pcmk_no_quorum_ignore, //! \deprecated Use pcmk_no_quorum_fence instead no_quorum_suicide = pcmk_no_quorum_fence, // NOTE: sbd (as of at least 1.5.2) uses this value //! \deprecated Use pcmk_no_quorum_demote instead no_quorum_demote = pcmk_no_quorum_demote, #endif }; //! \internal Do not use typedef struct pcmk__scheduler_private pcmk__scheduler_private_t; /* Implementation of pcmk_scheduler_t * * @COMPAT Drop this struct once all members are moved to * pcmk__scheduler_private_t, and repoint pcmk_scheduler_t to that */ //!@{ //! \deprecated Do not use (public access will be removed in a future release) struct pcmk__scheduler { // Be careful about when each piece of information is available and final // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Set scheduler input with pcmk_set_scheduler_cib() instead xmlNode *input; // CIB XML // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_get_dc() instead pcmk_node_t *dc_node; // Node object for DC // NOTE: sbd (as of at least 1.5.2) uses this - // @COMPAT Change to uint64_t at a compatibility break //! \deprecated Call pcmk_has_quorum() to check quorum - unsigned long long flags; // Group of enum pcmk__scheduler_flags + uint64_t flags; // Group of enum pcmk__scheduler_flags // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_get_no_quorum_policy() to get no-quorum policy enum pe_quorum_policy no_quorum_policy; // Response to loss of quorum // NOTE: sbd (as of at least 1.5.2) uses this //! \deprecated Call pcmk_find_node() to find a node instead GList *nodes; // Nodes in cluster - int action_id; // ID to use for next created action - xmlNode *graph; // Transition graph - GHashTable *template_rsc_sets; // Mappings of template ID to resource ID - - // @COMPAT Replace this with a fencer variable (only place it's used) - const char *localhost; // \deprecated Do not use - GHashTable *tags; // Configuration tags (ID -> pcmk__idref_t*) int blocked_resources; // Number of blocked resources in cluster int disabled_resources; // Number of disabled resources in cluster GList *param_check; // History entries that need to be checked GList *stop_needed; // Containers that need stop actions time_t recheck_by; // Hint to controller when to reschedule int ninstances; // Total number of resource instances guint shutdown_lock; // How long to lock resources (seconds) int priority_fencing_delay; // Priority fencing delay pcmk__scheduler_private_t *priv; // For Pacemaker use only guint node_pending_timeout; // Pending join times out after this (ms) }; //!@} pcmk_node_t *pcmk_get_dc(const pcmk_scheduler_t *scheduler); enum pe_quorum_policy pcmk_get_no_quorum_policy(const pcmk_scheduler_t *scheduler); int pcmk_set_scheduler_cib(pcmk_scheduler_t *scheduler, xmlNode *cib); bool pcmk_has_quorum(const pcmk_scheduler_t *scheduler); pcmk_node_t *pcmk_find_node(const pcmk_scheduler_t *scheduler, const char *node_name); #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_SCHEDULER__H diff --git a/include/crm/common/scheduler_internal.h b/include/crm/common/scheduler_internal.h index b50d221831..63d1fe0a01 100644 --- a/include/crm/common/scheduler_internal.h +++ b/include/crm/common/scheduler_internal.h @@ -1,281 +1,279 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H #define PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif enum pcmk__check_parameters { /* Clear fail count if parameters changed for un-expired start or monitor * last_failure. */ pcmk__check_last_failure, /* Clear fail count if parameters changed for start, monitor, promote, or * migrate_from actions for active resources. */ pcmk__check_active, }; // Scheduling options and conditions enum pcmk__scheduler_flags { // No scheduler flags set (compare with equality rather than bit set) pcmk__sched_none = 0ULL, /* These flags are dynamically determined conditions */ // Whether partition has quorum (via \c PCMK_XA_HAVE_QUORUM attribute) //! \deprecated Call pcmk_has_quorum() to check quorum instead pcmk__sched_quorate = (1ULL << 0), // Whether cluster is symmetric (via symmetric-cluster property) pcmk__sched_symmetric_cluster = (1ULL << 1), // Whether scheduling encountered a non-configuration error pcmk__sched_processing_error = (1ULL << 2), // Whether cluster is in maintenance mode (via maintenance-mode property) pcmk__sched_in_maintenance = (1ULL << 3), // Whether fencing is enabled (via stonith-enabled property) pcmk__sched_fencing_enabled = (1ULL << 4), // Whether cluster has a fencing resource (via CIB resources) /*! \deprecated To indicate the cluster has a fencing resource, add either a * fencing resource configuration or the have-watchdog cluster option to the * input CIB */ pcmk__sched_have_fencing = (1ULL << 5), // Whether any resource provides or requires unfencing (via CIB resources) pcmk__sched_enable_unfencing = (1ULL << 6), // Whether concurrent fencing is allowed (via concurrent-fencing property) pcmk__sched_concurrent_fencing = (1ULL << 7), /* * Whether resources removed from the configuration should be stopped (via * stop-orphan-resources property) */ pcmk__sched_stop_removed_resources = (1ULL << 8), /* * Whether recurring actions removed from the configuration should be * cancelled (via stop-orphan-actions property) */ pcmk__sched_cancel_removed_actions = (1ULL << 9), // Whether to stop all resources (via stop-all-resources property) pcmk__sched_stop_all = (1ULL << 10), // Whether scheduler processing encountered a warning pcmk__sched_processing_warning = (1ULL << 11), /* * Whether start failure should be treated as if * \c PCMK_META_MIGRATION_THRESHOLD is 1 (via * \c PCMK_OPT_START_FAILURE_IS_FATAL property) */ pcmk__sched_start_failure_fatal = (1ULL << 12), // Unused pcmk__sched_remove_after_stop = (1ULL << 13), // Whether unseen nodes should be fenced (via startup-fencing property) pcmk__sched_startup_fencing = (1ULL << 14), /* * Whether resources should be left stopped when their node shuts down * cleanly (via shutdown-lock property) */ pcmk__sched_shutdown_lock = (1ULL << 15), /* * Whether resources' current state should be probed (when unknown) before * scheduling any other actions (via the enable-startup-probes property) */ pcmk__sched_probe_resources = (1ULL << 16), // Whether the CIB status section has been parsed yet pcmk__sched_have_status = (1ULL << 17), // Whether the cluster includes any Pacemaker Remote nodes (via CIB) pcmk__sched_have_remote_nodes = (1ULL << 18), /* The remaining flags are scheduling options that must be set explicitly */ /* * Whether to skip unpacking the CIB status section and stop the scheduling * sequence after applying node-specific location criteria (skipping * assignment, ordering, actions, etc.). */ pcmk__sched_location_only = (1ULL << 20), // Whether sensitive resource attributes have been masked pcmk__sched_sanitized = (1ULL << 21), // Skip counting of total, disabled, and blocked resource instances pcmk__sched_no_counts = (1ULL << 23), - /* - * Skip deprecated code kept solely for backward API compatibility - * (internal code should always set this) - */ - pcmk__sched_no_compat = (1ULL << 24), - // Whether node scores should be output instead of logged pcmk__sched_output_scores = (1ULL << 25), // Whether to show node and resource utilization (in log or output) pcmk__sched_show_utilization = (1ULL << 26), /* * Whether to stop the scheduling sequence after unpacking the CIB, * calculating cluster status, and applying node health (skipping * applying node-specific location criteria, assignment, etc.) */ pcmk__sched_validate_only = (1ULL << 27), }; // Implementation of pcmk__scheduler_private_t struct pcmk__scheduler_private { // Be careful about when each piece of information is available and final + char *local_node_name; // Name of node running scheduler (if known) crm_time_t *now; // Time to use when evaluating rules pcmk__output_t *out; // Output object for displaying messages GHashTable *options; // Cluster options const char *fence_action; // Default fencing action int fence_timeout_ms; // Value of stonith-timeout property in ms const char *placement_strategy; // Value of placement-strategy property xmlNode *rsc_defaults; // Configured resource defaults xmlNode *op_defaults; // Configured operation defaults GList *resources; // Resources in cluster + GHashTable *templates; // Key = template ID, value = resource list GList *actions; // All scheduled actions GHashTable *singletons; // Scheduled non-resource actions + int next_action_id; // Counter used as ID for actions xmlNode *failed; // History entries of failed actions GList *location_constraints; // Location constraints GList *colocation_constraints; // Colocation constraints GList *ordering_constraints; // Ordering constraints GHashTable *ticket_constraints; // Key = ticket ID, value = pcmk__ticket_t int next_ordering_id; // Counter used as ID for orderings + xmlNode *graph; // Transition graph int synapse_count; // Number of transition graph synapses }; // Group of enum pcmk__warnings flags for warnings we want to log once extern uint32_t pcmk__warnings; /*! * \internal * \brief Log a resource-tagged message at info severity * * \param[in] rsc Tag message with this resource's ID * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__rsc_info(rsc, fmt, args...) \ crm_log_tag(LOG_INFO, ((rsc) == NULL)? "" : (rsc)->id, (fmt), ##args) /*! * \internal * \brief Log a resource-tagged message at debug severity * * \param[in] rsc Tag message with this resource's ID * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__rsc_debug(rsc, fmt, args...) \ crm_log_tag(LOG_DEBUG, ((rsc) == NULL)? "" : (rsc)->id, (fmt), ##args) /*! * \internal * \brief Log a resource-tagged message at trace severity * * \param[in] rsc Tag message with this resource's ID * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__rsc_trace(rsc, fmt, args...) \ crm_log_tag(LOG_TRACE, ((rsc) == NULL)? "" : (rsc)->id, (fmt), ##args) /*! * \internal * \brief Log an error and remember that current scheduler input has errors * * \param[in,out] scheduler Scheduler data * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__sched_err(scheduler, fmt...) do { \ pcmk__set_scheduler_flags((scheduler), \ pcmk__sched_processing_error); \ crm_err(fmt); \ } while (0) /*! * \internal * \brief Log a warning and remember that current scheduler input has warnings * * \param[in,out] scheduler Scheduler data * \param[in] fmt... printf(3)-style format and arguments */ #define pcmk__sched_warn(scheduler, fmt...) do { \ pcmk__set_scheduler_flags((scheduler), \ pcmk__sched_processing_warning); \ crm_warn(fmt); \ } while (0) /*! * \internal * \brief Set scheduler flags * * \param[in,out] scheduler Scheduler data * \param[in] flags_to_set Group of enum pcmk__scheduler_flags to set */ #define pcmk__set_scheduler_flags(scheduler, flags_to_set) do { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", crm_system_name, \ (scheduler)->flags, (flags_to_set), #flags_to_set); \ } while (0) /*! * \internal * \brief Clear scheduler flags * * \param[in,out] scheduler Scheduler data * \param[in] flags_to_clear Group of enum pcmk__scheduler_flags to clear */ #define pcmk__clear_scheduler_flags(scheduler, flags_to_clear) do { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", crm_system_name, \ (scheduler)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #ifdef __cplusplus } #endif #endif // PCMK__CRM_COMMON_SCHEDULER_INTERNAL__H diff --git a/include/pcmki/pcmki_status.h b/include/pcmki/pcmki_status.h index f4127be486..4725462095 100644 --- a/include/pcmki/pcmki_status.h +++ b/include/pcmki/pcmki_status.h @@ -1,43 +1,44 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__PCMKI_PCMKI_STATUS__H #define PCMK__PCMKI_PCMKI_STATUS__H #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif -int pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, - cib_t *cib, xmlNode *current_cib, +int pcmk__output_cluster_status(pcmk_scheduler_t *scheduler, + stonith_t *stonith, cib_t *cib, + xmlNode *current_cib, enum pcmk_pacemakerd_state pcmkd_state, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix); int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix, unsigned int timeout_ms); #ifdef __cplusplus } #endif #endif // PCMK__PCMKI_PCMKI_STATUS__H diff --git a/lib/common/logging.c b/lib/common/logging.c index c09cd7e0b6..5589ba6811 100644 --- a/lib/common/logging.c +++ b/lib/common/logging.c @@ -1,1270 +1,1269 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // Use high-resolution (millisecond) timestamps if libqb supports them #ifdef QB_FEATURE_LOG_HIRES_TIMESTAMPS #define TIMESTAMP_FORMAT_SPEC "%%T" typedef struct timespec *log_time_t; #else #define TIMESTAMP_FORMAT_SPEC "%%t" typedef time_t log_time_t; #endif unsigned int crm_log_level = LOG_INFO; unsigned int crm_trace_nonlog = 0; bool pcmk__is_daemon = false; -char *pcmk__our_nodename = NULL; static unsigned int crm_log_priority = LOG_NOTICE; static GLogFunc glib_log_default = NULL; static pcmk__output_t *logger_out = NULL; pcmk__config_error_func pcmk__config_error_handler = NULL; pcmk__config_warning_func pcmk__config_warning_handler = NULL; void *pcmk__config_error_context = NULL; void *pcmk__config_warning_context = NULL; static gboolean crm_tracing_enabled(void); static void crm_glib_handler(const gchar * log_domain, GLogLevelFlags flags, const gchar * message, gpointer user_data) { int log_level = LOG_WARNING; GLogLevelFlags msg_level = (flags & G_LOG_LEVEL_MASK); static struct qb_log_callsite *glib_cs = NULL; if (glib_cs == NULL) { glib_cs = qb_log_callsite_get(__func__, __FILE__, "glib-handler", LOG_DEBUG, __LINE__, crm_trace_nonlog); } switch (msg_level) { case G_LOG_LEVEL_CRITICAL: log_level = LOG_CRIT; if (!crm_is_callsite_active(glib_cs, LOG_DEBUG, crm_trace_nonlog)) { /* log and record how we got here */ crm_abort(__FILE__, __func__, __LINE__, message, TRUE, TRUE); } break; case G_LOG_LEVEL_ERROR: log_level = LOG_ERR; break; case G_LOG_LEVEL_MESSAGE: log_level = LOG_NOTICE; break; case G_LOG_LEVEL_INFO: log_level = LOG_INFO; break; case G_LOG_LEVEL_DEBUG: log_level = LOG_DEBUG; break; case G_LOG_LEVEL_WARNING: case G_LOG_FLAG_RECURSION: case G_LOG_FLAG_FATAL: case G_LOG_LEVEL_MASK: log_level = LOG_WARNING; break; } do_crm_log(log_level, "%s: %s", log_domain, message); } #ifndef NAME_MAX # define NAME_MAX 256 #endif /*! * \internal * \brief Write out a blackbox (enabling blackboxes if needed) * * \param[in] nsig Signal number that was received * * \note This is a true signal handler, and so must be async-safe. */ static void crm_trigger_blackbox(int nsig) { if(nsig == SIGTRAP) { /* Turn it on if it wasn't already */ crm_enable_blackbox(nsig); } crm_write_blackbox(nsig, NULL); } void crm_log_deinit(void) { if (glib_log_default != NULL) { g_log_set_default_handler(glib_log_default, NULL); } } #define FMT_MAX 256 /*! * \internal * \brief Set the log format string based on the passed-in method * * \param[in] method The detail level of the log output * \param[in] daemon The daemon ID included in error messages * \param[in] use_pid Cached result of getpid() call, for efficiency * \param[in] use_nodename Cached result of uname() call, for efficiency * */ /* XXX __attribute__((nonnull)) for use_nodename parameter */ static void set_format_string(int method, const char *daemon, pid_t use_pid, const char *use_nodename) { if (method == QB_LOG_SYSLOG) { // The system log gets a simplified, user-friendly format qb_log_ctl(method, QB_LOG_CONF_EXTENDED, QB_FALSE); qb_log_format_set(method, "%g %p: %b"); } else { // Everything else gets more detail, for advanced troubleshooting int offset = 0; char fmt[FMT_MAX]; if (method > QB_LOG_STDERR) { // If logging to file, prefix with timestamp, node name, daemon ID offset += snprintf(fmt + offset, FMT_MAX - offset, TIMESTAMP_FORMAT_SPEC " %s %-20s[%lu] ", use_nodename, daemon, (unsigned long) use_pid); } // Add function name (in parentheses) offset += snprintf(fmt + offset, FMT_MAX - offset, "(%%n"); if (crm_tracing_enabled()) { // When tracing, add file and line number offset += snprintf(fmt + offset, FMT_MAX - offset, "@%%f:%%l"); } offset += snprintf(fmt + offset, FMT_MAX - offset, ")"); // Add tag (if any), severity, and actual message offset += snprintf(fmt + offset, FMT_MAX - offset, " %%g\t%%p: %%b"); CRM_LOG_ASSERT(offset > 0); qb_log_format_set(method, fmt); } } #define DEFAULT_LOG_FILE CRM_LOG_DIR "/pacemaker.log" static bool logfile_disabled(const char *filename) { return pcmk__str_eq(filename, PCMK_VALUE_NONE, pcmk__str_casei) || pcmk__str_eq(filename, "/dev/null", pcmk__str_none); } /*! * \internal * \brief Fix log file ownership if group is wrong or doesn't have access * * \param[in] filename Log file name (for logging only) * \param[in] logfd Log file descriptor * * \return Standard Pacemaker return code */ static int chown_logfile(const char *filename, int logfd) { uid_t pcmk_uid = 0; gid_t pcmk_gid = 0; struct stat st; int rc; // Get the log file's current ownership and permissions if (fstat(logfd, &st) < 0) { return errno; } // Any other errors don't prevent file from being used as log rc = pcmk_daemon_user(&pcmk_uid, &pcmk_gid); if (rc != pcmk_ok) { rc = pcmk_legacy2rc(rc); crm_warn("Not changing '%s' ownership because user information " "unavailable: %s", filename, pcmk_rc_str(rc)); return pcmk_rc_ok; } if ((st.st_gid == pcmk_gid) && ((st.st_mode & S_IRWXG) == (S_IRGRP|S_IWGRP))) { return pcmk_rc_ok; } if (fchown(logfd, pcmk_uid, pcmk_gid) < 0) { crm_warn("Couldn't change '%s' ownership to user %s gid %d: %s", filename, CRM_DAEMON_USER, pcmk_gid, strerror(errno)); } return pcmk_rc_ok; } // Reset log file permissions (using environment variable if set) static void chmod_logfile(const char *filename, int logfd) { const char *modestr = pcmk__env_option(PCMK__ENV_LOGFILE_MODE); mode_t filemode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; if (modestr != NULL) { long filemode_l = strtol(modestr, NULL, 8); if ((filemode_l != LONG_MIN) && (filemode_l != LONG_MAX)) { filemode = (mode_t) filemode_l; } } if ((filemode != 0) && (fchmod(logfd, filemode) < 0)) { crm_warn("Couldn't change '%s' mode to %04o: %s", filename, filemode, strerror(errno)); } } // If we're root, correct a log file's permissions if needed static int set_logfile_permissions(const char *filename, FILE *logfile) { if (geteuid() == 0) { int logfd = fileno(logfile); int rc = chown_logfile(filename, logfd); if (rc != pcmk_rc_ok) { return rc; } chmod_logfile(filename, logfd); } return pcmk_rc_ok; } // Enable libqb logging to a new log file static void enable_logfile(int fd) { qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_TRUE); #if 0 qb_log_ctl(fd, QB_LOG_CONF_FILE_SYNC, 1); // Turn on synchronous writes #endif #ifdef HAVE_qb_log_conf_QB_LOG_CONF_MAX_LINE_LEN // Longer than default, for logging long XML lines qb_log_ctl(fd, QB_LOG_CONF_MAX_LINE_LEN, 800); #endif crm_update_callsites(); } static inline void disable_logfile(int fd) { qb_log_ctl(fd, QB_LOG_CONF_ENABLED, QB_FALSE); } static void setenv_logfile(const char *filename) { // Some resource agents will log only if environment variable is set if (pcmk__env_option(PCMK__ENV_LOGFILE) == NULL) { pcmk__set_env_option(PCMK__ENV_LOGFILE, filename, true); } } /*! * \brief Add a file to be used as a Pacemaker detail log * * \param[in] filename Name of log file to use * * \return Standard Pacemaker return code */ int pcmk__add_logfile(const char *filename) { /* No log messages from this function will be logged to the new log! * If another target such as syslog has already been added, the messages * should show up there. */ int fd = 0; int rc = pcmk_rc_ok; FILE *logfile = NULL; bool is_default = false; static int default_fd = -1; static bool have_logfile = false; // Use default if caller didn't specify (and we don't already have one) if (filename == NULL) { if (have_logfile) { return pcmk_rc_ok; } filename = DEFAULT_LOG_FILE; } // If the user doesn't want logging, we're done if (logfile_disabled(filename)) { return pcmk_rc_ok; } // If the caller wants the default and we already have it, we're done is_default = pcmk__str_eq(filename, DEFAULT_LOG_FILE, pcmk__str_none); if (is_default && (default_fd >= 0)) { return pcmk_rc_ok; } // Check whether we have write access to the file logfile = fopen(filename, "a"); if (logfile == NULL) { rc = errno; crm_warn("Logging to '%s' is disabled: %s " QB_XS " uid=%u gid=%u", filename, strerror(rc), geteuid(), getegid()); return rc; } rc = set_logfile_permissions(filename, logfile); if (rc != pcmk_rc_ok) { crm_warn("Logging to '%s' is disabled: %s " QB_XS " permissions", filename, strerror(rc)); fclose(logfile); return rc; } // Close and reopen as libqb logging target fclose(logfile); fd = qb_log_file_open(filename); if (fd < 0) { crm_warn("Logging to '%s' is disabled: %s " QB_XS " qb_log_file_open", filename, strerror(-fd)); return -fd; // == +errno } if (is_default) { default_fd = fd; setenv_logfile(filename); } else if (default_fd >= 0) { crm_notice("Switching logging to %s", filename); disable_logfile(default_fd); } crm_notice("Additional logging available in %s", filename); enable_logfile(fd); have_logfile = true; return pcmk_rc_ok; } /*! * \brief Add multiple additional log files * * \param[in] log_files Array of log files to add * \param[in] out Output object to use for error reporting * * \return Standard Pacemaker return code */ void pcmk__add_logfiles(gchar **log_files, pcmk__output_t *out) { if (log_files == NULL) { return; } for (gchar **fname = log_files; *fname != NULL; fname++) { int rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } static int blackbox_trigger = 0; static volatile char *blackbox_file_prefix = NULL; static void blackbox_logger(int32_t t, struct qb_log_callsite *cs, log_time_t timestamp, const char *msg) { if(cs && cs->priority < LOG_ERR) { crm_write_blackbox(SIGTRAP, cs); /* Bypass the over-dumping logic */ } else { crm_write_blackbox(0, cs); } } static void crm_control_blackbox(int nsig, bool enable) { int lpc = 0; if (blackbox_file_prefix == NULL) { pid_t pid = getpid(); blackbox_file_prefix = crm_strdup_printf("%s/%s-%lu", CRM_BLACKBOX_DIR, crm_system_name, (unsigned long) pid); } if (enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_SIZE, 5 * 1024 * 1024); /* Any size change drops existing entries */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); /* Setting the size seems to disable it */ /* Enable synchronous logging */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_TRUE); } crm_notice("Initiated blackbox recorder: %s", blackbox_file_prefix); /* Save to disk on abnormal termination */ crm_signal_handler(SIGSEGV, crm_trigger_blackbox); crm_signal_handler(SIGABRT, crm_trigger_blackbox); crm_signal_handler(SIGILL, crm_trigger_blackbox); crm_signal_handler(SIGBUS, crm_trigger_blackbox); crm_signal_handler(SIGFPE, crm_trigger_blackbox); crm_update_callsites(); blackbox_trigger = qb_log_custom_open(blackbox_logger, NULL, NULL, NULL); qb_log_ctl(blackbox_trigger, QB_LOG_CONF_ENABLED, QB_TRUE); crm_trace("Trigger: %d is %d %d", blackbox_trigger, qb_log_ctl(blackbox_trigger, QB_LOG_CONF_STATE_GET, 0), QB_LOG_STATE_ENABLED); crm_update_callsites(); } else if (!enable && qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_STATE_GET, 0) == QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); /* Disable synchronous logging again when the blackbox is disabled */ for (lpc = QB_LOG_BLACKBOX; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_FILE_SYNC, QB_FALSE); } } } void crm_enable_blackbox(int nsig) { crm_control_blackbox(nsig, TRUE); } void crm_disable_blackbox(int nsig) { crm_control_blackbox(nsig, FALSE); } /*! * \internal * \brief Write out a blackbox, if blackboxes are enabled * * \param[in] nsig Signal that was received * \param[in] cs libqb callsite * * \note This may be called via a true signal handler and so must be async-safe. * @TODO actually make this async-safe */ void crm_write_blackbox(int nsig, const struct qb_log_callsite *cs) { static volatile int counter = 1; static volatile time_t last = 0; char buffer[NAME_MAX]; time_t now = time(NULL); if (blackbox_file_prefix == NULL) { return; } switch (nsig) { case 0: case SIGTRAP: /* The graceful case - such as assertion failure or user request */ if (nsig == 0 && now == last) { /* Prevent over-dumping */ return; } snprintf(buffer, NAME_MAX, "%s.%d", blackbox_file_prefix, counter++); if (nsig == SIGTRAP) { crm_notice("Blackbox dump requested, please see %s for contents", buffer); } else if (cs) { syslog(LOG_NOTICE, "Problem detected at %s:%d (%s), please see %s for additional details", cs->function, cs->lineno, cs->filename, buffer); } else { crm_notice("Problem detected, please see %s for additional details", buffer); } last = now; qb_log_blackbox_write_to_file(buffer); /* Flush the existing contents * A size change would also work */ qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_TRUE); break; default: /* Do as little as possible, just try to get what we have out * We logged the filename when the blackbox was enabled */ crm_signal_handler(nsig, SIG_DFL); qb_log_blackbox_write_to_file((const char *)blackbox_file_prefix); qb_log_ctl(QB_LOG_BLACKBOX, QB_LOG_CONF_ENABLED, QB_FALSE); raise(nsig); break; } } static const char * crm_quark_to_string(uint32_t tag) { const char *text = g_quark_to_string(tag); if (text) { return text; } return ""; } static void crm_log_filter_source(int source, const char *trace_files, const char *trace_fns, const char *trace_fmts, const char *trace_tags, const char *trace_blackbox, struct qb_log_callsite *cs) { if (qb_log_ctl(source, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { return; } else if (cs->tags != crm_trace_nonlog && source == QB_LOG_BLACKBOX) { /* Blackbox gets everything if enabled */ qb_bit_set(cs->targets, source); } else if (source == blackbox_trigger && blackbox_trigger > 0) { /* Should this log message result in the blackbox being dumped */ if (cs->priority <= LOG_ERR) { qb_bit_set(cs->targets, source); } else if (trace_blackbox) { char *key = crm_strdup_printf("%s:%d", cs->function, cs->lineno); if (strstr(trace_blackbox, key) != NULL) { qb_bit_set(cs->targets, source); } free(key); } } else if (source == QB_LOG_SYSLOG) { /* No tracing to syslog */ if (cs->priority <= crm_log_priority && cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } /* Log file tracing options... */ } else if (cs->priority <= crm_log_level) { qb_bit_set(cs->targets, source); } else if (trace_files && strstr(trace_files, cs->filename) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fns && strstr(trace_fns, cs->function) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_fmts && strstr(trace_fmts, cs->format) != NULL) { qb_bit_set(cs->targets, source); } else if (trace_tags && cs->tags != 0 && cs->tags != crm_trace_nonlog && g_quark_to_string(cs->tags) != NULL) { qb_bit_set(cs->targets, source); } } #ifndef HAVE_STRCHRNUL /* strchrnul() is a GNU extension. If not present, use our own definition. * The GNU version returns char*, but we only need it to be const char*. */ static const char * strchrnul(const char *s, int c) { while ((*s != c) && (*s != '\0')) { ++s; } return s; } #endif static void crm_log_filter(struct qb_log_callsite *cs) { int lpc = 0; static int need_init = 1; static const char *trace_fns = NULL; static const char *trace_tags = NULL; static const char *trace_fmts = NULL; static const char *trace_files = NULL; static const char *trace_blackbox = NULL; if (need_init) { need_init = 0; trace_fns = pcmk__env_option(PCMK__ENV_TRACE_FUNCTIONS); trace_fmts = pcmk__env_option(PCMK__ENV_TRACE_FORMATS); trace_tags = pcmk__env_option(PCMK__ENV_TRACE_TAGS); trace_files = pcmk__env_option(PCMK__ENV_TRACE_FILES); trace_blackbox = pcmk__env_option(PCMK__ENV_TRACE_BLACKBOX); if (trace_tags != NULL) { uint32_t tag; char token[500]; const char *offset = NULL; const char *next = trace_tags; do { offset = next; next = strchrnul(offset, ','); snprintf(token, sizeof(token), "%.*s", (int)(next - offset), offset); tag = g_quark_from_string(token); crm_info("Created GQuark %u from token '%s' in '%s'", tag, token, trace_tags); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } } cs->targets = 0; /* Reset then find targets to enable */ for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { crm_log_filter_source(lpc, trace_files, trace_fns, trace_fmts, trace_tags, trace_blackbox, cs); } } gboolean crm_is_callsite_active(struct qb_log_callsite *cs, uint8_t level, uint32_t tags) { gboolean refilter = FALSE; if (cs == NULL) { return FALSE; } if (cs->priority != level) { cs->priority = level; refilter = TRUE; } if (cs->tags != tags) { cs->tags = tags; refilter = TRUE; } if (refilter) { crm_log_filter(cs); } if (cs->targets == 0) { return FALSE; } return TRUE; } void crm_update_callsites(void) { static gboolean log = TRUE; if (log) { log = FALSE; crm_debug ("Enabling callsites based on priority=%d, files=%s, functions=%s, formats=%s, tags=%s", crm_log_level, pcmk__env_option(PCMK__ENV_TRACE_FILES), pcmk__env_option(PCMK__ENV_TRACE_FUNCTIONS), pcmk__env_option(PCMK__ENV_TRACE_FORMATS), pcmk__env_option(PCMK__ENV_TRACE_TAGS)); } qb_log_filter_fn_set(crm_log_filter); } static gboolean crm_tracing_enabled(void) { return (crm_log_level == LOG_TRACE) || (pcmk__env_option(PCMK__ENV_TRACE_FILES) != NULL) || (pcmk__env_option(PCMK__ENV_TRACE_FUNCTIONS) != NULL) || (pcmk__env_option(PCMK__ENV_TRACE_FORMATS) != NULL) || (pcmk__env_option(PCMK__ENV_TRACE_TAGS) != NULL); } static int crm_priority2int(const char *name) { struct syslog_names { const char *name; int priority; }; static struct syslog_names p_names[] = { {"emerg", LOG_EMERG}, {"alert", LOG_ALERT}, {"crit", LOG_CRIT}, {"error", LOG_ERR}, {"warning", LOG_WARNING}, {"notice", LOG_NOTICE}, {"info", LOG_INFO}, {"debug", LOG_DEBUG}, {NULL, -1} }; int lpc; for (lpc = 0; name != NULL && p_names[lpc].name != NULL; lpc++) { if (pcmk__str_eq(p_names[lpc].name, name, pcmk__str_none)) { return p_names[lpc].priority; } } return crm_log_priority; } /*! * \internal * \brief Set the identifier for the current process * * If the identifier crm_system_name is not already set, then it is set as follows: * - it is passed to the function via the "entity" parameter, or * - it is derived from the executable name * * The identifier can be used in logs, IPC, and more. * * This method also sets the PCMK_service environment variable. * * \param[in] entity If not NULL, will be assigned to the identifier * \param[in] argc The number of command line parameters * \param[in] argv The command line parameter values */ static void set_identity(const char *entity, int argc, char *const *argv) { if (crm_system_name != NULL) { return; // Already set, don't overwrite } if (entity != NULL) { crm_system_name = pcmk__str_copy(entity); } else if ((argc > 0) && (argv != NULL)) { char *mutable = strdup(argv[0]); char *modified = basename(mutable); if (strstr(modified, "lt-") == modified) { modified += 3; } crm_system_name = pcmk__str_copy(modified); free(mutable); } else { crm_system_name = pcmk__str_copy("Unknown"); } // Used by fencing.py.py (in fence-agents) pcmk__set_env_option(PCMK__ENV_SERVICE, crm_system_name, false); } void crm_log_preinit(const char *entity, int argc, char *const *argv) { /* Configure libqb logging with nothing turned on */ struct utsname res; int lpc = 0; int32_t qb_facility = 0; pid_t pid = getpid(); const char *nodename = "localhost"; static bool have_logging = false; if (have_logging) { return; } have_logging = true; pcmk__xml_init(); if (crm_trace_nonlog == 0) { crm_trace_nonlog = g_quark_from_static_string("Pacemaker non-logging tracepoint"); } umask(S_IWGRP | S_IWOTH | S_IROTH); /* Redirect messages from glib functions to our handler */ glib_log_default = g_log_set_default_handler(crm_glib_handler, NULL); /* and for good measure... - this enum is a bit field (!) */ g_log_set_always_fatal((GLogLevelFlags) 0); /*value out of range */ /* Set crm_system_name, which is used as the logging name. It may also * be used for other purposes such as an IPC client name. */ set_identity(entity, argc, argv); qb_facility = qb_log_facility2int("local0"); qb_log_init(crm_system_name, qb_facility, LOG_ERR); crm_log_level = LOG_CRIT; /* Nuke any syslog activity until it's asked for */ qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_FALSE); #ifdef HAVE_qb_log_conf_QB_LOG_CONF_MAX_LINE_LEN // Shorter than default, generous for what we *should* send to syslog qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_MAX_LINE_LEN, 256); #endif if (uname(memset(&res, 0, sizeof(res))) == 0 && *res.nodename != '\0') { nodename = res.nodename; } /* Set format strings and disable threading * Pacemaker and threads do not mix well (due to the amount of forking) */ qb_log_tags_stringify_fn_set(crm_quark_to_string); for (lpc = QB_LOG_SYSLOG; lpc < QB_LOG_TARGET_MAX; lpc++) { qb_log_ctl(lpc, QB_LOG_CONF_THREADED, QB_FALSE); #ifdef HAVE_qb_log_conf_QB_LOG_CONF_ELLIPSIS // End truncated lines with '...' qb_log_ctl(lpc, QB_LOG_CONF_ELLIPSIS, QB_TRUE); #endif set_format_string(lpc, crm_system_name, pid, nodename); } #ifdef ENABLE_NLS /* Enable translations (experimental). Currently we only have a few * proof-of-concept translations for some option help. The goal would be to * offer translations for option help and man pages rather than logs or * documentation, to reduce the burden of maintaining them. */ // Load locale information for the local host from the environment setlocale(LC_ALL, ""); // Tell gettext where to find Pacemaker message catalogs CRM_ASSERT(bindtextdomain(PACKAGE, PCMK__LOCALE_DIR) != NULL); // Tell gettext to use the Pacemaker message catalogs CRM_ASSERT(textdomain(PACKAGE) != NULL); // Tell gettext that the translated strings are stored in UTF-8 bind_textdomain_codeset(PACKAGE, "UTF-8"); #endif } gboolean crm_log_init(const char *entity, uint8_t level, gboolean daemon, gboolean to_stderr, int argc, char **argv, gboolean quiet) { const char *syslog_priority = NULL; const char *facility = pcmk__env_option(PCMK__ENV_LOGFACILITY); const char *f_copy = facility; pcmk__is_daemon = daemon; crm_log_preinit(entity, argc, argv); if (level > LOG_TRACE) { level = LOG_TRACE; } if(level > crm_log_level) { crm_log_level = level; } /* Should we log to syslog */ if (facility == NULL) { if (pcmk__is_daemon) { facility = "daemon"; } else { facility = PCMK_VALUE_NONE; } pcmk__set_env_option(PCMK__ENV_LOGFACILITY, facility, true); } if (pcmk__str_eq(facility, PCMK_VALUE_NONE, pcmk__str_casei)) { quiet = TRUE; } else { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_FACILITY, qb_log_facility2int(facility)); } if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { /* Override the default setting */ crm_log_level = LOG_DEBUG; } /* What lower threshold do we have for sending to syslog */ syslog_priority = pcmk__env_option(PCMK__ENV_LOGPRIORITY); if (syslog_priority) { crm_log_priority = crm_priority2int(syslog_priority); } qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", crm_log_priority); // Log to syslog unless requested to be quiet if (!quiet) { qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); } /* Should we log to stderr */ if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_STDERR)) { /* Override the default setting */ to_stderr = TRUE; } crm_enable_stderr(to_stderr); // Log to a file if we're a daemon or user asked for one { const char *logfile = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(PCMK_VALUE_NONE, logfile, pcmk__str_casei) && (pcmk__is_daemon || (logfile != NULL))) { // Daemons always get a log file, unless explicitly set to "none" pcmk__add_logfile(logfile); } } if (pcmk__is_daemon && pcmk__env_option_enabled(crm_system_name, PCMK__ENV_BLACKBOX)) { crm_enable_blackbox(0); } /* Summary */ crm_trace("Quiet: %d, facility %s", quiet, f_copy); pcmk__env_option(PCMK__ENV_LOGFILE); pcmk__env_option(PCMK__ENV_LOGFACILITY); crm_update_callsites(); /* Ok, now we can start logging... */ // Disable daemon request if user isn't root or Pacemaker daemon user if (pcmk__is_daemon) { const char *user = getenv("USER"); if (user != NULL && !pcmk__strcase_any_of(user, "root", CRM_DAEMON_USER, NULL)) { crm_trace("Not switching to corefile directory for %s", user); pcmk__is_daemon = false; } } if (pcmk__is_daemon) { int user = getuid(); struct passwd *pwent = getpwuid(user); if (pwent == NULL) { crm_perror(LOG_ERR, "Cannot get name for uid: %d", user); } else if (!pcmk__strcase_any_of(pwent->pw_name, "root", CRM_DAEMON_USER, NULL)) { crm_trace("Don't change active directory for regular user: %s", pwent->pw_name); } else if (chdir(CRM_CORE_DIR) < 0) { crm_perror(LOG_INFO, "Cannot change active directory to " CRM_CORE_DIR); } else { crm_info("Changed active directory to " CRM_CORE_DIR); } /* Original meanings from signal(7) * * Signal Value Action Comment * SIGTRAP 5 Core Trace/breakpoint trap * SIGUSR1 30,10,16 Term User-defined signal 1 * SIGUSR2 31,12,17 Term User-defined signal 2 * * Our usage is as similar as possible */ mainloop_add_signal(SIGUSR1, crm_enable_blackbox); mainloop_add_signal(SIGUSR2, crm_disable_blackbox); mainloop_add_signal(SIGTRAP, crm_trigger_blackbox); } else if (!quiet) { crm_log_args(argc, argv); } return TRUE; } /* returns the old value */ unsigned int set_crm_log_level(unsigned int level) { unsigned int old = crm_log_level; if (level > LOG_TRACE) { level = LOG_TRACE; } crm_log_level = level; crm_update_callsites(); crm_trace("New log level: %d", level); return old; } void crm_enable_stderr(int enable) { if (enable && qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_TRUE); crm_update_callsites(); } else if (enable == FALSE) { qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); } } /*! * \brief Make logging more verbose * * If logging to stderr is not already enabled when this function is called, * enable it. Otherwise, increase the log level by 1. * * \param[in] argc Ignored * \param[in] argv Ignored */ void crm_bump_log_level(int argc, char **argv) { if (qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_STATE_GET, 0) != QB_LOG_STATE_ENABLED) { crm_enable_stderr(TRUE); } else { set_crm_log_level(crm_log_level + 1); } } unsigned int get_crm_log_level(void) { return crm_log_level; } /*! * \brief Log the command line (once) * * \param[in] Number of values in \p argv * \param[in] Command-line arguments (including command name) * * \note This function will only log once, even if called with different * arguments. */ void crm_log_args(int argc, char **argv) { static bool logged = false; gchar *arg_string = NULL; if ((argc == 0) || (argv == NULL) || logged) { return; } logged = true; arg_string = g_strjoinv(" ", argv); crm_notice("Invoked: %s", arg_string); g_free(arg_string); } void crm_log_output_fn(const char *file, const char *function, int line, int level, const char *prefix, const char *output) { const char *next = NULL; const char *offset = NULL; if (level == LOG_NEVER) { return; } if (output == NULL) { if (level != LOG_STDOUT) { level = LOG_TRACE; } output = "-- empty --"; } next = output; do { offset = next; next = strchrnul(offset, '\n'); do_crm_log_alias(level, file, function, line, "%s [ %.*s ]", prefix, (int)(next - offset), offset); if (next[0] != 0) { next++; } } while (next != NULL && next[0] != 0); } void pcmk__cli_init_logging(const char *name, unsigned int verbosity) { crm_log_init(name, LOG_ERR, FALSE, FALSE, 0, NULL, TRUE); for (int i = 0; i < verbosity; i++) { /* These arguments are ignored, so pass placeholders. */ crm_bump_log_level(0, NULL); } } /*! * \brief Log XML line-by-line in a formatted fashion * * \param[in] file File name to use for log filtering * \param[in] function Function name to use for log filtering * \param[in] line Line number to use for log filtering * \param[in] tags Logging tags to use for log filtering * \param[in] level Priority at which to log the messages * \param[in] text Prefix for each line * \param[in] xml XML to log * * \note This does nothing when \p level is \p LOG_STDOUT. * \note Do not call this function directly. It should be called only from the * \p do_crm_log_xml() macro. */ void pcmk_log_xml_as(const char *file, const char *function, uint32_t line, uint32_t tags, uint8_t level, const char *text, const xmlNode *xml) { if (xml == NULL) { do_crm_log(level, "%s%sNo data to dump as XML", pcmk__s(text, ""), pcmk__str_empty(text)? "" : " "); } else { if (logger_out == NULL) { CRM_CHECK(pcmk__log_output_new(&logger_out) == pcmk_rc_ok, return); } pcmk__output_set_log_level(logger_out, level); pcmk__output_set_log_filter(logger_out, file, function, line, tags); pcmk__xml_show(logger_out, text, xml, 1, pcmk__xml_fmt_pretty |pcmk__xml_fmt_open |pcmk__xml_fmt_children |pcmk__xml_fmt_close); pcmk__output_set_log_filter(logger_out, NULL, NULL, 0U, 0U); } } /*! * \internal * \brief Log XML changes line-by-line in a formatted fashion * * \param[in] file File name to use for log filtering * \param[in] function Function name to use for log filtering * \param[in] line Line number to use for log filtering * \param[in] tags Logging tags to use for log filtering * \param[in] level Priority at which to log the messages * \param[in] xml XML whose changes to log * * \note This does nothing when \p level is \c LOG_STDOUT. */ void pcmk__log_xml_changes_as(const char *file, const char *function, uint32_t line, uint32_t tags, uint8_t level, const xmlNode *xml) { if (xml == NULL) { do_crm_log(level, "No XML to dump"); return; } if (logger_out == NULL) { CRM_CHECK(pcmk__log_output_new(&logger_out) == pcmk_rc_ok, return); } pcmk__output_set_log_level(logger_out, level); pcmk__output_set_log_filter(logger_out, file, function, line, tags); pcmk__xml_show_changes(logger_out, xml); pcmk__output_set_log_filter(logger_out, NULL, NULL, 0U, 0U); } /*! * \internal * \brief Log an XML patchset line-by-line in a formatted fashion * * \param[in] file File name to use for log filtering * \param[in] function Function name to use for log filtering * \param[in] line Line number to use for log filtering * \param[in] tags Logging tags to use for log filtering * \param[in] level Priority at which to log the messages * \param[in] patchset XML patchset to log * * \note This does nothing when \p level is \c LOG_STDOUT. */ void pcmk__log_xml_patchset_as(const char *file, const char *function, uint32_t line, uint32_t tags, uint8_t level, const xmlNode *patchset) { if (patchset == NULL) { do_crm_log(level, "No patchset to dump"); return; } if (logger_out == NULL) { CRM_CHECK(pcmk__log_output_new(&logger_out) == pcmk_rc_ok, return); } pcmk__output_set_log_level(logger_out, level); pcmk__output_set_log_filter(logger_out, file, function, line, tags); logger_out->message(logger_out, "xml-patchset", patchset); pcmk__output_set_log_filter(logger_out, NULL, NULL, 0U, 0U); } /*! * \internal * \brief Free the logging library's internal log output object */ void pcmk__free_common_logger(void) { if (logger_out != NULL) { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); logger_out = NULL; } } void pcmk__set_config_error_handler(pcmk__config_error_func error_handler, void *error_context) { pcmk__config_error_handler = error_handler; pcmk__config_error_context = error_context; } void pcmk__set_config_warning_handler(pcmk__config_warning_func warning_handler, void *warning_context) { pcmk__config_warning_handler = warning_handler; pcmk__config_warning_context = warning_context; } diff --git a/lib/common/results.c b/lib/common/results.c index b778d48210..ddd098f238 100644 --- a/lib/common/results.c +++ b/lib/common/results.c @@ -1,1093 +1,1091 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include G_DEFINE_QUARK(pcmk-rc-error-quark, pcmk__rc_error) G_DEFINE_QUARK(pcmk-exitc-error-quark, pcmk__exitc_error) // General (all result code types) /*! * \brief Get the name and description of a given result code * * A result code can be interpreted as a member of any one of several families. * * \param[in] code The result code to look up * \param[in] type How \p code should be interpreted * \param[out] name Where to store the result code's name * \param[out] desc Where to store the result code's description * * \return Standard Pacemaker return code */ int pcmk_result_get_strings(int code, enum pcmk_result_type type, const char **name, const char **desc) { const char *code_name = NULL; const char *code_desc = NULL; switch (type) { case pcmk_result_legacy: code_name = pcmk_errorname(code); code_desc = pcmk_strerror(code); break; case pcmk_result_rc: code_name = pcmk_rc_name(code); code_desc = pcmk_rc_str(code); break; case pcmk_result_exitcode: code_name = crm_exit_name(code); code_desc = crm_exit_str((crm_exit_t) code); break; default: return pcmk_rc_undetermined; } if (name != NULL) { *name = code_name; } if (desc != NULL) { *desc = code_desc; } return pcmk_rc_ok; } /*! * \internal * \brief Get the lower and upper bounds of a result code family * * \param[in] type Type of result code * \param[out] lower Where to store the lower bound * \param[out] upper Where to store the upper bound * * \return Standard Pacemaker return code * * \note There is no true upper bound on standard Pacemaker return codes or * legacy return codes. All system \p errno values are valid members of * these result code families, and there is no global upper limit nor a * constant by which to refer to the highest \p errno value on a given * system. */ int pcmk__result_bounds(enum pcmk_result_type type, int *lower, int *upper) { CRM_ASSERT((lower != NULL) && (upper != NULL)); switch (type) { case pcmk_result_legacy: *lower = pcmk_ok; *upper = 256; // should be enough for almost any system error code break; case pcmk_result_rc: *lower = pcmk_rc_error - pcmk__n_rc + 1; *upper = 256; break; case pcmk_result_exitcode: *lower = CRM_EX_OK; *upper = CRM_EX_MAX; break; default: *lower = 0; *upper = -1; return pcmk_rc_undetermined; } return pcmk_rc_ok; } // @COMPAT Legacy function return codes //! \deprecated Use standard return codes and pcmk_rc_name() instead const char * pcmk_errorname(int rc) { rc = abs(rc); switch (rc) { case pcmk_err_generic: return "pcmk_err_generic"; case pcmk_err_no_quorum: return "pcmk_err_no_quorum"; case pcmk_err_schema_validation: return "pcmk_err_schema_validation"; case pcmk_err_transform_failed: return "pcmk_err_transform_failed"; case pcmk_err_old_data: return "pcmk_err_old_data"; case pcmk_err_diff_failed: return "pcmk_err_diff_failed"; case pcmk_err_diff_resync: return "pcmk_err_diff_resync"; case pcmk_err_cib_modified: return "pcmk_err_cib_modified"; case pcmk_err_cib_backup: return "pcmk_err_cib_backup"; case pcmk_err_cib_save: return "pcmk_err_cib_save"; case pcmk_err_cib_corrupt: return "pcmk_err_cib_corrupt"; case pcmk_err_multiple: return "pcmk_err_multiple"; case pcmk_err_node_unknown: return "pcmk_err_node_unknown"; case pcmk_err_already: return "pcmk_err_already"; case pcmk_err_bad_nvpair: return "pcmk_err_bad_nvpair"; case pcmk_err_unknown_format: return "pcmk_err_unknown_format"; default: return pcmk_rc_name(rc); // system errno } } //! \deprecated Use standard return codes and pcmk_rc_str() instead const char * pcmk_strerror(int rc) { return pcmk_rc_str(pcmk_legacy2rc(rc)); } // Standard Pacemaker API return codes /* This array is used only for nonzero values of pcmk_rc_e. Its values must be * kept in the exact reverse order of the enum value numbering (i.e. add new * values to the end of the array). */ static const struct pcmk__rc_info { const char *name; const char *desc; int legacy_rc; } pcmk__rcs[] = { { "pcmk_rc_error", "Error", -pcmk_err_generic, }, { "pcmk_rc_unknown_format", "Unknown output format", -pcmk_err_unknown_format, }, { "pcmk_rc_bad_nvpair", "Bad name/value pair given", -pcmk_err_bad_nvpair, }, { "pcmk_rc_already", "Already in requested state", -pcmk_err_already, }, { "pcmk_rc_node_unknown", "Node not found", -pcmk_err_node_unknown, }, { "pcmk_rc_multiple", "Resource active on multiple nodes", -pcmk_err_multiple, }, { "pcmk_rc_cib_corrupt", "Could not parse on-disk configuration", -pcmk_err_cib_corrupt, }, { "pcmk_rc_cib_save", "Could not save new configuration to disk", -pcmk_err_cib_save, }, { "pcmk_rc_cib_backup", "Could not archive previous configuration", -pcmk_err_cib_backup, }, { "pcmk_rc_cib_modified", "On-disk configuration was manually modified", -pcmk_err_cib_modified, }, { "pcmk_rc_diff_resync", "Application of update diff failed, requesting full refresh", -pcmk_err_diff_resync, }, { "pcmk_rc_diff_failed", "Application of update diff failed", -pcmk_err_diff_failed, }, { "pcmk_rc_old_data", "Update was older than existing configuration", -pcmk_err_old_data, }, { "pcmk_rc_transform_failed", "Schema transform failed", -pcmk_err_transform_failed, }, { "pcmk_rc_schema_unchanged", "Schema is already the latest available", -pcmk_err_schema_unchanged, }, { "pcmk_rc_schema_validation", "Update does not conform to the configured schema", -pcmk_err_schema_validation, }, { "pcmk_rc_no_quorum", "Operation requires quorum", -pcmk_err_no_quorum, }, { "pcmk_rc_ipc_unauthorized", "IPC server is blocked by unauthorized process", -pcmk_err_generic, }, { "pcmk_rc_ipc_unresponsive", "IPC server is unresponsive", -pcmk_err_generic, }, { "pcmk_rc_ipc_pid_only", "IPC server process is active but not accepting connections", -pcmk_err_generic, }, { "pcmk_rc_op_unsatisfied", "Not applicable under current conditions", -pcmk_err_generic, }, { "pcmk_rc_undetermined", "Result undetermined", -pcmk_err_generic, }, { "pcmk_rc_before_range", "Result occurs before given range", -pcmk_err_generic, }, { "pcmk_rc_within_range", "Result occurs within given range", -pcmk_err_generic, }, { "pcmk_rc_after_range", "Result occurs after given range", -pcmk_err_generic, }, { "pcmk_rc_no_output", "Output message produced no output", -pcmk_err_generic, }, { "pcmk_rc_no_input", "Input file not available", -pcmk_err_generic, }, { "pcmk_rc_underflow", "Value too small to be stored in data type", -pcmk_err_generic, }, { "pcmk_rc_dot_error", "Error writing dot(1) file", -pcmk_err_generic, }, { "pcmk_rc_graph_error", "Error writing graph file", -pcmk_err_generic, }, { "pcmk_rc_invalid_transition", "Cluster simulation produced invalid transition", -pcmk_err_generic, }, { "pcmk_rc_unpack_error", "Unable to parse CIB XML", -pcmk_err_generic, }, { "pcmk_rc_duplicate_id", "Two or more XML elements have the same ID", -pcmk_err_generic, }, { "pcmk_rc_disabled", "Disabled", -pcmk_err_generic, }, { "pcmk_rc_bad_input", "Bad input value provided", -pcmk_err_generic, }, { "pcmk_rc_bad_xml_patch", "Bad XML patch format", -pcmk_err_generic, }, { "pcmk_rc_no_transaction", "No active transaction found", -pcmk_err_generic, }, { "pcmk_rc_ns_resolution", "Nameserver resolution error", -pcmk_err_generic, }, { "pcmk_rc_compression", "Compression/decompression error", -pcmk_err_generic, }, }; /*! * \internal * \brief The number of enum pcmk_rc_e values, excluding \c pcmk_rc_ok * * This constant stores the number of negative standard Pacemaker return codes. * These represent Pacemaker-custom error codes. The count does not include * positive system error numbers, nor does it include \c pcmk_rc_ok (success). */ const size_t pcmk__n_rc = PCMK__NELEM(pcmk__rcs); /*! * \brief Get a return code constant name as a string * * \param[in] rc Integer return code to convert * * \return String of constant name corresponding to rc */ const char * pcmk_rc_name(int rc) { if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < pcmk__n_rc)) { return pcmk__rcs[pcmk_rc_error - rc].name; } switch (rc) { case pcmk_rc_ok: return "pcmk_rc_ok"; case E2BIG: return "E2BIG"; case EACCES: return "EACCES"; case EADDRINUSE: return "EADDRINUSE"; case EADDRNOTAVAIL: return "EADDRNOTAVAIL"; case EAFNOSUPPORT: return "EAFNOSUPPORT"; case EAGAIN: return "EAGAIN"; case EALREADY: return "EALREADY"; case EBADF: return "EBADF"; case EBADMSG: return "EBADMSG"; case EBUSY: return "EBUSY"; case ECANCELED: return "ECANCELED"; case ECHILD: return "ECHILD"; case ECOMM: return "ECOMM"; case ECONNABORTED: return "ECONNABORTED"; case ECONNREFUSED: return "ECONNREFUSED"; case ECONNRESET: return "ECONNRESET"; /* case EDEADLK: return "EDEADLK"; */ case EDESTADDRREQ: return "EDESTADDRREQ"; case EDOM: return "EDOM"; case EDQUOT: return "EDQUOT"; case EEXIST: return "EEXIST"; case EFAULT: return "EFAULT"; case EFBIG: return "EFBIG"; case EHOSTDOWN: return "EHOSTDOWN"; case EHOSTUNREACH: return "EHOSTUNREACH"; case EIDRM: return "EIDRM"; case EILSEQ: return "EILSEQ"; case EINPROGRESS: return "EINPROGRESS"; case EINTR: return "EINTR"; case EINVAL: return "EINVAL"; case EIO: return "EIO"; case EISCONN: return "EISCONN"; case EISDIR: return "EISDIR"; case ELIBACC: return "ELIBACC"; case ELOOP: return "ELOOP"; case EMFILE: return "EMFILE"; case EMLINK: return "EMLINK"; case EMSGSIZE: return "EMSGSIZE"; #ifdef EMULTIHOP // Not available on OpenBSD case EMULTIHOP: return "EMULTIHOP"; #endif case ENAMETOOLONG: return "ENAMETOOLONG"; case ENETDOWN: return "ENETDOWN"; case ENETRESET: return "ENETRESET"; case ENETUNREACH: return "ENETUNREACH"; case ENFILE: return "ENFILE"; case ENOBUFS: return "ENOBUFS"; case ENODATA: return "ENODATA"; case ENODEV: return "ENODEV"; case ENOENT: return "ENOENT"; case ENOEXEC: return "ENOEXEC"; case ENOKEY: return "ENOKEY"; case ENOLCK: return "ENOLCK"; #ifdef ENOLINK // Not available on OpenBSD case ENOLINK: return "ENOLINK"; #endif case ENOMEM: return "ENOMEM"; case ENOMSG: return "ENOMSG"; case ENOPROTOOPT: return "ENOPROTOOPT"; case ENOSPC: return "ENOSPC"; #ifdef ENOSR case ENOSR: return "ENOSR"; #endif #ifdef ENOSTR case ENOSTR: return "ENOSTR"; #endif case ENOSYS: return "ENOSYS"; case ENOTBLK: return "ENOTBLK"; case ENOTCONN: return "ENOTCONN"; case ENOTDIR: return "ENOTDIR"; case ENOTEMPTY: return "ENOTEMPTY"; case ENOTSOCK: return "ENOTSOCK"; #if ENOTSUP != EOPNOTSUPP case ENOTSUP: return "ENOTSUP"; #endif case ENOTTY: return "ENOTTY"; case ENOTUNIQ: return "ENOTUNIQ"; case ENXIO: return "ENXIO"; case EOPNOTSUPP: return "EOPNOTSUPP"; case EOVERFLOW: return "EOVERFLOW"; case EPERM: return "EPERM"; case EPFNOSUPPORT: return "EPFNOSUPPORT"; case EPIPE: return "EPIPE"; case EPROTO: return "EPROTO"; case EPROTONOSUPPORT: return "EPROTONOSUPPORT"; case EPROTOTYPE: return "EPROTOTYPE"; case ERANGE: return "ERANGE"; case EREMOTE: return "EREMOTE"; case EREMOTEIO: return "EREMOTEIO"; case EROFS: return "EROFS"; case ESHUTDOWN: return "ESHUTDOWN"; case ESPIPE: return "ESPIPE"; case ESOCKTNOSUPPORT: return "ESOCKTNOSUPPORT"; case ESRCH: return "ESRCH"; case ESTALE: return "ESTALE"; case ETIME: return "ETIME"; case ETIMEDOUT: return "ETIMEDOUT"; case ETXTBSY: return "ETXTBSY"; #ifdef EUNATCH case EUNATCH: return "EUNATCH"; #endif case EUSERS: return "EUSERS"; /* case EWOULDBLOCK: return "EWOULDBLOCK"; */ case EXDEV: return "EXDEV"; #ifdef EBADE // Not available on OS X case EBADE: return "EBADE"; case EBADFD: return "EBADFD"; case EBADSLT: return "EBADSLT"; case EDEADLOCK: return "EDEADLOCK"; case EBADR: return "EBADR"; case EBADRQC: return "EBADRQC"; case ECHRNG: return "ECHRNG"; #ifdef EISNAM // Not available on OS X, Illumos, Solaris case EISNAM: return "EISNAM"; case EKEYEXPIRED: return "EKEYEXPIRED"; case EKEYREVOKED: return "EKEYREVOKED"; #endif case EKEYREJECTED: return "EKEYREJECTED"; case EL2HLT: return "EL2HLT"; case EL2NSYNC: return "EL2NSYNC"; case EL3HLT: return "EL3HLT"; case EL3RST: return "EL3RST"; case ELIBBAD: return "ELIBBAD"; case ELIBMAX: return "ELIBMAX"; case ELIBSCN: return "ELIBSCN"; case ELIBEXEC: return "ELIBEXEC"; #ifdef ENOMEDIUM // Not available on OS X, Illumos, Solaris case ENOMEDIUM: return "ENOMEDIUM"; case EMEDIUMTYPE: return "EMEDIUMTYPE"; #endif case ENONET: return "ENONET"; case ENOPKG: return "ENOPKG"; case EREMCHG: return "EREMCHG"; case ERESTART: return "ERESTART"; case ESTRPIPE: return "ESTRPIPE"; #ifdef EUCLEAN // Not available on OS X, Illumos, Solaris case EUCLEAN: return "EUCLEAN"; #endif case EXFULL: return "EXFULL"; #endif // EBADE default: return "Unknown"; } } /*! * \brief Get a user-friendly description of a return code * * \param[in] rc Integer return code to convert * * \return String description of rc */ const char * pcmk_rc_str(int rc) { if (rc == pcmk_rc_ok) { return "OK"; } if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < pcmk__n_rc)) { return pcmk__rcs[pcmk_rc_error - rc].desc; } if (rc < 0) { return "Error"; } // Handle values that could be defined by system or by portability.h switch (rc) { #ifdef PCMK__ENOTUNIQ case ENOTUNIQ: return "Name not unique on network"; #endif #ifdef PCMK__ECOMM case ECOMM: return "Communication error on send"; #endif #ifdef PCMK__ELIBACC case ELIBACC: return "Can not access a needed shared library"; #endif #ifdef PCMK__EREMOTEIO case EREMOTEIO: return "Remote I/O error"; #endif #ifdef PCMK__ENOKEY case ENOKEY: return "Required key not available"; #endif #ifdef PCMK__ENODATA case ENODATA: return "No data available"; #endif #ifdef PCMK__ETIME case ETIME: return "Timer expired"; #endif #ifdef PCMK__EKEYREJECTED case EKEYREJECTED: return "Key was rejected by service"; #endif default: return strerror(rc); } } // This returns negative values for errors //! \deprecated Use standard return codes instead int pcmk_rc2legacy(int rc) { if (rc >= 0) { return -rc; // OK or system errno } if ((rc <= pcmk_rc_error) && ((pcmk_rc_error - rc) < pcmk__n_rc)) { return pcmk__rcs[pcmk_rc_error - rc].legacy_rc; } return -pcmk_err_generic; } //! \deprecated Use standard return codes instead int pcmk_legacy2rc(int legacy_rc) { legacy_rc = abs(legacy_rc); switch (legacy_rc) { case pcmk_err_no_quorum: return pcmk_rc_no_quorum; case pcmk_err_schema_validation: return pcmk_rc_schema_validation; case pcmk_err_schema_unchanged: return pcmk_rc_schema_unchanged; case pcmk_err_transform_failed: return pcmk_rc_transform_failed; case pcmk_err_old_data: return pcmk_rc_old_data; case pcmk_err_diff_failed: return pcmk_rc_diff_failed; case pcmk_err_diff_resync: return pcmk_rc_diff_resync; case pcmk_err_cib_modified: return pcmk_rc_cib_modified; case pcmk_err_cib_backup: return pcmk_rc_cib_backup; case pcmk_err_cib_save: return pcmk_rc_cib_save; case pcmk_err_cib_corrupt: return pcmk_rc_cib_corrupt; case pcmk_err_multiple: return pcmk_rc_multiple; case pcmk_err_node_unknown: return pcmk_rc_node_unknown; case pcmk_err_already: return pcmk_rc_already; case pcmk_err_bad_nvpair: return pcmk_rc_bad_nvpair; case pcmk_err_unknown_format: return pcmk_rc_unknown_format; case pcmk_err_generic: return pcmk_rc_error; case pcmk_ok: return pcmk_rc_ok; default: return legacy_rc; // system errno } } // Exit status codes const char * crm_exit_name(crm_exit_t exit_code) { switch (exit_code) { case CRM_EX_OK: return "CRM_EX_OK"; case CRM_EX_ERROR: return "CRM_EX_ERROR"; case CRM_EX_INVALID_PARAM: return "CRM_EX_INVALID_PARAM"; case CRM_EX_UNIMPLEMENT_FEATURE: return "CRM_EX_UNIMPLEMENT_FEATURE"; case CRM_EX_INSUFFICIENT_PRIV: return "CRM_EX_INSUFFICIENT_PRIV"; case CRM_EX_NOT_INSTALLED: return "CRM_EX_NOT_INSTALLED"; case CRM_EX_NOT_CONFIGURED: return "CRM_EX_NOT_CONFIGURED"; case CRM_EX_NOT_RUNNING: return "CRM_EX_NOT_RUNNING"; case CRM_EX_PROMOTED: return "CRM_EX_PROMOTED"; case CRM_EX_FAILED_PROMOTED: return "CRM_EX_FAILED_PROMOTED"; case CRM_EX_USAGE: return "CRM_EX_USAGE"; case CRM_EX_DATAERR: return "CRM_EX_DATAERR"; case CRM_EX_NOINPUT: return "CRM_EX_NOINPUT"; case CRM_EX_NOUSER: return "CRM_EX_NOUSER"; case CRM_EX_NOHOST: return "CRM_EX_NOHOST"; case CRM_EX_UNAVAILABLE: return "CRM_EX_UNAVAILABLE"; case CRM_EX_SOFTWARE: return "CRM_EX_SOFTWARE"; case CRM_EX_OSERR: return "CRM_EX_OSERR"; case CRM_EX_OSFILE: return "CRM_EX_OSFILE"; case CRM_EX_CANTCREAT: return "CRM_EX_CANTCREAT"; case CRM_EX_IOERR: return "CRM_EX_IOERR"; case CRM_EX_TEMPFAIL: return "CRM_EX_TEMPFAIL"; case CRM_EX_PROTOCOL: return "CRM_EX_PROTOCOL"; case CRM_EX_NOPERM: return "CRM_EX_NOPERM"; case CRM_EX_CONFIG: return "CRM_EX_CONFIG"; case CRM_EX_FATAL: return "CRM_EX_FATAL"; case CRM_EX_PANIC: return "CRM_EX_PANIC"; case CRM_EX_DISCONNECT: return "CRM_EX_DISCONNECT"; case CRM_EX_DIGEST: return "CRM_EX_DIGEST"; case CRM_EX_NOSUCH: return "CRM_EX_NOSUCH"; case CRM_EX_QUORUM: return "CRM_EX_QUORUM"; case CRM_EX_UNSAFE: return "CRM_EX_UNSAFE"; case CRM_EX_EXISTS: return "CRM_EX_EXISTS"; case CRM_EX_MULTIPLE: return "CRM_EX_MULTIPLE"; case CRM_EX_EXPIRED: return "CRM_EX_EXPIRED"; case CRM_EX_NOT_YET_IN_EFFECT: return "CRM_EX_NOT_YET_IN_EFFECT"; case CRM_EX_INDETERMINATE: return "CRM_EX_INDETERMINATE"; case CRM_EX_UNSATISFIED: return "CRM_EX_UNSATISFIED"; case CRM_EX_OLD: return "CRM_EX_OLD"; case CRM_EX_TIMEOUT: return "CRM_EX_TIMEOUT"; case CRM_EX_DEGRADED: return "CRM_EX_DEGRADED"; case CRM_EX_DEGRADED_PROMOTED: return "CRM_EX_DEGRADED_PROMOTED"; case CRM_EX_NONE: return "CRM_EX_NONE"; case CRM_EX_MAX: return "CRM_EX_UNKNOWN"; } return "CRM_EX_UNKNOWN"; } const char * crm_exit_str(crm_exit_t exit_code) { switch (exit_code) { case CRM_EX_OK: return "OK"; case CRM_EX_ERROR: return "Error occurred"; case CRM_EX_INVALID_PARAM: return "Invalid parameter"; case CRM_EX_UNIMPLEMENT_FEATURE: return "Unimplemented"; case CRM_EX_INSUFFICIENT_PRIV: return "Insufficient privileges"; case CRM_EX_NOT_INSTALLED: return "Not installed"; case CRM_EX_NOT_CONFIGURED: return "Not configured"; case CRM_EX_NOT_RUNNING: return "Not running"; case CRM_EX_PROMOTED: return "Promoted"; case CRM_EX_FAILED_PROMOTED: return "Failed in promoted role"; case CRM_EX_USAGE: return "Incorrect usage"; case CRM_EX_DATAERR: return "Invalid data given"; case CRM_EX_NOINPUT: return "Input file not available"; case CRM_EX_NOUSER: return "User does not exist"; case CRM_EX_NOHOST: return "Host does not exist"; case CRM_EX_UNAVAILABLE: return "Necessary service unavailable"; case CRM_EX_SOFTWARE: return "Internal software bug"; case CRM_EX_OSERR: return "Operating system error occurred"; case CRM_EX_OSFILE: return "System file not available"; case CRM_EX_CANTCREAT: return "Cannot create output file"; case CRM_EX_IOERR: return "I/O error occurred"; case CRM_EX_TEMPFAIL: return "Temporary failure, try again"; case CRM_EX_PROTOCOL: return "Protocol violated"; case CRM_EX_NOPERM: return "Insufficient privileges"; case CRM_EX_CONFIG: return "Invalid configuration"; case CRM_EX_FATAL: return "Fatal error occurred, will not respawn"; case CRM_EX_PANIC: return "System panic required"; case CRM_EX_DISCONNECT: return "Not connected"; case CRM_EX_DIGEST: return "Digest mismatch"; case CRM_EX_NOSUCH: return "No such object"; case CRM_EX_QUORUM: return "Quorum required"; case CRM_EX_UNSAFE: return "Operation not safe"; case CRM_EX_EXISTS: return "Requested item already exists"; case CRM_EX_MULTIPLE: return "Multiple items match request"; case CRM_EX_EXPIRED: return "Requested item has expired"; case CRM_EX_NOT_YET_IN_EFFECT: return "Requested item is not yet in effect"; case CRM_EX_INDETERMINATE: return "Could not determine status"; case CRM_EX_UNSATISFIED: return "Not applicable under current conditions"; case CRM_EX_OLD: return "Update was older than existing configuration"; case CRM_EX_TIMEOUT: return "Timeout occurred"; case CRM_EX_DEGRADED: return "Service is active but might fail soon"; case CRM_EX_DEGRADED_PROMOTED: return "Service is promoted but might fail soon"; case CRM_EX_NONE: return "No exit status available"; case CRM_EX_MAX: return "Error occurred"; } if ((exit_code > 128) && (exit_code < CRM_EX_MAX)) { return "Interrupted by signal"; } return "Unknown exit status"; } /*! * \brief Map a function return code to the most similar exit code * * \param[in] rc Function return code * * \return Most similar exit code */ crm_exit_t pcmk_rc2exitc(int rc) { switch (rc) { case pcmk_rc_ok: case pcmk_rc_no_output: // quiet mode, or nothing to output return CRM_EX_OK; case pcmk_rc_no_quorum: return CRM_EX_QUORUM; case pcmk_rc_old_data: return CRM_EX_OLD; case pcmk_rc_schema_validation: case pcmk_rc_transform_failed: case pcmk_rc_unpack_error: return CRM_EX_CONFIG; case pcmk_rc_bad_nvpair: return CRM_EX_INVALID_PARAM; case EACCES: return CRM_EX_INSUFFICIENT_PRIV; case EBADF: case EINVAL: case EFAULT: case ENOSYS: case EOVERFLOW: case pcmk_rc_underflow: case pcmk_rc_compression: return CRM_EX_SOFTWARE; case EBADMSG: case EMSGSIZE: case ENOMSG: case ENOPROTOOPT: case EPROTO: case EPROTONOSUPPORT: case EPROTOTYPE: return CRM_EX_PROTOCOL; case ECOMM: case ENOMEM: return CRM_EX_OSERR; case ECONNABORTED: case ECONNREFUSED: case ECONNRESET: case ENOTCONN: return CRM_EX_DISCONNECT; case EEXIST: case pcmk_rc_already: return CRM_EX_EXISTS; case EIO: case pcmk_rc_dot_error: case pcmk_rc_graph_error: return CRM_EX_IOERR; case ENOTSUP: #if EOPNOTSUPP != ENOTSUP case EOPNOTSUPP: #endif return CRM_EX_UNIMPLEMENT_FEATURE; case ENOTUNIQ: case pcmk_rc_multiple: return CRM_EX_MULTIPLE; case ENODEV: case ENOENT: case ENXIO: case pcmk_rc_no_transaction: case pcmk_rc_unknown_format: return CRM_EX_NOSUCH; case pcmk_rc_node_unknown: case pcmk_rc_ns_resolution: return CRM_EX_NOHOST; case ETIME: case ETIMEDOUT: return CRM_EX_TIMEOUT; case EAGAIN: case EBUSY: return CRM_EX_UNSATISFIED; case pcmk_rc_before_range: return CRM_EX_NOT_YET_IN_EFFECT; case pcmk_rc_after_range: return CRM_EX_EXPIRED; case pcmk_rc_undetermined: return CRM_EX_INDETERMINATE; case pcmk_rc_op_unsatisfied: return CRM_EX_UNSATISFIED; case pcmk_rc_within_range: return CRM_EX_OK; case pcmk_rc_no_input: return CRM_EX_NOINPUT; case pcmk_rc_duplicate_id: return CRM_EX_MULTIPLE; case pcmk_rc_bad_input: case pcmk_rc_bad_xml_patch: return CRM_EX_DATAERR; default: return CRM_EX_ERROR; } } /*! * \brief Map a function return code to the most similar OCF exit code * * \param[in] rc Function return code * * \return Most similar OCF exit code */ enum ocf_exitcode pcmk_rc2ocf(int rc) { switch (rc) { case pcmk_rc_ok: return PCMK_OCF_OK; case pcmk_rc_bad_nvpair: return PCMK_OCF_INVALID_PARAM; case EACCES: return PCMK_OCF_INSUFFICIENT_PRIV; case ENOTSUP: #if EOPNOTSUPP != ENOTSUP case EOPNOTSUPP: #endif return PCMK_OCF_UNIMPLEMENT_FEATURE; default: return PCMK_OCF_UNKNOWN_ERROR; } } // Other functions /*! * \brief Map a getaddrinfo() return code to the most similar Pacemaker * return code * * \param[in] gai getaddrinfo() return code * * \return Most similar Pacemaker return code */ int pcmk__gaierror2rc(int gai) { switch (gai) { case 0: return pcmk_rc_ok; case EAI_AGAIN: return EAGAIN; case EAI_BADFLAGS: case EAI_SERVICE: return EINVAL; case EAI_FAMILY: return EAFNOSUPPORT; case EAI_MEMORY: return ENOMEM; case EAI_NONAME: return pcmk_rc_node_unknown; case EAI_SOCKTYPE: return ESOCKTNOSUPPORT; case EAI_SYSTEM: return errno; default: return pcmk_rc_ns_resolution; } } /*! * \brief Map a bz2 return code to the most similar Pacemaker return code * * \param[in] bz2 bz2 return code * * \return Most similar Pacemaker return code */ int pcmk__bzlib2rc(int bz2) { switch (bz2) { case BZ_OK: case BZ_RUN_OK: case BZ_FLUSH_OK: case BZ_FINISH_OK: case BZ_STREAM_END: return pcmk_rc_ok; case BZ_MEM_ERROR: return ENOMEM; case BZ_DATA_ERROR: case BZ_DATA_ERROR_MAGIC: case BZ_UNEXPECTED_EOF: return pcmk_rc_bad_input; case BZ_IO_ERROR: return EIO; case BZ_OUTBUFF_FULL: return EFBIG; default: return pcmk_rc_compression; } } crm_exit_t crm_exit(crm_exit_t rc) { /* A compiler could theoretically use any type for crm_exit_t, but an int * should always hold it, so cast to int to keep static analysis happy. */ if ((((int) rc) < 0) || (((int) rc) > CRM_EX_MAX)) { rc = CRM_EX_ERROR; } mainloop_cleanup(); pcmk__xml_cleanup(); - free(pcmk__our_nodename); - if (crm_system_name) { crm_info("Exiting %s " QB_XS " with status %d", crm_system_name, rc); free(crm_system_name); } else { crm_trace("Exiting with status %d", rc); } pcmk__free_common_logger(); qb_log_fini(); // Don't log anything after this point exit(rc); } /* * External action results */ /*! * \internal * \brief Set the result of an action * * \param[out] result Where to set action result * \param[in] exit_status OCF exit status to set * \param[in] exec_status Execution status to set * \param[in] exit_reason Human-friendly description of event to set */ void pcmk__set_result(pcmk__action_result_t *result, int exit_status, enum pcmk_exec_status exec_status, const char *exit_reason) { if (result == NULL) { return; } result->exit_status = exit_status; result->execution_status = exec_status; if (!pcmk__str_eq(result->exit_reason, exit_reason, pcmk__str_none)) { free(result->exit_reason); result->exit_reason = (exit_reason == NULL)? NULL : strdup(exit_reason); } } /*! * \internal * \brief Set the result of an action, with a formatted exit reason * * \param[out] result Where to set action result * \param[in] exit_status OCF exit status to set * \param[in] exec_status Execution status to set * \param[in] format printf-style format for a human-friendly * description of reason for result * \param[in] ... arguments for \p format */ G_GNUC_PRINTF(4, 5) void pcmk__format_result(pcmk__action_result_t *result, int exit_status, enum pcmk_exec_status exec_status, const char *format, ...) { va_list ap; int len = 0; char *reason = NULL; if (result == NULL) { return; } result->exit_status = exit_status; result->execution_status = exec_status; if (format != NULL) { va_start(ap, format); len = vasprintf(&reason, format, ap); CRM_ASSERT(len > 0); va_end(ap); } free(result->exit_reason); result->exit_reason = reason; } /*! * \internal * \brief Set the output of an action * * \param[out] result Action result to set output for * \param[in] out Action output to set (must be dynamically * allocated) * \param[in] err Action error output to set (must be dynamically * allocated) * * \note \p result will take ownership of \p out and \p err, so the caller * should not free them. */ void pcmk__set_result_output(pcmk__action_result_t *result, char *out, char *err) { if (result == NULL) { return; } free(result->action_stdout); result->action_stdout = out; free(result->action_stderr); result->action_stderr = err; } /*! * \internal * \brief Clear a result's exit reason, output, and error output * * \param[in,out] result Result to reset */ void pcmk__reset_result(pcmk__action_result_t *result) { if (result == NULL) { return; } free(result->exit_reason); result->exit_reason = NULL; free(result->action_stdout); result->action_stdout = NULL; free(result->action_stderr); result->action_stderr = NULL; } /*! * \internal * \brief Copy the result of an action * * \param[in] src Result to copy * \param[out] dst Where to copy \p src to */ void pcmk__copy_result(const pcmk__action_result_t *src, pcmk__action_result_t *dst) { CRM_CHECK((src != NULL) && (dst != NULL), return); dst->exit_status = src->exit_status; dst->execution_status = src->execution_status; dst->exit_reason = pcmk__str_copy(src->exit_reason); dst->action_stdout = pcmk__str_copy(src->action_stdout); dst->action_stderr = pcmk__str_copy(src->action_stderr); } diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c index cba6b9804d..8194e961ab 100644 --- a/lib/pacemaker/pcmk_graph_producer.c +++ b/lib/pacemaker/pcmk_graph_producer.c @@ -1,1111 +1,1111 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include "libpacemaker_private.h" // Convenience macros for logging action properties #define action_type_str(flags) \ (pcmk_is_set((flags), pcmk__action_pseudo)? "pseudo-action" : "action") #define action_optional_str(flags) \ (pcmk_is_set((flags), pcmk__action_optional)? "optional" : "required") #define action_runnable_str(flags) \ (pcmk_is_set((flags), pcmk__action_runnable)? "runnable" : "unrunnable") #define action_node_str(a) \ (((a)->node == NULL)? "no node" : (a)->node->priv->name) /*! * \internal * \brief Add an XML node tag for a specified ID * * \param[in] id Node UUID to add * \param[in,out] xml Parent XML tag to add to */ static xmlNode* add_node_to_xml_by_id(const char *id, xmlNode *xml) { xmlNode *node_xml; node_xml = pcmk__xe_create(xml, PCMK_XE_NODE); crm_xml_add(node_xml, PCMK_XA_ID, id); return node_xml; } /*! * \internal * \brief Add an XML node tag for a specified node * * \param[in] node Node to add * \param[in,out] xml XML to add node to */ static void add_node_to_xml(const pcmk_node_t *node, void *xml) { add_node_to_xml_by_id(node->priv->id, (xmlNode *) xml); } /*! * \internal * \brief Count (optionally add to XML) nodes needing maintenance state update * * \param[in,out] xml Parent XML tag to add to, if any * \param[in] scheduler Scheduler data * * \return Count of nodes added * \note Only Pacemaker Remote nodes are considered currently */ static int add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler) { xmlNode *maintenance = NULL; int count = 0; if (xml != NULL) { maintenance = pcmk__xe_create(xml, PCMK__XE_MAINTENANCE); } for (const GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { const pcmk_node_t *node = iter->data; if (!pcmk__is_pacemaker_remote_node(node)) { continue; } if ((node->details->maintenance && !pcmk_is_set(node->priv->flags, pcmk__node_remote_maint)) || (!node->details->maintenance && pcmk_is_set(node->priv->flags, pcmk__node_remote_maint))) { if (maintenance != NULL) { crm_xml_add(add_node_to_xml_by_id(node->priv->id, maintenance), PCMK__XA_NODE_IN_MAINTENANCE, (node->details->maintenance? "1" : "0")); } count++; } } crm_trace("%s %d nodes in need of maintenance mode update in state", ((maintenance == NULL)? "Counted" : "Added"), count); return count; } /*! * \internal * \brief Add pseudo action with nodes needing maintenance state update * * \param[in,out] scheduler Scheduler data */ static void add_maintenance_update(pcmk_scheduler_t *scheduler) { pcmk_action_t *action = NULL; if (add_maintenance_nodes(NULL, scheduler) != 0) { action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler); pcmk__set_action_flags(action, pcmk__action_always_in_graph); } } /*! * \internal * \brief Add XML with nodes that an action is expected to bring down * * If a specified action is expected to bring any nodes down, add an XML block * with their UUIDs. When a node is lost, this allows the controller to * determine whether it was expected. * * \param[in,out] xml Parent XML tag to add to * \param[in] action Action to check for downed nodes */ static void add_downed_nodes(xmlNode *xml, const pcmk_action_t *action) { CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL), return); if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { /* Shutdown makes the action's node down */ xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* Fencing makes the action's node and any hosted guest nodes down */ const char *fence = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); if (pcmk__is_fencing_action(fence)) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->node->priv->id, downed); pe_foreach_guest_node(action->node->priv->scheduler, action->node, add_node_to_xml, downed); } } else if ((action->rsc != NULL) && pcmk_is_set(action->rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Stopping a remote connection resource makes connected node down, * unless it's part of a migration */ GList *iter; pcmk_action_t *input; bool migrating = false; for (iter = action->actions_before; iter != NULL; iter = iter->next) { input = ((pcmk__related_action_t *) iter->data)->action; if ((input->rsc != NULL) && pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none) && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { migrating = true; break; } } if (!migrating) { xmlNode *downed = pcmk__xe_create(xml, PCMK__XE_DOWNED); add_node_to_xml_by_id(action->rsc->id, downed); } } } /*! * \internal * \brief Create a transition graph operation key for a clone action * * \param[in] action Clone action * \param[in] interval_ms Action interval in milliseconds * * \return Newly allocated string with transition graph operation key */ static char * clone_op_key(const pcmk_action_t *action, guint interval_ms) { if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_operation"); return pcmk__notify_key(action->rsc->priv->history_id, n_type, n_task); } return pcmk__op_key(action->rsc->priv->history_id, pcmk__s(action->cancel_task, action->task), interval_ms); } /*! * \internal * \brief Add node details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] xml Transition graph action XML for \p action */ static void add_node_details(const pcmk_action_t *action, xmlNode *xml) { pcmk_node_t *router_node = pcmk__connection_host_for_action(action); crm_xml_add(xml, PCMK__META_ON_NODE, action->node->priv->name); crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->priv->id); if (router_node != NULL) { crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->priv->name); } } /*! * \internal * \brief Add resource details to transition graph action XML * * \param[in] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_resource_details(const pcmk_action_t *action, xmlNode *action_xml) { xmlNode *rsc_xml = NULL; const char *attr_list[] = { PCMK_XA_CLASS, PCMK_XA_PROVIDER, PCMK_XA_TYPE, }; /* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its * actions so the controller can preserve the lock when the action * completes. */ if (pcmk__action_locks_rsc_to_node(action)) { crm_xml_add_ll(action_xml, PCMK_OPT_SHUTDOWN_LOCK, (long long) action->rsc->priv->lock_time); } // List affected resource rsc_xml = pcmk__xe_create(action_xml, (const char *) action->rsc->priv->xml->name); if (pcmk_is_set(action->rsc->flags, pcmk__rsc_removed) && (action->rsc->priv->history_id != NULL)) { /* Use the numbered instance name here, because if there is more * than one instance on a node, we need to make sure the command * goes to the right one. * * This is important even for anonymous clones, because the clone's * unique meta-attribute might have just been toggled from on to * off. */ crm_debug("Using orphan clone name %s instead of history ID %s", action->rsc->id, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->priv->history_id); crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } else if (!pcmk_is_set(action->rsc->flags, pcmk__rsc_unique)) { const char *xml_id = pcmk__xe_id(action->rsc->priv->xml); crm_debug("Using anonymous clone name %s for %s (aka %s)", xml_id, action->rsc->id, action->rsc->priv->history_id); /* ID is what we'd like client to use * LONG_ID is what they might know it as instead * * LONG_ID is only strictly needed /here/ during the * transition period until all nodes in the cluster * are running the new software /and/ have rebooted * once (meaning that they've only ever spoken to a DC * supporting this feature). * * If anyone toggles the unique flag to 'on', the * 'instance free' name will correspond to an orphan * and fall into the clause above instead */ crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id); if ((action->rsc->priv->history_id != NULL) && !pcmk__str_eq(xml_id, action->rsc->priv->history_id, pcmk__str_none)) { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->priv->history_id); } else { crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id); } } else { CRM_ASSERT(action->rsc->priv->history_id == NULL); crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id); } for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) { crm_xml_add(rsc_xml, attr_list[lpc], g_hash_table_lookup(action->rsc->priv->meta, attr_list[lpc])); } } /*! * \internal * \brief Add action attributes to transition graph action XML * * \param[in,out] action Scheduled action * \param[in,out] action_xml Transition graph action XML for \p action */ static void add_action_attributes(pcmk_action_t *action, xmlNode *action_xml) { xmlNode *args_xml = NULL; pcmk_resource_t *rsc = action->rsc; /* We create free-standing XML to start, so we can sort the attributes * before adding it to action_xml, which keeps the scheduler regression * test graphs comparable. */ args_xml = pcmk__xe_create(action_xml, PCMK__XE_ATTRIBUTES); crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); g_hash_table_foreach(action->extra, hash2field, args_xml); if ((rsc != NULL) && (action->node != NULL)) { // Get the resource instance attributes, evaluated properly for node GHashTable *params = pe_rsc_params(rsc, action->node, rsc->priv->scheduler); pcmk__substitute_remote_addr(rsc, params); g_hash_table_foreach(params, hash2smartfield, args_xml); } else if ((rsc != NULL) && (rsc->priv->variant <= pcmk__rsc_variant_primitive)) { GHashTable *params = pe_rsc_params(rsc, NULL, rsc->priv->scheduler); g_hash_table_foreach(params, hash2smartfield, args_xml); } g_hash_table_foreach(action->meta, hash2metafield, args_xml); if (rsc != NULL) { pcmk_resource_t *parent = rsc; while (parent != NULL) { parent->priv->cmds->add_graph_meta(parent, args_xml); parent = parent->priv->parent; } pcmk__add_guest_meta_to_xml(args_xml, action); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none) && (action->node != NULL)) { /* Pass the node's attributes as meta-attributes. * * @TODO: Determine whether it is still necessary to do this. It was * added in 33d99707, probably for the libfence-based implementation in * c9a90bd, which is no longer used. */ g_hash_table_foreach(action->node->priv->attrs, hash2metafield, args_xml); } pcmk__xe_sort_attrs(args_xml); } /*! * \internal * \brief Create the transition graph XML for a scheduled action * * \param[in,out] parent Parent XML element to add action to * \param[in,out] action Scheduled action * \param[in] skip_details If false, add action details as sub-elements * \param[in] scheduler Scheduler data */ static void create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details, const pcmk_scheduler_t *scheduler) { bool needs_node_info = true; bool needs_maintenance_info = false; xmlNode *action_xml = NULL; if ((action == NULL) || (scheduler == NULL)) { return; } // Create the top-level element based on task if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { /* All fences need node info; guest node fences are pseudo-events */ if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); } else { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } } else if (pcmk__str_any_of(action->task, PCMK_ACTION_DO_SHUTDOWN, PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) { action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); } else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) { // CIB-only clean-up for shutdown locks action_xml = pcmk__xe_create(parent, PCMK__XE_CRM_EVENT); crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB); } else if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_none)) { needs_maintenance_info = true; } action_xml = pcmk__xe_create(parent, PCMK__XE_PSEUDO_EVENT); needs_node_info = false; } else { action_xml = pcmk__xe_create(parent, PCMK__XE_RSC_OP); } crm_xml_add_int(action_xml, PCMK_XA_ID, action->id); crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task); if ((action->rsc != NULL) && (action->rsc->priv->history_id != NULL)) { char *clone_key = NULL; guint interval_ms; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } clone_key = clone_op_key(action, interval_ms); crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key); crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY, action->uuid); free(clone_key); } else { crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid); } if (needs_node_info && (action->node != NULL)) { add_node_details(action, action_xml); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE, action->node->priv->name); pcmk__insert_dup(action->meta, PCMK__META_ON_NODE_UUID, action->node->priv->id); } if (skip_details) { return; } if ((action->rsc != NULL) && !pcmk_is_set(action->flags, pcmk__action_pseudo)) { // This is a real resource action, so add resource details add_resource_details(action, action_xml); } /* List any attributes in effect */ add_action_attributes(action, action_xml); /* List any nodes this action is expected to make down */ if (needs_node_info && (action->node != NULL)) { add_downed_nodes(action_xml, action); } if (needs_maintenance_info) { add_maintenance_nodes(action_xml, scheduler); } } /*! * \internal * \brief Check whether an action should be added to the transition graph * * \param[in,out] action Action to check * * \return true if action should be added to graph, otherwise false */ static bool should_add_action_to_graph(pcmk_action_t *action) { if (!pcmk_is_set(action->flags, pcmk__action_runnable)) { crm_trace("Ignoring action %s (%d): unrunnable", action->uuid, action->id); return false; } if (pcmk_is_set(action->flags, pcmk__action_optional) && !pcmk_is_set(action->flags, pcmk__action_always_in_graph)) { crm_trace("Ignoring action %s (%d): optional", action->uuid, action->id); return false; } /* Actions for unmanaged resources should be excluded from the graph, * with the exception of monitors and cancellation of recurring monitors. */ if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed) && !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { const char *interval_ms_s; /* A cancellation of a recurring monitor will get here because the task * is cancel rather than monitor, but the interval can still be used to * recognize it. The interval has been normalized to milliseconds by * this point, so a string comparison is sufficient. */ interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL); if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) { crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)", action->uuid, action->id, action->rsc->id); return false; } } /* Always add pseudo-actions, fence actions, and shutdown actions (already * determined to be required and runnable by this point) */ if (pcmk_is_set(action->flags, pcmk__action_pseudo) || pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH, PCMK_ACTION_DO_SHUTDOWN, NULL)) { return true; } if (action->node == NULL) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was not assigned to a node (bug?)", action->uuid, action->id); pcmk__log_action("Unassigned", action, false); return false; } if (pcmk_is_set(action->flags, pcmk__action_on_dc)) { crm_trace("Action %s (%d) should be dumped: " "can run on DC instead of %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (pcmk__is_guest_or_bundle_node(action->node) && !pcmk_is_set(action->node->priv->flags, pcmk__node_remote_reset)) { crm_trace("Action %s (%d) should be dumped: " "assuming will be runnable on guest %s", action->uuid, action->id, pcmk__node_name(action->node)); } else if (!action->node->details->online) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for offline node (bug?)", action->uuid, action->id); pcmk__log_action("Offline node", action, false); return false; } else if (action->node->details->unclean) { pcmk__sched_err(action->scheduler, "Skipping action %s (%d) " "because it was scheduled for unclean node (bug?)", action->uuid, action->id); pcmk__log_action("Unclean node", action, false); return false; } return true; } /*! * \internal * \brief Check whether an ordering's flags can change an action * * \param[in] ordering Ordering to check * * \return true if ordering has flags that can change an action, false otherwise */ static bool ordering_can_change_actions(const pcmk__related_action_t *ordering) { return pcmk_any_flags_set(ordering->flags, ~(pcmk__ar_then_implies_first_graphed |pcmk__ar_first_implies_then_graphed |pcmk__ar_ordered)); } /*! * \internal * \brief Check whether an action input should be in the transition graph * * \param[in] action Action to check * \param[in,out] input Action input to check * * \return true if input should be in graph, false otherwise * \note This function may not only check an input, but disable it under certian * circumstances (load or anti-colocation orderings that are not needed). */ static bool should_add_input_to_graph(const pcmk_action_t *action, pcmk__related_action_t *input) { if (input->graphed) { return true; } if (input->flags == pcmk__ar_none) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering disabled", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && !ordering_can_change_actions(input)) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional and input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (!pcmk_is_set(input->action->flags, pcmk__action_runnable) && pcmk_is_set(input->flags, pcmk__ar_min_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "minimum number of instances required but input unrunnable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_unmigratable_then_blocks) && !pcmk_is_set(input->action->flags, pcmk__action_runnable)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input blocked if 'then' unmigratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (pcmk_is_set(input->flags, pcmk__ar_if_first_unmigratable) && pcmk_is_set(input->action->flags, pcmk__action_migratable)) { crm_trace("Ignoring %s (%d) input %s (%d): ordering applies " "only if input is unmigratable, but it is migratable", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if ((input->flags == pcmk__ar_ordered) && pcmk_is_set(input->action->flags, pcmk__action_migratable) && pcmk__ends_with(input->action->uuid, "_stop_0")) { crm_trace("Ignoring %s (%d) input %s (%d): " "optional but stop in migration", action->uuid, action->id, input->action->uuid, input->action->id); return false; } else if (input->flags == pcmk__ar_if_on_same_node_or_target) { pcmk_node_t *input_node = input->action->node; if ((action->rsc != NULL) && pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { pcmk_node_t *assigned = action->rsc->priv->assigned_node; /* For load_stopped -> migrate_to orderings, we care about where * the resource has been assigned, not where migrate_to will be * executed. */ if (!pcmk__same_node(input_node, assigned)) { crm_trace("Ignoring %s (%d) input %s (%d): " "migration target %s is not same as input node %s", action->uuid, action->id, input->action->uuid, input->action->id, (assigned? assigned->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } } else if (!pcmk__same_node(input_node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, (action->node? action->node->priv->name : ""), (input_node? input_node->priv->name : "")); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): " "ordering optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->flags == pcmk__ar_if_required_on_same_node) { if (input->action->node && action->node && !pcmk__same_node(input->action->node, action->node)) { crm_trace("Ignoring %s (%d) input %s (%d): " "not on same node (%s vs %s)", action->uuid, action->id, input->action->uuid, input->action->id, pcmk__node_name(action->node), pcmk__node_name(input->action->node)); input->flags = pcmk__ar_none; return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional)) { crm_trace("Ignoring %s (%d) input %s (%d): optional", action->uuid, action->id, input->action->uuid, input->action->id); input->flags = pcmk__ar_none; return false; } } else if (input->action->rsc && input->action->rsc != action->rsc && pcmk_is_set(input->action->rsc->flags, pcmk__rsc_failed) && !pcmk_is_set(input->action->rsc->flags, pcmk__rsc_managed) && pcmk__ends_with(input->action->uuid, "_stop_0") && pcmk__is_clone(action->rsc)) { crm_warn("Ignoring requirement that %s complete before %s:" " unmanaged failed resources cannot prevent clone shutdown", input->action->uuid, action->uuid); return false; } else if (pcmk_is_set(input->action->flags, pcmk__action_optional) && !pcmk_any_flags_set(input->action->flags, pcmk__action_always_in_graph |pcmk__action_added_to_graph) && !should_add_action_to_graph(input->action)) { crm_trace("Ignoring %s (%d) input %s (%d): " "input optional", action->uuid, action->id, input->action->uuid, input->action->id); return false; } crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x", action->uuid, action->id, action_type_str(input->action->flags), input->action->uuid, input->action->id, action_node_str(input->action), action_runnable_str(input->action->flags), action_optional_str(input->action->flags), input->flags); return true; } /*! * \internal * \brief Check whether an ordering creates an ordering loop * * \param[in] init_action "First" action in ordering * \param[in] action Callers should always set this the same as * \p init_action (this function may use a different * value for recursive calls) * \param[in,out] input Action wrapper for "then" action in ordering * * \return true if the ordering creates a loop, otherwise false */ bool pcmk__graph_has_loop(const pcmk_action_t *init_action, const pcmk_action_t *action, pcmk__related_action_t *input) { bool has_loop = false; if (pcmk_is_set(input->action->flags, pcmk__action_detect_loop)) { crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); return false; } // Don't need to check inputs that won't be used if (!should_add_input_to_graph(action, input)) { return false; } if (input->action == init_action) { crm_debug("Input loop found in %s@%s ->...-> %s@%s", action->uuid, action->node? action->node->priv->name : "", init_action->uuid, init_action->node? init_action->node->priv->name : ""); return true; } pcmk__set_action_flags(input->action, pcmk__action_detect_loop); crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)" "for graph loop with %s@%s ", action->uuid, action->node? action->node->priv->name : "", input->action->uuid, input->action->node? input->action->node->priv->name : "", input->flags, init_action->uuid, init_action->node? init_action->node->priv->name : ""); // Recursively check input itself for loops for (GList *iter = input->action->actions_before; iter != NULL; iter = iter->next) { if (pcmk__graph_has_loop(init_action, input->action, (pcmk__related_action_t *) iter->data)) { // Recursive call already logged a debug message has_loop = true; break; } } pcmk__clear_action_flags(input->action, pcmk__action_detect_loop); if (!has_loop) { crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)", input->action->uuid, input->action->node? input->action->node->priv->name : "", action->uuid, action->node? action->node->priv->name : "", input->flags); } return has_loop; } /*! * \internal * \brief Create a synapse XML element for a transition graph * * \param[in] action Action that synapse is for * \param[in,out] scheduler Scheduler data containing graph * * \return Newly added XML element for new graph synapse */ static xmlNode * create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler) { int synapse_priority = 0; - xmlNode *syn = pcmk__xe_create(scheduler->graph, "synapse"); + xmlNode *syn = pcmk__xe_create(scheduler->priv->graph, "synapse"); crm_xml_add_int(syn, PCMK_XA_ID, scheduler->priv->synapse_count++); if (action->rsc != NULL) { synapse_priority = action->rsc->priv->priority; } if (action->priority > synapse_priority) { synapse_priority = action->priority; } if (synapse_priority > 0) { crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority); } return syn; } /*! * \internal * \brief Add an action to the transition graph XML if appropriate * * \param[in,out] data Action to possibly add * \param[in,out] user_data Scheduler data * * \note This will de-duplicate the action inputs, meaning that the * pcmk__related_action_t:type flags can no longer be relied on to retain * their original settings. That means this MUST be called after * pcmk__apply_orderings() is complete, and nothing after this should rely * on those type flags. (For example, some code looks for type equal to * some flag rather than whether the flag is set, and some code looks for * particular combinations of flags -- such code must be done before * pcmk__create_graph().) */ static void add_action_to_graph(gpointer data, gpointer user_data) { pcmk_action_t *action = (pcmk_action_t *) data; pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data; xmlNode *syn = NULL; xmlNode *set = NULL; xmlNode *in = NULL; /* If we haven't already, de-duplicate inputs (even if we won't be adding * the action to the graph, so that crm_simulate's dot graphs don't have * duplicates). */ if (!pcmk_is_set(action->flags, pcmk__action_inputs_deduplicated)) { pcmk__deduplicate_action_inputs(action); pcmk__set_action_flags(action, pcmk__action_inputs_deduplicated); } if (pcmk_is_set(action->flags, pcmk__action_added_to_graph) || !should_add_action_to_graph(action)) { return; // Already added, or shouldn't be } pcmk__set_action_flags(action, pcmk__action_added_to_graph); crm_trace("Adding action %d (%s%s%s) to graph", action->id, action->uuid, ((action->node == NULL)? "" : " on "), ((action->node == NULL)? "" : action->node->priv->name)); syn = create_graph_synapse(action, scheduler); set = pcmk__xe_create(syn, "action_set"); in = pcmk__xe_create(syn, "inputs"); create_graph_action(set, action, false, scheduler); for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) { pcmk__related_action_t *input = lpc->data; if (should_add_input_to_graph(action, input)) { xmlNode *input_xml = pcmk__xe_create(in, "trigger"); input->graphed = true; create_graph_action(input_xml, input->action, true, scheduler); } } } static int transition_id = -1; /*! * \internal * \brief Log a message after calculating a transition * * \param[in] scheduler Scheduler data * \param[in] filename Where transition input is stored */ void pcmk__log_transition_summary(const pcmk_scheduler_t *scheduler, const char *filename) { if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_error) || crm_config_error) { crm_err("Calculated transition %d (with errors)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_processing_warning) || crm_config_warning) { crm_warn("Calculated transition %d (with warnings)%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } else { crm_notice("Calculated transition %d%s%s", transition_id, (filename == NULL)? "" : ", saving inputs in ", (filename == NULL)? "" : filename); } if (crm_config_error) { crm_notice("Configuration errors found during scheduler processing," " please run \"crm_verify -L\" to identify issues"); } } /*! * \internal * \brief Add a resource's actions to the transition graph * * \param[in,out] rsc Resource whose actions should be added */ void pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc) { GList *iter = NULL; CRM_ASSERT(rsc != NULL); pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id); // First add the resource's own actions g_list_foreach(rsc->priv->actions, add_action_to_graph, rsc->priv->scheduler); // Then recursively add its children's actions (appropriate to variant) for (iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data; child_rsc->priv->cmds->add_actions_to_graph(child_rsc); } } /*! * \internal * \brief Create a transition graph with all cluster actions needed * * \param[in,out] scheduler Scheduler data */ void pcmk__create_graph(pcmk_scheduler_t *scheduler) { GList *iter = NULL; const char *value = NULL; long long limit = 0LL; GHashTable *config_hash = scheduler->priv->options; transition_id++; crm_trace("Creating transition graph %d", transition_id); - scheduler->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); + scheduler->priv->graph = pcmk__xe_create(NULL, PCMK__XE_TRANSITION_GRAPH); value = pcmk__cluster_option(config_hash, PCMK_OPT_CLUSTER_DELAY); - crm_xml_add(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, value); + crm_xml_add(scheduler->priv->graph, PCMK_OPT_CLUSTER_DELAY, value); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); - crm_xml_add(scheduler->graph, PCMK_OPT_STONITH_TIMEOUT, value); + crm_xml_add(scheduler->priv->graph, PCMK_OPT_STONITH_TIMEOUT, value); - crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY"); + crm_xml_add(scheduler->priv->graph, "failed-stop-offset", "INFINITY"); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { - crm_xml_add(scheduler->graph, "failed-start-offset", "INFINITY"); + crm_xml_add(scheduler->priv->graph, "failed-start-offset", "INFINITY"); } else { - crm_xml_add(scheduler->graph, "failed-start-offset", "1"); + crm_xml_add(scheduler->priv->graph, "failed-start-offset", "1"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_BATCH_LIMIT); - crm_xml_add(scheduler->graph, PCMK_OPT_BATCH_LIMIT, value); + crm_xml_add(scheduler->priv->graph, PCMK_OPT_BATCH_LIMIT, value); - crm_xml_add_int(scheduler->graph, "transition_id", transition_id); + crm_xml_add_int(scheduler->priv->graph, "transition_id", transition_id); value = pcmk__cluster_option(config_hash, PCMK_OPT_MIGRATION_LIMIT); if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { - crm_xml_add(scheduler->graph, PCMK_OPT_MIGRATION_LIMIT, value); + crm_xml_add(scheduler->priv->graph, PCMK_OPT_MIGRATION_LIMIT, value); } if (scheduler->recheck_by > 0) { char *recheck_epoch = NULL; recheck_epoch = crm_strdup_printf("%llu", (long long) scheduler->recheck_by); - crm_xml_add(scheduler->graph, "recheck-by", recheck_epoch); + crm_xml_add(scheduler->priv->graph, "recheck-by", recheck_epoch); free(recheck_epoch); } /* The following code will de-duplicate action inputs, so nothing past this * should rely on the action input type flags retaining their original * values. */ // Add resource actions to graph for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id); rsc->priv->cmds->add_actions_to_graph(rsc); } // Add pseudo-action for list of nodes with maintenance state update add_maintenance_update(scheduler); // Add non-resource (node) actions for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; if ((action->rsc != NULL) && (action->node != NULL) && action->node->details->shutdown && !pcmk_is_set(action->rsc->flags, pcmk__rsc_maintenance) && !pcmk_any_flags_set(action->flags, pcmk__action_optional|pcmk__action_runnable) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) { /* Eventually we should just ignore the 'fence' case, but for now * it's the best way to detect (in CTS) when CIB resource updates * are being lost. */ if (pcmk_is_set(scheduler->flags, pcmk__sched_quorate) || (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) { const bool managed = pcmk_is_set(action->rsc->flags, pcmk__rsc_managed); const bool failed = pcmk_is_set(action->rsc->flags, pcmk__rsc_failed); crm_crit("Cannot %s %s because of %s:%s%s (%s)", action->node->details->unclean? "fence" : "shut down", pcmk__node_name(action->node), action->rsc->id, (managed? " blocked" : " unmanaged"), (failed? " failed" : ""), action->uuid); } } add_action_to_graph((gpointer) action, (gpointer) scheduler); } - crm_log_xml_trace(scheduler->graph, "graph"); + crm_log_xml_trace(scheduler->priv->graph, "graph"); } diff --git a/lib/pacemaker/pcmk_sched_constraints.c b/lib/pacemaker/pcmk_sched_constraints.c index 13aa12969d..93d3499393 100644 --- a/lib/pacemaker/pcmk_sched_constraints.c +++ b/lib/pacemaker/pcmk_sched_constraints.c @@ -1,438 +1,438 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" static bool evaluate_lifetime(xmlNode *lifetime, pcmk_scheduler_t *scheduler) { bool result = false; crm_time_t *next_change = crm_time_new_undefined(); pcmk_rule_input_t rule_input = { .now = scheduler->priv->now, }; result = (pcmk__evaluate_rules(lifetime, &rule_input, next_change) == pcmk_rc_ok); if (crm_time_is_defined(next_change)) { time_t recheck = (time_t) crm_time_get_seconds_since_epoch(next_change); pe__update_recheck_time(recheck, scheduler, "constraint lifetime"); } crm_time_free(next_change); return result; } /*! * \internal * \brief Unpack constraints from XML * * Given scheduler data, unpack all constraints from its input XML into * data structures. * * \param[in,out] scheduler Scheduler data */ void pcmk__unpack_constraints(pcmk_scheduler_t *scheduler) { xmlNode *xml_constraints = pcmk_find_cib_element(scheduler->input, PCMK_XE_CONSTRAINTS); for (xmlNode *xml_obj = pcmk__xe_first_child(xml_constraints, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { xmlNode *lifetime = NULL; const char *id = crm_element_value(xml_obj, PCMK_XA_ID); const char *tag = (const char *) xml_obj->name; if (id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " PCMK_XA_ID, tag); continue; } crm_trace("Unpacking %s constraint '%s'", tag, id); lifetime = pcmk__xe_first_child(xml_obj, PCMK__XE_LIFETIME, NULL, NULL); if (lifetime != NULL) { pcmk__config_warn("Support for '" PCMK__XE_LIFETIME "' element " "(in %s) is deprecated and will be dropped " "in a later release", id); } if ((lifetime != NULL) && !evaluate_lifetime(lifetime, scheduler)) { crm_info("Constraint %s %s is not active", tag, id); } else if (pcmk__str_eq(PCMK_XE_RSC_ORDER, tag, pcmk__str_none)) { pcmk__unpack_ordering(xml_obj, scheduler); } else if (pcmk__str_eq(PCMK_XE_RSC_COLOCATION, tag, pcmk__str_none)) { pcmk__unpack_colocation(xml_obj, scheduler); } else if (pcmk__str_eq(PCMK_XE_RSC_LOCATION, tag, pcmk__str_none)) { pcmk__unpack_location(xml_obj, scheduler); } else if (pcmk__str_eq(PCMK_XE_RSC_TICKET, tag, pcmk__str_none)) { pcmk__unpack_rsc_ticket(xml_obj, scheduler); } else { pcmk__config_err("Unsupported constraint type: %s", tag); } } } pcmk_resource_t * pcmk__find_constraint_resource(GList *rsc_list, const char *id) { if (id == NULL) { return NULL; } for (GList *iter = rsc_list; iter != NULL; iter = iter->next) { pcmk_resource_t *parent = iter->data; pcmk_resource_t *match = NULL; match = parent->priv->fns->find_rsc(parent, id, NULL, pcmk_rsc_match_history); if (match != NULL) { if (!pcmk__str_eq(match->id, id, pcmk__str_none)) { /* We found an instance of a clone instead */ match = uber_parent(match); crm_debug("Found %s for %s", match->id, id); } return match; } } crm_trace("No match for %s", id); return NULL; } /*! * \internal * \brief Check whether an ID references a resource tag * * \param[in] scheduler Scheduler data * \param[in] id Tag ID to search for * \param[out] tag Where to store tag, if found * * \return true if ID refers to a tagged resource or resource set template, * otherwise false */ static bool find_constraint_tag(const pcmk_scheduler_t *scheduler, const char *id, pcmk__idref_t **tag) { *tag = NULL; // Check whether id refers to a resource set template - if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, + if (g_hash_table_lookup_extended(scheduler->priv->templates, id, NULL, (gpointer *) tag)) { if (*tag == NULL) { crm_notice("No resource is derived from template '%s'", id); return false; } return true; } // If not, check whether id refers to a tag if (g_hash_table_lookup_extended(scheduler->tags, id, NULL, (gpointer *) tag)) { if (*tag == NULL) { crm_notice("No resource is tagged with '%s'", id); return false; } return true; } pcmk__config_warn("No resource, template, or tag named '%s'", id); return false; } /*! * \brief * \internal Check whether an ID refers to a valid resource or tag * * \param[in] scheduler Scheduler data * \param[in] id ID to search for * \param[out] rsc Where to store resource, if found * (or NULL to skip searching resources) * \param[out] tag Where to store tag, if found * (or NULL to skip searching tags) * * \return true if id refers to a resource (possibly indirectly via a tag) */ bool pcmk__valid_resource_or_tag(const pcmk_scheduler_t *scheduler, const char *id, pcmk_resource_t **rsc, pcmk__idref_t **tag) { if (rsc != NULL) { *rsc = pcmk__find_constraint_resource(scheduler->priv->resources, id); if (*rsc != NULL) { return true; } } if ((tag != NULL) && find_constraint_tag(scheduler, id, tag)) { return true; } return false; } /*! * \internal * \brief Replace any resource tags with equivalent \C PCMK_XE_RESOURCE_REF * entries * * If a given constraint has resource sets, check each set for * \c PCMK_XE_RESOURCE_REF entries that list tags rather than resource IDs, and * replace any found with \c PCMK_XE_RESOURCE_REF entries for the corresponding * resource IDs. * * \param[in,out] xml_obj Constraint XML * \param[in] scheduler Scheduler data * * \return Equivalent XML with resource tags replaced (or NULL if none) * \note It is the caller's responsibility to free the return value with * \c pcmk__xml_free(). */ xmlNode * pcmk__expand_tags_in_sets(xmlNode *xml_obj, const pcmk_scheduler_t *scheduler) { xmlNode *new_xml = NULL; bool any_refs = false; // Short-circuit if there are no sets if (pcmk__xe_first_child(xml_obj, PCMK_XE_RESOURCE_SET, NULL, NULL) == NULL) { return NULL; } new_xml = pcmk__xml_copy(NULL, xml_obj); for (xmlNode *set = pcmk__xe_first_child(new_xml, PCMK_XE_RESOURCE_SET, NULL, NULL); set != NULL; set = pcmk__xe_next_same(set)) { GList *tag_refs = NULL; GList *iter = NULL; for (xmlNode *xml_rsc = pcmk__xe_first_child(set, PCMK_XE_RESOURCE_REF, NULL, NULL); xml_rsc != NULL; xml_rsc = pcmk__xe_next_same(xml_rsc)) { pcmk_resource_t *rsc = NULL; pcmk__idref_t *tag = NULL; if (!pcmk__valid_resource_or_tag(scheduler, pcmk__xe_id(xml_rsc), &rsc, &tag)) { pcmk__config_err("Ignoring resource sets for constraint '%s' " "because '%s' is not a valid resource or tag", pcmk__xe_id(xml_obj), pcmk__xe_id(xml_rsc)); pcmk__xml_free(new_xml); return NULL; } else if (rsc) { continue; } else if (tag) { /* PCMK_XE_RESOURCE_REF under PCMK_XE_RESOURCE_SET references * template or tag */ xmlNode *last_ref = xml_rsc; /* For example, given the original XML: * * * * * * * * If rsc2 and rsc3 are tagged with tag1, we add them after it: * * * * * * * * */ for (iter = tag->refs; iter != NULL; iter = iter->next) { const char *obj_ref = iter->data; xmlNode *new_rsc_ref = NULL; new_rsc_ref = xmlNewDocRawNode(set->doc, NULL, (pcmkXmlStr) PCMK_XE_RESOURCE_REF, NULL); crm_xml_add(new_rsc_ref, PCMK_XA_ID, obj_ref); xmlAddNextSibling(last_ref, new_rsc_ref); last_ref = new_rsc_ref; } any_refs = true; /* Freeing the resource_ref now would break the XML child * iteration, so just remember it for freeing later. */ tag_refs = g_list_append(tag_refs, xml_rsc); } } /* Now free '', and finally get: */ for (iter = tag_refs; iter != NULL; iter = iter->next) { xmlNode *tag_ref = iter->data; pcmk__xml_free(tag_ref); } g_list_free(tag_refs); } if (!any_refs) { pcmk__xml_free(new_xml); new_xml = NULL; } return new_xml; } /*! * \internal * \brief Convert a tag into a resource set of tagged resources * * \param[in,out] xml_obj Constraint XML * \param[out] rsc_set Where to store resource set XML * \param[in] attr Name of XML attribute with resource or tag ID * \param[in] convert_rsc If true, convert to set even if \p attr * references a resource * \param[in] scheduler Scheduler data */ bool pcmk__tag_to_set(xmlNode *xml_obj, xmlNode **rsc_set, const char *attr, bool convert_rsc, const pcmk_scheduler_t *scheduler) { const char *cons_id = NULL; const char *id = NULL; pcmk_resource_t *rsc = NULL; pcmk__idref_t *tag = NULL; *rsc_set = NULL; CRM_CHECK((xml_obj != NULL) && (attr != NULL), return false); cons_id = pcmk__xe_id(xml_obj); if (cons_id == NULL) { pcmk__config_err("Ignoring <%s> constraint without " PCMK_XA_ID, xml_obj->name); return false; } id = crm_element_value(xml_obj, attr); if (id == NULL) { return true; } if (!pcmk__valid_resource_or_tag(scheduler, id, &rsc, &tag)) { pcmk__config_err("Ignoring constraint '%s' because '%s' is not a " "valid resource or tag", cons_id, id); return false; } else if (tag) { /* The "attr" attribute (for a resource in a constraint) specifies a * template or tag. Add the corresponding PCMK_XE_RESOURCE_SET * containing the resources derived from or tagged with it. */ *rsc_set = pcmk__xe_create(xml_obj, PCMK_XE_RESOURCE_SET); crm_xml_add(*rsc_set, PCMK_XA_ID, id); for (GList *iter = tag->refs; iter != NULL; iter = iter->next) { const char *obj_ref = iter->data; xmlNode *rsc_ref = NULL; rsc_ref = pcmk__xe_create(*rsc_set, PCMK_XE_RESOURCE_REF); crm_xml_add(rsc_ref, PCMK_XA_ID, obj_ref); } // Set PCMK_XA_SEQUENTIAL=PCMK_VALUE_FALSE for the PCMK_XE_RESOURCE_SET pcmk__xe_set_bool_attr(*rsc_set, PCMK_XA_SEQUENTIAL, false); } else if ((rsc != NULL) && convert_rsc) { /* Even if a regular resource is referenced by "attr", convert it into a * PCMK_XE_RESOURCE_SET, because the other resource reference in the * constraint could be a template or tag. */ xmlNode *rsc_ref = NULL; *rsc_set = pcmk__xe_create(xml_obj, PCMK_XE_RESOURCE_SET); crm_xml_add(*rsc_set, PCMK_XA_ID, id); rsc_ref = pcmk__xe_create(*rsc_set, PCMK_XE_RESOURCE_REF); crm_xml_add(rsc_ref, PCMK_XA_ID, id); } else { return true; } /* Remove the "attr" attribute referencing the template/tag */ if (*rsc_set != NULL) { pcmk__xe_remove_attr(xml_obj, attr); } return true; } /*! * \internal * \brief Create constraints inherent to resource types * * \param[in,out] scheduler Scheduler data */ void pcmk__create_internal_constraints(pcmk_scheduler_t *scheduler) { crm_trace("Create internal constraints"); for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->internal_constraints(rsc); } } diff --git a/lib/pacemaker/pcmk_scheduler.c b/lib/pacemaker/pcmk_scheduler.c index ec3723b730..dc70cc6f04 100644 --- a/lib/pacemaker/pcmk_scheduler.c +++ b/lib/pacemaker/pcmk_scheduler.c @@ -1,891 +1,880 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" CRM_TRACE_INIT_DATA(pacemaker); /*! * \internal * \brief Do deferred action checks after assignment * * When unpacking the resource history, the scheduler checks for resource * configurations that have changed since an action was run. However, at that * time, bundles using the REMOTE_CONTAINER_HACK don't have their final * parameter information, so instead they add a deferred check to a list. This * function processes one entry in that list. * * \param[in,out] rsc Resource that action history is for * \param[in,out] node Node that action history is for * \param[in] rsc_op Action history entry * \param[in] check Type of deferred check to do */ static void check_params(pcmk_resource_t *rsc, pcmk_node_t *node, const xmlNode *rsc_op, enum pcmk__check_parameters check) { const char *reason = NULL; pcmk__op_digest_t *digest_data = NULL; switch (check) { case pcmk__check_active: if (pcmk__check_action_config(rsc, node, rsc_op) && pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL)) { reason = "action definition changed"; } break; case pcmk__check_last_failure: digest_data = rsc_action_digest_cmp(rsc, rsc_op, node, rsc->priv->scheduler); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s has " "no digest to compare", rsc->id, pcmk__xe_id(rsc_op), node->priv->id); break; case pcmk__digest_match: break; default: reason = "resource parameters have changed"; break; } break; } if (reason != NULL) { pe__clear_failcount(rsc, node, reason, rsc->priv->scheduler); } } /*! * \internal * \brief Check whether a resource has failcount clearing scheduled on a node * * \param[in] node Node to check * \param[in] rsc Resource to check * * \return true if \p rsc has failcount clearing scheduled on \p node, * otherwise false */ static bool failcount_clear_action_exists(const pcmk_node_t *node, const pcmk_resource_t *rsc) { GList *list = pe__resource_actions(rsc, node, PCMK_ACTION_CLEAR_FAILCOUNT, TRUE); if (list != NULL) { g_list_free(list); return true; } return false; } /*! * \internal * \brief Ban a resource from a node if it reached its failure threshold there * * \param[in,out] data Resource to check failure threshold for * \param[in] user_data Node to check resource on */ static void check_failure_threshold(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; // If this is a collective resource, apply recursively to children instead if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, check_failure_threshold, user_data); return; } if (!failcount_clear_action_exists(node, rsc)) { /* Don't force the resource away from this node due to a failcount * that's going to be cleared. * * @TODO Failcount clearing can be scheduled in * pcmk__handle_rsc_config_changes() via process_rsc_history(), or in * schedule_resource_actions() via check_params(). This runs well before * then, so it cannot detect those, meaning we might check the migration * threshold when we shouldn't. Worst case, we stop or move the * resource, then move it back in the next transition. */ pcmk_resource_t *failed = NULL; if (pcmk__threshold_reached(rsc, node, &failed)) { resource_location(failed, node, -PCMK_SCORE_INFINITY, "__fail_limit__", rsc->priv->scheduler); } } } /*! * \internal * \brief If resource has exclusive discovery, ban node if not allowed * * Location constraints have a PCMK_XA_RESOURCE_DISCOVERY option that allows * users to specify where probes are done for the affected resource. If this is * set to \c exclusive, probes will only be done on nodes listed in exclusive * constraints. This function bans the resource from the node if the node is not * listed. * * \param[in,out] data Resource to check * \param[in] user_data Node to check resource on */ static void apply_exclusive_discovery(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; const pcmk_node_t *node = user_data; if (pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes) || pcmk_is_set(pe__const_top_resource(rsc, false)->flags, pcmk__rsc_exclusive_probes)) { pcmk_node_t *match = NULL; // If this is a collective resource, apply recursively to children g_list_foreach(rsc->priv->children, apply_exclusive_discovery, user_data); match = g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id); if ((match != NULL) && (match->assign->probe_mode != pcmk__probe_exclusive)) { match->assign->score = -PCMK_SCORE_INFINITY; } } } /*! * \internal * \brief Apply stickiness to a resource if appropriate * * \param[in,out] data Resource to check for stickiness * \param[in] user_data Ignored */ static void apply_stickiness(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; pcmk_node_t *node = NULL; // If this is a collective resource, apply recursively to children instead if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, apply_stickiness, NULL); return; } /* A resource is sticky if it is managed, has stickiness configured, and is * active on a single node. */ if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed) || (rsc->priv->stickiness < 1) || !pcmk__list_of_1(rsc->priv->active_nodes)) { return; } node = rsc->priv->active_nodes->data; /* In a symmetric cluster, stickiness can always be used. In an * asymmetric cluster, we have to check whether the resource is still * allowed on the node, so we don't keep the resource somewhere it is no * longer explicitly enabled. */ if (!pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_symmetric_cluster) && (g_hash_table_lookup(rsc->priv->allowed_nodes, node->priv->id) == NULL)) { pcmk__rsc_debug(rsc, "Ignoring %s stickiness because the cluster is " "asymmetric and %s is not explicitly allowed", rsc->id, pcmk__node_name(node)); return; } pcmk__rsc_debug(rsc, "Resource %s has %d stickiness on %s", rsc->id, rsc->priv->stickiness, pcmk__node_name(node)); resource_location(rsc, node, rsc->priv->stickiness, "stickiness", rsc->priv->scheduler); } /*! * \internal * \brief Apply shutdown locks for all resources as appropriate * * \param[in,out] scheduler Scheduler data */ static void apply_shutdown_locks(pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { return; } for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->shutdown_lock(rsc); } } /* * \internal * \brief Apply node-specific scheduling criteria * * After the CIB has been unpacked, process node-specific scheduling criteria * including shutdown locks, location constraints, resource stickiness, * migration thresholds, and exclusive resource discovery. */ static void apply_node_criteria(pcmk_scheduler_t *scheduler) { crm_trace("Applying node-specific scheduling criteria"); apply_shutdown_locks(scheduler); pcmk__apply_locations(scheduler); g_list_foreach(scheduler->priv->resources, apply_stickiness, NULL); for (GList *node_iter = scheduler->nodes; node_iter != NULL; node_iter = node_iter->next) { for (GList *rsc_iter = scheduler->priv->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { check_failure_threshold(rsc_iter->data, node_iter->data); apply_exclusive_discovery(rsc_iter->data, node_iter->data); } } } /*! * \internal * \brief Assign resources to nodes * * \param[in,out] scheduler Scheduler data */ static void assign_resources(pcmk_scheduler_t *scheduler) { GList *iter = NULL; crm_trace("Assigning resources to nodes"); if (!pcmk__str_eq(scheduler->priv->placement_strategy, PCMK_VALUE_DEFAULT, pcmk__str_casei)) { pcmk__sort_resources(scheduler); } pcmk__show_node_capacities("Original", scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_have_remote_nodes)) { /* Assign remote connection resources first (which will also assign any * colocation dependencies). If the connection is migrating, always * prefer the partial migration target. */ for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; const pcmk_node_t *target = rsc->priv->partial_migration_target; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { pcmk__rsc_trace(rsc, "Assigning remote connection resource '%s'", rsc->id); rsc->priv->cmds->assign(rsc, target, true); } } } /* now do the rest of the resources */ for (iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (!pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { pcmk__rsc_trace(rsc, "Assigning %s resource '%s'", rsc->priv->xml->name, rsc->id); rsc->priv->cmds->assign(rsc, NULL, true); } } pcmk__show_node_capacities("Remaining", scheduler); } /*! * \internal * \brief Schedule fail count clearing on online nodes if resource is orphaned * * \param[in,out] data Resource to check * \param[in] user_data Ignored */ static void clear_failcounts_if_orphaned(gpointer data, gpointer user_data) { pcmk_resource_t *rsc = data; if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { return; } crm_trace("Clear fail counts for orphaned resource %s", rsc->id); /* There's no need to recurse into rsc->private->children because those * should just be unassigned clone instances. */ for (GList *iter = rsc->priv->scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; pcmk_action_t *clear_op = NULL; if (!node->details->online) { continue; } if (pe_get_failcount(node, rsc, NULL, pcmk__fc_effective, NULL) == 0) { continue; } clear_op = pe__clear_failcount(rsc, node, "it is orphaned", rsc->priv->scheduler); /* We can't use order_action_then_stop() here because its * pcmk__ar_guest_allowed breaks things */ pcmk__new_ordering(clear_op->rsc, NULL, clear_op, rsc, stop_key(rsc), NULL, pcmk__ar_ordered, rsc->priv->scheduler); } } /*! * \internal * \brief Schedule any resource actions needed * * \param[in,out] scheduler Scheduler data */ static void schedule_resource_actions(pcmk_scheduler_t *scheduler) { // Process deferred action checks pe__foreach_param_check(scheduler, check_params); pe__free_param_checks(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) { crm_trace("Scheduling probes"); pcmk__schedule_probes(scheduler); } if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { g_list_foreach(scheduler->priv->resources, clear_failcounts_if_orphaned, NULL); } crm_trace("Scheduling resource actions"); for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; rsc->priv->cmds->create_actions(rsc); } } /*! * \internal * \brief Check whether a resource or any of its descendants are managed * * \param[in] rsc Resource to check * * \return true if resource or any descendant is managed, otherwise false */ static bool is_managed(const pcmk_resource_t *rsc) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { return true; } for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { if (is_managed((pcmk_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether any resources in the cluster are managed * * \param[in] scheduler Scheduler data * * \return true if any resource is managed, otherwise false */ static bool any_managed_resources(const pcmk_scheduler_t *scheduler) { for (const GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { if (is_managed((const pcmk_resource_t *) iter->data)) { return true; } } return false; } /*! * \internal * \brief Check whether a node requires fencing * * \param[in] node Node to check * \param[in] have_managed Whether any resource in cluster is managed * * \return true if \p node should be fenced, otherwise false */ static bool needs_fencing(const pcmk_node_t *node, bool have_managed) { return have_managed && node->details->unclean && pe_can_fence(node->priv->scheduler, node); } /*! * \internal * \brief Check whether a node requires shutdown * * \param[in] node Node to check * * \return true if \p node should be shut down, otherwise false */ static bool needs_shutdown(const pcmk_node_t *node) { if (pcmk__is_pacemaker_remote_node(node)) { /* Do not send shutdown actions for Pacemaker Remote nodes. * @TODO We might come up with a good use for this in the future. */ return false; } return node->details->online && node->details->shutdown; } /*! * \internal * \brief Track and order non-DC fencing * * \param[in,out] list List of existing non-DC fencing actions * \param[in,out] action Fencing action to prepend to \p list * \param[in] scheduler Scheduler data * * \return (Possibly new) head of \p list */ static GList * add_nondc_fencing(GList *list, pcmk_action_t *action, const pcmk_scheduler_t *scheduler) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing) && (list != NULL)) { /* Concurrent fencing is disabled, so order each non-DC * fencing in a chain. If there is any DC fencing or * shutdown, it will be ordered after the last action in the * chain later. */ order_actions((pcmk_action_t *) list->data, action, pcmk__ar_ordered); } return g_list_prepend(list, action); } /*! * \internal * \brief Schedule a node for fencing * * \param[in,out] node Node that requires fencing */ static pcmk_action_t * schedule_fencing(pcmk_node_t *node) { pcmk_action_t *fencing = pe_fence_op(node, NULL, FALSE, "node is unclean", FALSE, node->priv->scheduler); pcmk__sched_warn(node->priv->scheduler, "Scheduling node %s for fencing", pcmk__node_name(node)); pcmk__order_vs_fence(fencing, node->priv->scheduler); return fencing; } /*! * \internal * \brief Create and order node fencing and shutdown actions * * \param[in,out] scheduler Scheduler data */ static void schedule_fencing_and_shutdowns(pcmk_scheduler_t *scheduler) { pcmk_action_t *dc_down = NULL; bool integrity_lost = false; bool have_managed = any_managed_resources(scheduler); GList *fencing_ops = NULL; GList *shutdown_ops = NULL; crm_trace("Scheduling fencing and shutdowns as needed"); if (!have_managed) { crm_notice("No fencing will be done until there are resources " "to manage"); } // Check each node for whether it needs fencing or shutdown for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; pcmk_action_t *fencing = NULL; const bool is_dc = pcmk__same_node(node, scheduler->dc_node); /* Guest nodes are "fenced" by recovering their container resource, * so handle them separately. */ if (pcmk__is_guest_or_bundle_node(node)) { if (pcmk_is_set(node->priv->flags, pcmk__node_remote_reset) && have_managed && pe_can_fence(scheduler, node)) { pcmk__fence_guest(node); } continue; } if (needs_fencing(node, have_managed)) { fencing = schedule_fencing(node); // Track DC and non-DC fence actions separately if (is_dc) { dc_down = fencing; } else { fencing_ops = add_nondc_fencing(fencing_ops, fencing, scheduler); } } else if (needs_shutdown(node)) { pcmk_action_t *down_op = pcmk__new_shutdown_action(node); // Track DC and non-DC shutdown actions separately if (is_dc) { dc_down = down_op; } else { shutdown_ops = g_list_prepend(shutdown_ops, down_op); } } if ((fencing == NULL) && node->details->unclean) { integrity_lost = true; pcmk__config_warn("Node %s is unclean but cannot be fenced", pcmk__node_name(node)); } } if (integrity_lost) { if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Resource functionality and data integrity " "cannot be guaranteed (configure, enable, " "and test fencing to correct this)"); } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { crm_notice("Unclean nodes will not be fenced until quorum is " "attained or " PCMK_OPT_NO_QUORUM_POLICY " is set to " PCMK_VALUE_IGNORE); } } if (dc_down != NULL) { /* Order any non-DC shutdowns before any DC shutdown, to avoid repeated * DC elections. However, we don't want to order non-DC shutdowns before * a DC *fencing*, because even though we don't want a node that's * shutting down to become DC, the DC fencing could be ordered before a * clone stop that's also ordered before the shutdowns, thus leading to * a graph loop. */ if (pcmk__str_eq(dc_down->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { pcmk__order_after_each(dc_down, shutdown_ops); } // Order any non-DC fencing before any DC fencing or shutdown if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) { /* With concurrent fencing, order each non-DC fencing action * separately before any DC fencing or shutdown. */ pcmk__order_after_each(dc_down, fencing_ops); } else if (fencing_ops != NULL) { /* Without concurrent fencing, the non-DC fencing actions are * already ordered relative to each other, so we just need to order * the DC fencing after the last action in the chain (which is the * first item in the list). */ order_actions((pcmk_action_t *) fencing_ops->data, dc_down, pcmk__ar_ordered); } } g_list_free(fencing_ops); g_list_free(shutdown_ops); } static void log_resource_details(pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; GList *all = NULL; /* Due to the `crm_mon --node=` feature, out->message() for all the * resource-related messages expects a list of nodes that we are allowed to * output information for. Here, we create a wildcard to match all nodes. */ all = g_list_prepend(all, (gpointer) "*"); for (GList *item = scheduler->priv->resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) item->data; // Log all resources except inactive orphans if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed) || (rsc->priv->orig_role != pcmk_role_stopped)) { out->message(out, pcmk__map_element_name(rsc->priv->xml), 0UL, rsc, all, all); } } g_list_free(all); } static void log_all_actions(pcmk_scheduler_t *scheduler) { /* This only ever outputs to the log, so ignore whatever output object was * previously set and just log instead. */ pcmk__output_t *prev_out = scheduler->priv->out; pcmk__output_t *out = NULL; if (pcmk__log_output_new(&out) != pcmk_rc_ok) { return; } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_NOTICE); scheduler->priv->out = out; out->begin_list(out, NULL, NULL, "Actions"); pcmk__output_actions(scheduler); out->end_list(out); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); scheduler->priv->out = prev_out; } /*! * \internal * \brief Log all required but unrunnable actions at trace level * * \param[in] scheduler Scheduler data */ static void log_unrunnable_actions(const pcmk_scheduler_t *scheduler) { const uint64_t flags = pcmk__action_optional |pcmk__action_runnable |pcmk__action_pseudo; crm_trace("Required but unrunnable actions:"); for (const GList *iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { const pcmk_action_t *action = (const pcmk_action_t *) iter->data; if (!pcmk_any_flags_set(action->flags, flags)) { pcmk__log_action("\t", action, true); } } } /*! * \internal * \brief Unpack the CIB for scheduling * * \param[in,out] cib CIB XML to unpack (may be NULL if already unpacked) * \param[in] flags Scheduler flags to set in addition to defaults * \param[in,out] scheduler Scheduler data */ static void unpack_cib(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler) { - const char* localhost_save = NULL; - if (pcmk_is_set(scheduler->flags, pcmk__sched_have_status)) { crm_trace("Reusing previously calculated cluster status"); pcmk__set_scheduler_flags(scheduler, flags); return; } - if (scheduler->localhost) { - localhost_save = scheduler->localhost; - } - CRM_ASSERT(cib != NULL); crm_trace("Calculating cluster status"); /* This will zero the entire struct without freeing anything first, so * callers should never call pcmk__schedule_actions() with a populated data * set unless pcmk__sched_have_status is set (i.e. cluster_status() was * previously called, whether directly or via pcmk__schedule_actions()). */ set_working_set_defaults(scheduler); - if (localhost_save) { - scheduler->localhost = localhost_save; - } - pcmk__set_scheduler_flags(scheduler, flags); scheduler->input = cib; cluster_status(scheduler); // Sets pcmk__sched_have_status } /*! * \internal * \brief Run the scheduler for a given CIB * * \param[in,out] cib CIB XML to use as scheduler input * \param[in] flags Scheduler flags to set in addition to defaults * \param[in,out] scheduler Scheduler data */ void pcmk__schedule_actions(xmlNode *cib, unsigned long long flags, pcmk_scheduler_t *scheduler) { unpack_cib(cib, flags, scheduler); pcmk__set_assignment_methods(scheduler); pcmk__apply_node_health(scheduler); pcmk__unpack_constraints(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_validate_only)) { return; } if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only) && pcmk__is_daemon) { log_resource_details(scheduler); } apply_node_criteria(scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { return; } pcmk__create_internal_constraints(scheduler); pcmk__handle_rsc_config_changes(scheduler); assign_resources(scheduler); schedule_resource_actions(scheduler); /* Remote ordering constraints need to happen prior to calculating fencing * because it is one more place we can mark nodes as needing fencing. */ pcmk__order_remote_connection_actions(scheduler); schedule_fencing_and_shutdowns(scheduler); pcmk__apply_orderings(scheduler); log_all_actions(scheduler); pcmk__create_graph(scheduler); if (get_crm_log_level() == LOG_TRACE) { log_unrunnable_actions(scheduler); } } /*! * \internal * \brief Initialize scheduler data * * Make our own copies of the CIB XML and date/time object, if they're not * \c NULL. This way we don't have to take ownership of the objects passed via * the API. * * This function is most useful for public API functions that want the caller * to retain ownership of the CIB object * * \param[in,out] out Output object * \param[in] input The CIB XML to check (if \c NULL, use current CIB) * \param[in] date Date and time to use in the scheduler (if \c NULL, * use current date and time). This can be used for * checking whether a rule is in effect at a certa * date and time. * \param[out] scheduler Where to store initialized scheduler data * * \return Standard Pacemaker return code */ int pcmk__init_scheduler(pcmk__output_t *out, xmlNodePtr input, const crm_time_t *date, pcmk_scheduler_t **scheduler) { // Allows for cleaner syntax than dereferencing the scheduler argument pcmk_scheduler_t *new_scheduler = NULL; new_scheduler = pe_new_working_set(); if (new_scheduler == NULL) { return ENOMEM; } - pcmk__set_scheduler_flags(new_scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(new_scheduler, pcmk__sched_no_counts); // Populate the scheduler data // Make our own copy of the given input or fetch the CIB and use that if (input != NULL) { new_scheduler->input = pcmk__xml_copy(NULL, input); if (new_scheduler->input == NULL) { out->err(out, "Failed to copy input XML"); pe_free_working_set(new_scheduler); return ENOMEM; } } else { int rc = cib__signon_query(out, NULL, &(new_scheduler->input)); if (rc != pcmk_rc_ok) { pe_free_working_set(new_scheduler); return rc; } } // Make our own copy of the given crm_time_t object; otherwise // cluster_status() populates with the current time if (date != NULL) { // pcmk_copy_time() guarantees non-NULL new_scheduler->priv->now = pcmk_copy_time(date); } // Unpack everything cluster_status(new_scheduler); *scheduler = new_scheduler; return pcmk_rc_ok; } diff --git a/lib/pacemaker/pcmk_simulate.c b/lib/pacemaker/pcmk_simulate.c index 63c96807a2..0fe3a03580 100644 --- a/lib/pacemaker/pcmk_simulate.c +++ b/lib/pacemaker/pcmk_simulate.c @@ -1,1013 +1,1005 @@ /* * Copyright 2021-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" static pcmk__output_t *out = NULL; static cib_t *fake_cib = NULL; static GList *fake_resource_list = NULL; static const GList *fake_op_fail_list = NULL; static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date); /*! * \internal * \brief Create an action name for use in a dot graph * * \param[in] action Action to create name for * \param[in] verbose If true, add action ID to name * * \return Newly allocated string with action name * \note It is the caller's responsibility to free the result. */ static char * create_action_name(const pcmk_action_t *action, bool verbose) { char *action_name = NULL; const char *prefix = ""; const char *action_host = NULL; const char *history_id = NULL; const char *task = action->task; if (action->node != NULL) { action_host = action->node->priv->name; } else if (!pcmk_is_set(action->flags, pcmk__action_pseudo)) { action_host = ""; } if (pcmk__str_eq(action->task, PCMK_ACTION_CANCEL, pcmk__str_none)) { prefix = "Cancel "; task = action->cancel_task; } if (action->rsc != NULL) { history_id = action->rsc->priv->history_id; } if (history_id != NULL) { char *key = NULL; guint interval_ms = 0; if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0, &interval_ms) != pcmk_rc_ok) { interval_ms = 0; } if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_NOTIFIED, NULL)) { const char *n_type = g_hash_table_lookup(action->meta, "notify_key_type"); const char *n_task = g_hash_table_lookup(action->meta, "notify_key_operation"); CRM_ASSERT(n_type != NULL); CRM_ASSERT(n_task != NULL); key = pcmk__notify_key(history_id, n_type, n_task); } else { key = pcmk__op_key(history_id, task, interval_ms); } if (action_host != NULL) { action_name = crm_strdup_printf("%s%s %s", prefix, key, action_host); } else { action_name = crm_strdup_printf("%s%s", prefix, key); } free(key); } else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) { const char *op = g_hash_table_lookup(action->meta, PCMK__META_STONITH_ACTION); action_name = crm_strdup_printf("%s%s '%s' %s", prefix, action->task, op, action_host); } else if (action->rsc && action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->uuid, action_host); } else if (action_host) { action_name = crm_strdup_printf("%s%s %s", prefix, action->task, action_host); } else { action_name = crm_strdup_printf("%s", action->uuid); } if (verbose) { char *with_id = crm_strdup_printf("%s (%d)", action_name, action->id); free(action_name); action_name = with_id; } return action_name; } /*! * \internal * \brief Display the status of a cluster * * \param[in,out] scheduler Scheduler data * \param[in] show_opts How to modify display (as pcmk_show_opt_e flags) * \param[in] section_opts Sections to display (as pcmk_section_e flags) * \param[in] title What to use as list title * \param[in] print_spacer Whether to display a spacer first */ static void print_cluster_status(pcmk_scheduler_t *scheduler, uint32_t show_opts, uint32_t section_opts, const char *title, bool print_spacer) { pcmk__output_t *out = scheduler->priv->out; GList *all = NULL; crm_exit_t stonith_rc = 0; enum pcmk_pacemakerd_state state = pcmk_pacemakerd_state_invalid; section_opts |= pcmk_section_nodes | pcmk_section_resources; show_opts |= pcmk_show_inactive_rscs | pcmk_show_failed_detail; all = g_list_prepend(all, (gpointer) "*"); PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "%s", title); out->message(out, "cluster-status", scheduler, state, stonith_rc, NULL, pcmk__fence_history_none, section_opts, show_opts, NULL, all, all); out->end_list(out); g_list_free(all); } /*! * \internal * \brief Display a summary of all actions scheduled in a transition * * \param[in,out] scheduler Scheduler data (fully scheduled) * \param[in] print_spacer Whether to display a spacer first */ static void print_transition_summary(pcmk_scheduler_t *scheduler, bool print_spacer) { pcmk__output_t *out = scheduler->priv->out; PCMK__OUTPUT_SPACER_IF(out, print_spacer); out->begin_list(out, NULL, NULL, "Transition Summary"); pcmk__output_actions(scheduler); out->end_list(out); } /*! * \internal * \brief Reset scheduler input, output, date, and flags * * \param[in,out] scheduler Scheduler data * \param[in] input What to set as cluster input * \param[in] out What to set as cluster output object * \param[in] use_date What to set as cluster's current timestamp * \param[in] flags Group of enum pcmk__scheduler_flags to set */ static void reset(pcmk_scheduler_t *scheduler, xmlNodePtr input, pcmk__output_t *out, const char *use_date, unsigned int flags) { scheduler->input = input; scheduler->priv->out = out; set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_sanitized)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_sanitized); } if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_show_utilization); } } /*! * \brief Write out a file in dot(1) format describing the actions that will * be taken by the scheduler in response to an input CIB file. * * \param[in,out] scheduler Scheduler data * \param[in] dot_file The filename to write * \param[in] all_actions Write all actions, even those that are optional * or are on unmanaged resources * \param[in] verbose Add extra information, such as action IDs, to the * output * * \return Standard Pacemaker return code */ static int write_sim_dotfile(pcmk_scheduler_t *scheduler, const char *dot_file, bool all_actions, bool verbose) { GList *iter = NULL; FILE *dot_strm = fopen(dot_file, "w"); if (dot_strm == NULL) { return errno; } fprintf(dot_strm, " digraph \"g\" {\n"); for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; const char *style = "dashed"; const char *font = "black"; const char *color = "black"; char *action_name = create_action_name(action, verbose); if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { font = "orange"; } if (pcmk_is_set(action->flags, pcmk__action_added_to_graph)) { style = PCMK__VALUE_BOLD; color = "green"; } else if ((action->rsc != NULL) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed)) { color = "red"; font = "purple"; if (!all_actions) { goto do_not_write; } } else if (pcmk_is_set(action->flags, pcmk__action_optional)) { color = "blue"; if (!all_actions) { goto do_not_write; } } else { color = "red"; CRM_LOG_ASSERT(!pcmk_is_set(action->flags, pcmk__action_runnable)); } pcmk__set_action_flags(action, pcmk__action_added_to_graph); fprintf(dot_strm, "\"%s\" [ style=%s color=\"%s\" fontcolor=\"%s\"]\n", action_name, style, color, font); do_not_write: free(action_name); } for (iter = scheduler->priv->actions; iter != NULL; iter = iter->next) { pcmk_action_t *action = (pcmk_action_t *) iter->data; for (GList *before_iter = action->actions_before; before_iter != NULL; before_iter = before_iter->next) { pcmk__related_action_t *before = before_iter->data; char *before_name = NULL; char *after_name = NULL; const char *style = "dashed"; bool optional = true; if (before->graphed) { optional = false; style = PCMK__VALUE_BOLD; } else if (before->flags == pcmk__ar_none) { continue; } else if (pcmk_is_set(before->action->flags, pcmk__action_added_to_graph) && pcmk_is_set(action->flags, pcmk__action_added_to_graph) && before->flags != pcmk__ar_if_on_same_node_or_target) { optional = false; } if (all_actions || !optional) { before_name = create_action_name(before->action, verbose); after_name = create_action_name(action, verbose); fprintf(dot_strm, "\"%s\" -> \"%s\" [ style = %s]\n", before_name, after_name, style); free(before_name); free(after_name); } } } fprintf(dot_strm, "}\n"); fflush(dot_strm); fclose(dot_strm); return pcmk_rc_ok; } /*! * \brief Profile the configuration updates and scheduler actions in a single * CIB file, printing the profiling timings. * * \note \p scheduler->priv->out must have been set to a valid \p pcmk__output_t * object before this function is called. * * \param[in] xml_file The CIB file to profile * \param[in] repeat Number of times to run * \param[in,out] scheduler Scheduler data * \param[in] use_date The date to set the cluster's time to (may be NULL) */ static void profile_file(const char *xml_file, long long repeat, pcmk_scheduler_t *scheduler, const char *use_date) { pcmk__output_t *out = scheduler->priv->out; xmlNode *cib_object = NULL; clock_t start = 0; clock_t end; - unsigned long long scheduler_flags = pcmk__sched_no_compat; + unsigned long long scheduler_flags = pcmk__sched_none; CRM_ASSERT(out != NULL); cib_object = pcmk__xml_read(xml_file); start = clock(); if (pcmk_find_cib_element(cib_object, PCMK_XE_STATUS) == NULL) { pcmk__xe_create(cib_object, PCMK_XE_STATUS); } if (pcmk_update_configured_schema(&cib_object, false) != pcmk_rc_ok) { pcmk__xml_free(cib_object); return; } if (!pcmk__validate_xml(cib_object, NULL, NULL, NULL)) { pcmk__xml_free(cib_object); return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_output_scores)) { scheduler_flags |= pcmk__sched_output_scores; } if (pcmk_is_set(scheduler->flags, pcmk__sched_show_utilization)) { scheduler_flags |= pcmk__sched_show_utilization; } for (int i = 0; i < repeat; ++i) { xmlNode *input = cib_object; if (repeat > 1) { input = pcmk__xml_copy(NULL, cib_object); } scheduler->input = input; set_effective_date(scheduler, false, use_date); pcmk__schedule_actions(input, scheduler_flags, scheduler); pe_reset_working_set(scheduler); } end = clock(); out->message(out, "profile", xml_file, start, end); } void pcmk__profile_dir(const char *dir, long long repeat, pcmk_scheduler_t *scheduler, const char *use_date) { pcmk__output_t *out = scheduler->priv->out; struct dirent **namelist; int file_num = scandir(dir, &namelist, 0, alphasort); CRM_ASSERT(out != NULL); if (file_num > 0) { struct stat prop; char buffer[FILENAME_MAX]; out->begin_list(out, NULL, NULL, "Timings"); while (file_num--) { if ('.' == namelist[file_num]->d_name[0]) { free(namelist[file_num]); continue; } else if (!pcmk__ends_with_ext(namelist[file_num]->d_name, ".xml")) { free(namelist[file_num]); continue; } snprintf(buffer, sizeof(buffer), "%s/%s", dir, namelist[file_num]->d_name); if (stat(buffer, &prop) == 0 && S_ISREG(prop.st_mode)) { profile_file(buffer, repeat, scheduler, use_date); } free(namelist[file_num]); } free(namelist); out->end_list(out); } } /*! * \brief Set the date of the cluster, either to the value given by * \p use_date, or to the \c PCMK_XA_EXECUTION_DATE value in the CIB. * * \note \p scheduler->priv->out must have been set to a valid \p pcmk__output_t * object before this function is called. * * \param[in,out] scheduler Scheduler data * \param[in] print_original If \p true, the \c PCMK_XA_EXECUTION_DATE * should also be printed * \param[in] use_date The date to set the cluster's time to * (may be NULL) */ static void set_effective_date(pcmk_scheduler_t *scheduler, bool print_original, const char *use_date) { pcmk__output_t *out = scheduler->priv->out; time_t original_date = 0; CRM_ASSERT(out != NULL); crm_element_value_epoch(scheduler->input, PCMK_XA_EXECUTION_DATE, &original_date); if (use_date) { scheduler->priv->now = crm_time_new(use_date); out->info(out, "Setting effective cluster time: %s", use_date); crm_time_log(LOG_NOTICE, "Pretending 'now' is", scheduler->priv->now, crm_time_log_date | crm_time_log_timeofday); } else if (original_date != 0) { scheduler->priv->now = pcmk__copy_timet(original_date); if (print_original) { char *when = crm_time_as_string(scheduler->priv->now, crm_time_log_date |crm_time_log_timeofday); out->info(out, "Using the original execution date of: %s", when); free(when); } } } /*! * \internal * \brief Simulate successfully executing a pseudo-action in a graph * * \param[in,out] graph Graph to update with pseudo-action result * \param[in,out] action Pseudo-action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-pseudo-action", node, task); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate executing a resource action in a graph * * \param[in,out] graph Graph to update with resource action result * \param[in,out] action Resource action to simulate executing * * \return Standard Pacemaker return code */ static int simulate_resource_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { int rc; lrmd_event_data_t *op = NULL; int target_outcome = PCMK_OCF_OK; const char *rtype = NULL; const char *rclass = NULL; const char *resource = NULL; const char *rprovider = NULL; const char *resource_config_name = NULL; const char *operation = crm_element_value(action->xml, PCMK_XA_OPERATION); const char *target_rc_s = crm_meta_value(action->params, PCMK__META_OP_TARGET_RC); xmlNode *cib_node = NULL; xmlNode *cib_resource = NULL; xmlNode *action_rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL, NULL); char *node = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); char *uuid = NULL; const char *router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE); // Certain actions don't need to be displayed or history entries if (pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) { crm_debug("No history injection for %s op on %s", operation, node); goto done; // Confirm action and update graph } if (action_rsc == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "Bad"); free(node); return EPROTO; } /* A resource might be known by different names in the configuration and in * the action (for example, a clone instance). Grab the configuration name * (which is preferred when writing history), and if necessary, the instance * name. */ resource_config_name = crm_element_value(action_rsc, PCMK_XA_ID); if (resource_config_name == NULL) { // Shouldn't be possible crm_log_xml_err(action->xml, "No ID"); free(node); return EPROTO; } resource = resource_config_name; if (pe_find_resource(fake_resource_list, resource) == NULL) { const char *longname = crm_element_value(action_rsc, PCMK__XA_LONG_ID); if ((longname != NULL) && (pe_find_resource(fake_resource_list, longname) != NULL)) { resource = longname; } } // Certain actions need to be displayed but don't need history entries if (pcmk__strcase_any_of(operation, PCMK_ACTION_DELETE, PCMK_ACTION_META_DATA, NULL)) { out->message(out, "inject-rsc-action", resource, operation, node, (guint) 0); goto done; // Confirm action and update graph } rclass = crm_element_value(action_rsc, PCMK_XA_CLASS); rtype = crm_element_value(action_rsc, PCMK_XA_TYPE); rprovider = crm_element_value(action_rsc, PCMK_XA_PROVIDER); pcmk__scan_min_int(target_rc_s, &target_outcome, 0); CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call) == pcmk_ok); // Ensure the action node is in the CIB uuid = crm_element_value_copy(action->xml, PCMK__META_ON_NODE_UUID); cib_node = pcmk__inject_node(fake_cib, node, ((router_node == NULL)? uuid: node)); free(uuid); CRM_ASSERT(cib_node != NULL); // Add a history entry for the action cib_resource = pcmk__inject_resource_history(out, cib_node, resource, resource_config_name, rclass, rtype, rprovider); if (cib_resource == NULL) { crm_err("Could not simulate action %d history for resource %s", action->id, resource); free(node); pcmk__xml_free(cib_node); return EINVAL; } // Simulate and display an executor event for the action result op = pcmk__event_from_graph_action(cib_resource, action, PCMK_EXEC_DONE, target_outcome, "User-injected result"); out->message(out, "inject-rsc-action", resource, op->op_type, node, op->interval_ms); // Check whether action is in a list of desired simulated failures for (const GList *iter = fake_op_fail_list; iter != NULL; iter = iter->next) { const char *spec = (const char *) iter->data; char *key = NULL; const char *match_name = NULL; // Allow user to specify anonymous clone with or without instance number key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource; } free(key); // If not found, try the resource's name in the configuration if ((match_name == NULL) && (strcmp(resource, resource_config_name) != 0)) { key = crm_strdup_printf(PCMK__OP_FMT "@%s=", resource_config_name, op->op_type, op->interval_ms, node); if (strncasecmp(key, spec, strlen(key)) == 0) { match_name = resource_config_name; } free(key); } if (match_name == NULL) { continue; // This failed action entry doesn't match } // ${match_name}_${task}_${interval_in_ms}@${node}=${rc} rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc); if (rc != 1) { out->err(out, "Invalid failed operation '%s' " "(result code must be integer)", spec); continue; // Keep checking other list entries } out->info(out, "Pretending action %d failed with rc=%d", action->id, op->rc); pcmk__set_graph_action_flags(action, pcmk__graph_action_failed); graph->abort_priority = PCMK_SCORE_INFINITY; pcmk__inject_failcount(out, fake_cib, cib_node, match_name, op->op_type, op->interval_ms, op->rc); break; } pcmk__inject_action_result(cib_resource, op, node, target_outcome); lrmd_free_event(op); rc = fake_cib->cmds->modify(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call); CRM_ASSERT(rc == pcmk_ok); done: free(node); pcmk__xml_free(cib_node); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a cluster action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Cluster action to simulate * * \return Standard Pacemaker return code */ static int simulate_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *node = crm_element_value(action->xml, PCMK__META_ON_NODE); const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION); xmlNode *rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL, NULL); pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); out->message(out, "inject-cluster-action", node, task, rsc); pcmk__update_graph(graph, action); return pcmk_rc_ok; } /*! * \internal * \brief Simulate successfully executing a fencing action * * \param[in,out] graph Graph to update with action result * \param[in,out] action Fencing action to simulate * * \return Standard Pacemaker return code */ static int simulate_fencing_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) { const char *op = crm_meta_value(action->params, PCMK__META_STONITH_ACTION); char *target = crm_element_value_copy(action->xml, PCMK__META_ON_NODE); out->message(out, "inject-fencing-action", target, op); if (!pcmk__str_eq(op, PCMK_ACTION_ON, pcmk__str_casei)) { int rc = pcmk_ok; GString *xpath = g_string_sized_new(512); // Set node state to offline xmlNode *cib_node = pcmk__inject_node_state_change(fake_cib, target, false); CRM_ASSERT(cib_node != NULL); crm_xml_add(cib_node, PCMK_XA_CRM_DEBUG_ORIGIN, __func__); rc = fake_cib->cmds->replace(fake_cib, PCMK_XE_STATUS, cib_node, cib_sync_call); CRM_ASSERT(rc == pcmk_ok); // Simulate controller clearing node's resource history and attributes pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='", target, "']/" PCMK__XE_LRM, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call); g_string_truncate(xpath, 0); pcmk__g_strcat(xpath, "//" PCMK__XE_NODE_STATE "[@" PCMK_XA_UNAME "='", target, "']" "/" PCMK__XE_TRANSIENT_ATTRIBUTES, NULL); fake_cib->cmds->remove(fake_cib, (const char *) xpath->str, NULL, cib_xpath|cib_sync_call); pcmk__xml_free(cib_node); g_string_free(xpath, TRUE); } pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed); pcmk__update_graph(graph, action); free(target); return pcmk_rc_ok; } enum pcmk__graph_status pcmk__simulate_transition(pcmk_scheduler_t *scheduler, cib_t *cib, const GList *op_fail_list) { pcmk__graph_t *transition = NULL; enum pcmk__graph_status graph_rc; pcmk__graph_functions_t simulation_fns = { simulate_pseudo_action, simulate_resource_action, simulate_cluster_action, simulate_fencing_action, }; out = scheduler->priv->out; fake_cib = cib; fake_op_fail_list = op_fail_list; if (!out->is_quiet(out)) { out->begin_list(out, NULL, NULL, "Executing Cluster Transition"); } pcmk__set_graph_functions(&simulation_fns); - transition = pcmk__unpack_graph(scheduler->graph, crm_system_name); + transition = pcmk__unpack_graph(scheduler->priv->graph, crm_system_name); pcmk__log_graph(LOG_DEBUG, transition); fake_resource_list = scheduler->priv->resources; do { graph_rc = pcmk__execute_graph(transition); } while (graph_rc == pcmk__graph_active); fake_resource_list = NULL; if (graph_rc != pcmk__graph_complete) { out->err(out, "Transition failed: %s", pcmk__graph_status2text(graph_rc)); pcmk__log_graph(LOG_ERR, transition); out->err(out, "An invalid transition was produced"); } pcmk__free_graph(transition); if (!out->is_quiet(out)) { // If not quiet, we'll need the resulting CIB for later display xmlNode *cib_object = NULL; int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call); CRM_ASSERT(rc == pcmk_ok); pe_reset_working_set(scheduler); scheduler->input = cib_object; out->end_list(out); } return graph_rc; } int pcmk__simulate(pcmk_scheduler_t *scheduler, pcmk__output_t *out, const pcmk_injections_t *injections, unsigned int flags, uint32_t section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { int printed = pcmk_rc_no_output; int rc = pcmk_rc_ok; xmlNodePtr input = NULL; cib_t *cib = NULL; rc = cib__signon_query(out, &cib, &input); if (rc != pcmk_rc_ok) { goto simulate_done; } reset(scheduler, input, out, use_date, flags); cluster_status(scheduler); - if ((cib->variant == cib_native) - && pcmk_is_set(section_opts, pcmk_section_times)) { - if (pcmk__our_nodename == NULL) { - // Currently used only in the times section - pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0); - } - scheduler->localhost = pcmk__our_nodename; - } - if (!out->is_quiet(out)) { const bool show_pending = pcmk_is_set(flags, pcmk_sim_show_pending); if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { printed = out->message(out, "maint-mode", scheduler->flags); } if (scheduler->disabled_resources || scheduler->blocked_resources) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); printed = out->info(out, "%d of %d resource instances DISABLED and " "%d BLOCKED from further action due to failure", scheduler->disabled_resources, scheduler->ninstances, scheduler->blocked_resources); } /* Most formatted output headers use caps for each word, but this one * only has the first word capitalized for compatibility with pcs. */ print_cluster_status(scheduler, (show_pending? pcmk_show_pending : 0), section_opts, "Current cluster status", (printed == pcmk_rc_ok)); printed = pcmk_rc_ok; } // If the user requested any injections, handle them if ((injections->node_down != NULL) || (injections->node_fail != NULL) || (injections->node_up != NULL) || (injections->op_inject != NULL) || (injections->ticket_activate != NULL) || (injections->ticket_grant != NULL) || (injections->ticket_revoke != NULL) || (injections->ticket_standby != NULL) || (injections->watchdog != NULL)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); pcmk__inject_scheduler_input(scheduler, cib, injections); printed = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &input, cib_sync_call); if (rc != pcmk_rc_ok) { rc = pcmk_legacy2rc(rc); goto simulate_done; } cleanup_calculations(scheduler); reset(scheduler, input, out, use_date, flags); cluster_status(scheduler); } if (input_file != NULL) { rc = pcmk__xml_write_file(input, input_file, false); if (rc != pcmk_rc_ok) { goto simulate_done; } } if (pcmk_any_flags_set(flags, pcmk_sim_process | pcmk_sim_simulate)) { pcmk__output_t *logger_out = NULL; - unsigned long long scheduler_flags = pcmk__sched_no_compat; + unsigned long long scheduler_flags = pcmk__sched_none; if (pcmk_is_set(scheduler->flags, pcmk__sched_output_scores)) { scheduler_flags |= pcmk__sched_output_scores; } if (pcmk_is_set(scheduler->flags, pcmk__sched_show_utilization)) { scheduler_flags |= pcmk__sched_show_utilization; } if (pcmk_all_flags_set(scheduler->flags, pcmk__sched_output_scores |pcmk__sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores and Utilization Information"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_output_scores)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Assignment Scores"); printed = pcmk_rc_ok; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_show_utilization)) { PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); out->begin_list(out, NULL, NULL, "Utilization Information"); printed = pcmk_rc_ok; } else { rc = pcmk__log_output_new(&logger_out); if (rc != pcmk_rc_ok) { goto simulate_done; } pe__register_messages(logger_out); pcmk__register_lib_messages(logger_out); scheduler->priv->out = logger_out; } pcmk__schedule_actions(input, scheduler_flags, scheduler); if (logger_out == NULL) { out->end_list(out); } else { logger_out->finish(logger_out, CRM_EX_OK, true, NULL); pcmk__output_free(logger_out); scheduler->priv->out = out; } input = NULL; /* Don't try and free it twice */ if (graph_file != NULL) { - rc = pcmk__xml_write_file(scheduler->graph, graph_file, false); + rc = pcmk__xml_write_file(scheduler->priv->graph, graph_file, + false); if (rc != pcmk_rc_ok) { rc = pcmk_rc_graph_error; goto simulate_done; } } if (dot_file != NULL) { rc = write_sim_dotfile(scheduler, dot_file, pcmk_is_set(flags, pcmk_sim_all_actions), pcmk_is_set(flags, pcmk_sim_verbose)); if (rc != pcmk_rc_ok) { rc = pcmk_rc_dot_error; goto simulate_done; } } if (!out->is_quiet(out)) { print_transition_summary(scheduler, printed == pcmk_rc_ok); } } rc = pcmk_rc_ok; if (!pcmk_is_set(flags, pcmk_sim_simulate)) { goto simulate_done; } PCMK__OUTPUT_SPACER_IF(out, printed == pcmk_rc_ok); if (pcmk__simulate_transition(scheduler, cib, injections->op_fail) != pcmk__graph_complete) { rc = pcmk_rc_invalid_transition; } if (out->is_quiet(out)) { goto simulate_done; } set_effective_date(scheduler, true, use_date); if (pcmk_is_set(flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_output_scores); } if (pcmk_is_set(flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_show_utilization); } cluster_status(scheduler); print_cluster_status(scheduler, 0, section_opts, "Revised Cluster Status", true); simulate_done: cib__clean_up_connection(&cib); return rc; } int pcmk_simulate(xmlNodePtr *xml, pcmk_scheduler_t *scheduler, const pcmk_injections_t *injections, unsigned int flags, unsigned int section_opts, const char *use_date, const char *input_file, const char *graph_file, const char *dot_file) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__simulate(scheduler, out, injections, flags, section_opts, use_date, input_file, graph_file, dot_file); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c index f71bed497d..c07adbfb64 100644 --- a/lib/pacemaker/pcmk_status.c +++ b/lib/pacemaker/pcmk_status.c @@ -1,280 +1,283 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // stonith__register_messages() #include #include static stonith_t * fencing_connect(void) { stonith_t *st = stonith_api_new(); int rc = pcmk_rc_ok; if (st == NULL) { return NULL; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_rc_ok) { return st; } else { stonith_api_delete(st); return NULL; } } /*! * \internal * \brief Output the cluster status given a fencer and CIB connection * - * \param[in,out] out Output object + * \param[in,out] scheduler Scheduler object (will be reset) * \param[in,out] stonith Fencer connection * \param[in,out] cib CIB connection * \param[in] current_cib Current CIB XML * \param[in] pcmkd_state \p pacemakerd state * \param[in] fence_history How much of the fencing history to output * \param[in] show Group of \p pcmk_section_e flags * \param[in] show_opts Group of \p pcmk_show_opt_e flags * \param[in] only_node If a node name or tag, include only the * matching node(s) (if any) in the output. * If \p "*" or \p NULL, include all nodes * in the output. * \param[in] only_rsc If a resource ID or tag, include only the * matching resource(s) (if any) in the * output. If \p "*" or \p NULL, include all * resources in the output. * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID * * \return Standard Pacemaker return code */ int -pcmk__output_cluster_status(pcmk__output_t *out, stonith_t *stonith, cib_t *cib, - xmlNode *current_cib, +pcmk__output_cluster_status(pcmk_scheduler_t *scheduler, stonith_t *stonith, + cib_t *cib, xmlNode *current_cib, enum pcmk_pacemakerd_state pcmkd_state, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix) { xmlNode *cib_copy = pcmk__xml_copy(NULL, current_cib); stonith_history_t *stonith_history = NULL; int history_rc = 0; - pcmk_scheduler_t *scheduler = NULL; GList *unames = NULL; GList *resources = NULL; + pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; + if ((scheduler == NULL) || (scheduler->priv->out == NULL)) { + return EINVAL; + } + out = scheduler->priv->out; + rc = pcmk_update_configured_schema(&cib_copy, false); if (rc != pcmk_rc_ok) { cib__clean_up_connection(&cib); pcmk__xml_free(cib_copy); out->err(out, "Upgrade failed: %s", pcmk_rc_str(rc)); return rc; } /* get the stonith-history if there is evidence we need it */ if (fence_history != pcmk__fence_history_none) { history_rc = pcmk__get_fencing_history(stonith, &stonith_history, fence_history); } - scheduler = pe_new_working_set(); - pcmk__mem_assert(scheduler); - pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_compat); - + pe_reset_working_set(scheduler); scheduler->input = cib_copy; - scheduler->priv->out = out; cluster_status(scheduler); - if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) { - if (pcmk__our_nodename == NULL) { - // Currently used only in the times section - pcmk__query_node_name(out, 0, &pcmk__our_nodename, 0); - } - scheduler->localhost = pcmk__our_nodename; - } - /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (pcmk_is_set(show, pcmk_section_bans) || pcmk_is_set(show, pcmk_section_tickets)) { pcmk__unpack_constraints(scheduler); } unames = pe__build_node_name_list(scheduler, only_node); resources = pe__build_rsc_list(scheduler, only_rsc); /* Always print DC if NULL. */ if (scheduler->dc_node == NULL) { show |= pcmk_section_dc; } out->message(out, "cluster-status", scheduler, pcmkd_state, pcmk_rc2exitc(history_rc), stonith_history, fence_history, show, show_opts, neg_location_prefix, unames, resources); g_list_free_full(unames, free); g_list_free_full(resources, free); stonith_history_free(stonith_history); stonith_history = NULL; - pe_free_working_set(scheduler); return rc; } int pcmk_status(xmlNodePtr *xml) { cib_t *cib = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; uint32_t show_opts = pcmk_show_pending |pcmk_show_inactive_rscs |pcmk_show_timing; cib = cib_new(); if (cib == NULL) { return pcmk_rc_cib_corrupt; } rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { cib_delete(cib); return rc; } pcmk__register_lib_messages(out); pe__register_messages(out); stonith__register_messages(out); rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all, show_opts, NULL, NULL, NULL, 0); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); cib_delete(cib); return rc; } /*! * \internal * \brief Query and output the cluster status * * The operation is considered a success if we're able to get the \p pacemakerd * state. If possible, we'll also try to connect to the fencer and CIB and * output their respective status information. * * \param[in,out] out Output object * \param[in,out] cib CIB connection * \param[in] fence_history How much of the fencing history to output * \param[in] show Group of \p pcmk_section_e flags * \param[in] show_opts Group of \p pcmk_show_opt_e flags * \param[in] only_node If a node name or tag, include only the * matching node(s) (if any) in the output. * If \p "*" or \p NULL, include all nodes * in the output. * \param[in] only_rsc If a resource ID or tag, include only the * matching resource(s) (if any) in the * output. If \p "*" or \p NULL, include all * resources in the output. * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID * \param[in] timeout_ms How long to wait for a reply from the * \p pacemakerd API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * If positive, \p pcmk_ipc_dispatch_main * will be used, and a new mainloop will be * created for this purpose (freed before * return). * * \return Standard Pacemaker return code */ int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix, unsigned int timeout_ms) { xmlNode *current_cib = NULL; int rc = pcmk_rc_ok; stonith_t *stonith = NULL; enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid; time_t last_updated = 0; + pcmk_scheduler_t *scheduler = NULL; if (cib == NULL) { return ENOTCONN; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } last_updated = time(NULL); switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_shutting_down: case pcmk_pacemakerd_state_remote: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable out->message(out, "pacemakerd-health", NULL, pcmkd_state, NULL, last_updated); return rc; } if (fence_history != pcmk__fence_history_none) { stonith = fencing_connect(); } } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc != pcmk_rc_ok) { if (pcmkd_state != pcmk_pacemakerd_state_invalid) { // Invalid at this point means we didn't query the pcmkd state out->message(out, "pacemakerd-health", NULL, pcmkd_state, NULL, last_updated); } goto done; } - rc = pcmk__output_cluster_status(out, stonith, cib, current_cib, + scheduler = pe_new_working_set(); + pcmk__mem_assert(scheduler); + scheduler->priv->out = out; + + if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) { + // Currently used only in the times section + pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0); + } + + rc = pcmk__output_cluster_status(scheduler, stonith, cib, current_cib, pcmkd_state, fence_history, show, show_opts, only_node, only_rsc, neg_location_prefix); if (rc != pcmk_rc_ok) { out->err(out, "Error outputting status info from the fencer or CIB"); } done: + pe_free_working_set(scheduler); stonith_api_delete(stonith); pcmk__xml_free(current_cib); return pcmk_rc_ok; } diff --git a/lib/pacemaker/pcmk_verify.c b/lib/pacemaker/pcmk_verify.c index 56c3b9b383..eec557786c 100644 --- a/lib/pacemaker/pcmk_verify.c +++ b/lib/pacemaker/pcmk_verify.c @@ -1,156 +1,156 @@ /* * Copyright 2023-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" int pcmk__parse_cib(pcmk__output_t *out, const char *cib_source, xmlNodePtr *cib_object) { // @COMPAT Take an enum for cib_source instead of trying to figure it out? const char *first = cib_source; if (cib_source == NULL) { crm_info("Reading XML from: live cluster"); return cib__signon_query(out, NULL, cib_object); } while (isspace(*first)) { first++; } if (*first == '<') { *cib_object = pcmk__xml_parse(cib_source); } else { *cib_object = pcmk__xml_read(cib_source); } return (*cib_object == NULL)? ENODATA : pcmk_rc_ok; } int pcmk__verify(pcmk_scheduler_t *scheduler, pcmk__output_t *out, xmlNode **cib_object) { int rc = pcmk_rc_ok; xmlNode *status = NULL; xmlNode *cib_object_copy = NULL; CRM_ASSERT(cib_object != NULL); if (!pcmk__xe_is(*cib_object, PCMK_XE_CIB)) { rc = EBADMSG; out->err(out, "This tool can only check complete configurations (i.e. those starting with )."); goto verify_done; } status = pcmk_find_cib_element(*cib_object, PCMK_XE_STATUS); if (status == NULL) { pcmk__xe_create(*cib_object, PCMK_XE_STATUS); } if (!pcmk__validate_xml(*cib_object, NULL, (xmlRelaxNGValidityErrorFunc) out->err, out)) { crm_config_error = TRUE; rc = pcmk_rc_schema_validation; goto verify_done; } rc = pcmk_update_configured_schema(cib_object, false); if (rc != pcmk_rc_ok) { crm_config_error = TRUE; out->err(out, "The cluster will NOT be able to use this configuration.\n" "Please manually update the configuration to conform to the %s syntax.", pcmk__highest_schema_name()); goto verify_done; } /* Process the configuration to set crm_config_error/crm_config_warning. * * @TODO Some parts of the configuration are unpacked only when needed (for * example, action configuration), so we aren't necessarily checking those. */ if (*cib_object != NULL) { - unsigned long long flags = pcmk__sched_no_counts|pcmk__sched_no_compat; + unsigned long long flags = pcmk__sched_no_counts; if (status == NULL) { // No status available, so do minimal checks flags |= pcmk__sched_validate_only; } cib_object_copy = pcmk__xml_copy(NULL, *cib_object); /* The scheduler takes ownership of the XML object and potentially * frees it later. We want the caller of pcmk__verify to retain * ownership of the passed-in XML object, hence we pass in a copy * to the scheduler. */ pcmk__schedule_actions(cib_object_copy, flags, scheduler); } verify_done: if (crm_config_error) { rc = pcmk_rc_schema_validation; pcmk__config_err("CIB did not pass schema validation"); } else if (crm_config_warning) { rc = pcmk_rc_schema_validation; } return rc; } int pcmk_verify(xmlNodePtr *xml, const char *cib_source) { pcmk_scheduler_t *scheduler = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; xmlNode *cib_object = NULL; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } pe__register_messages(out); pcmk__register_lib_messages(out); rc = pcmk__parse_cib(out, cib_source, &cib_object); if (rc != pcmk_rc_ok) { out->err(out, "Couldn't parse input"); goto done; } scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = errno; out->err(out, "Couldn't allocate scheduler data: %s", pcmk_rc_str(rc)); goto done; } scheduler->priv->out = out; rc = pcmk__verify(scheduler, out, &cib_object); done: pe_free_working_set(scheduler); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); pcmk__xml_free(cib_object); return rc; } diff --git a/lib/pengine/complex.c b/lib/pengine/complex.c index 7eb1fb0348..de0881bb29 100644 --- a/lib/pengine/complex.c +++ b/lib/pengine/complex.c @@ -1,1272 +1,1272 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include "pe_status_private.h" void populate_hash(xmlNode * nvpair_list, GHashTable * hash, const char **attrs, int attrs_length); static pcmk_node_t *active_node(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean); static pcmk__rsc_methods_t resource_class_functions[] = { { native_unpack, native_find_rsc, native_parameter, native_active, native_resource_state, native_location, native_free, pe__count_common, pe__native_is_filtered, active_node, pe__primitive_max_per_node, }, { group_unpack, native_find_rsc, native_parameter, group_active, group_resource_state, native_location, group_free, pe__count_common, pe__group_is_filtered, active_node, pe__group_max_per_node, }, { clone_unpack, native_find_rsc, native_parameter, clone_active, clone_resource_state, native_location, clone_free, pe__count_common, pe__clone_is_filtered, active_node, pe__clone_max_per_node, }, { pe__unpack_bundle, native_find_rsc, native_parameter, pe__bundle_active, pe__bundle_resource_state, native_location, pe__free_bundle, pe__count_bundle, pe__bundle_is_filtered, pe__bundle_active_node, pe__bundle_max_per_node, } }; static enum pcmk__rsc_variant get_resource_type(const char *name) { if (pcmk__str_eq(name, PCMK_XE_PRIMITIVE, pcmk__str_casei)) { return pcmk__rsc_variant_primitive; } else if (pcmk__str_eq(name, PCMK_XE_GROUP, pcmk__str_casei)) { return pcmk__rsc_variant_group; } else if (pcmk__str_eq(name, PCMK_XE_CLONE, pcmk__str_casei)) { return pcmk__rsc_variant_clone; } else if (pcmk__str_eq(name, PCMK__XE_PROMOTABLE_LEGACY, pcmk__str_casei)) { // @COMPAT deprecated since 2.0.0 return pcmk__rsc_variant_clone; } else if (pcmk__str_eq(name, PCMK_XE_BUNDLE, pcmk__str_casei)) { return pcmk__rsc_variant_bundle; } return pcmk__rsc_variant_unknown; } /*! * \internal * \brief Insert a meta-attribute if not already present * * \param[in] key Meta-attribute name * \param[in] value Meta-attribute value to add if not already present * \param[in,out] table Meta-attribute hash table to insert into * * \note This is like pcmk__insert_meta() except it won't overwrite existing * values. */ static void dup_attr(gpointer key, gpointer value, gpointer user_data) { GHashTable *table = user_data; CRM_CHECK((key != NULL) && (table != NULL), return); if (pcmk__str_eq((const char *) value, "#default", pcmk__str_casei)) { // @COMPAT Deprecated since 2.1.8 pcmk__config_warn("Support for setting meta-attributes (such as %s) to " "the explicit value '#default' is deprecated and " "will be removed in a future release", (const char *) key); } else if ((value != NULL) && (g_hash_table_lookup(table, key) == NULL)) { pcmk__insert_dup(table, (const char *) key, (const char *) value); } } static void expand_parents_fixed_nvpairs(pcmk_resource_t *rsc, pe_rule_eval_data_t *rule_data, GHashTable *meta_hash, pcmk_scheduler_t *scheduler) { GHashTable *parent_orig_meta = pcmk__strkey_table(free, free); pcmk_resource_t *p = rsc->priv->parent; if (p == NULL) { return ; } /* Search all parent resources, get the fixed value of * PCMK_XE_META_ATTRIBUTES set only in the original xml, and stack it in the * hash table. The fixed value of the lower parent resource takes precedence * and is not overwritten. */ while(p != NULL) { /* A hash table for comparison is generated, including the id-ref. */ pe__unpack_dataset_nvpairs(p->priv->xml, PCMK_XE_META_ATTRIBUTES, rule_data, parent_orig_meta, NULL, FALSE, scheduler); p = p->priv->parent; } if (parent_orig_meta != NULL) { // This will not overwrite any values already existing for child g_hash_table_foreach(parent_orig_meta, dup_attr, meta_hash); } if (parent_orig_meta != NULL) { g_hash_table_destroy(parent_orig_meta); } return ; } void get_meta_attributes(GHashTable * meta_hash, pcmk_resource_t * rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS), .provider = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER), .agent = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE) }; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = NULL }; if (node) { /* @COMPAT Support for node attribute expressions in rules for * meta-attributes is deprecated. When we can break behavioral backward * compatibility, drop this block. */ rule_data.node_hash = node->priv->attrs; } for (xmlAttrPtr a = pcmk__xe_first_attr(rsc->priv->xml); a != NULL; a = a->next) { if (a->children != NULL) { dup_attr((gpointer) a->name, (gpointer) a->children->content, meta_hash); } } pe__unpack_dataset_nvpairs(rsc->priv->xml, PCMK_XE_META_ATTRIBUTES, &rule_data, meta_hash, NULL, FALSE, scheduler); /* Set the PCMK_XE_META_ATTRIBUTES explicitly set in the parent resource to * the hash table of the child resource. If it is already explicitly set as * a child, it will not be overwritten. */ if (rsc->priv->parent != NULL) { expand_parents_fixed_nvpairs(rsc, &rule_data, meta_hash, scheduler); } /* check the defaults */ pe__unpack_dataset_nvpairs(scheduler->priv->rsc_defaults, PCMK_XE_META_ATTRIBUTES, &rule_data, meta_hash, NULL, FALSE, scheduler); /* If there is PCMK_XE_META_ATTRIBUTES that the parent resource has not * explicitly set, set a value that is not set from PCMK_XE_RSC_DEFAULTS * either. The values already set up to this point will not be overwritten. */ if (rsc->priv->parent != NULL) { g_hash_table_foreach(rsc->priv->parent->priv->meta, dup_attr, meta_hash); } } void get_rsc_attributes(GHashTable *meta_hash, const pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; if (node) { rule_data.node_hash = node->priv->attrs; } pe__unpack_dataset_nvpairs(rsc->priv->xml, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, meta_hash, NULL, FALSE, scheduler); /* set anything else based on the parent */ if (rsc->priv->parent != NULL) { get_rsc_attributes(meta_hash, rsc->priv->parent, node, scheduler); } else { if (pcmk__xe_first_child(scheduler->priv->rsc_defaults, PCMK_XE_INSTANCE_ATTRIBUTES, NULL, NULL) != NULL) { /* Not possible with schema validation enabled * * @COMPAT Drop support when we can break behavioral * backward compatibility */ pcmk__warn_once(pcmk__wo_instance_defaults, "Support for " PCMK_XE_INSTANCE_ATTRIBUTES " in " PCMK_XE_RSC_DEFAULTS " is deprecated and will be " "removed in a future release"); } /* and finally check the defaults */ pe__unpack_dataset_nvpairs(scheduler->priv->rsc_defaults, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, meta_hash, NULL, FALSE, scheduler); } } static char * template_op_key(xmlNode * op) { const char *name = crm_element_value(op, PCMK_XA_NAME); const char *role = crm_element_value(op, PCMK_XA_ROLE); char *key = NULL; if ((role == NULL) || pcmk__strcase_any_of(role, PCMK_ROLE_STARTED, PCMK_ROLE_UNPROMOTED, PCMK__ROLE_UNPROMOTED_LEGACY, NULL)) { role = PCMK__ROLE_UNKNOWN; } key = crm_strdup_printf("%s-%s", name, role); return key; } static gboolean unpack_template(xmlNode *xml_obj, xmlNode **expanded_xml, pcmk_scheduler_t *scheduler) { xmlNode *cib_resources = NULL; xmlNode *template = NULL; xmlNode *new_xml = NULL; xmlNode *child_xml = NULL; xmlNode *rsc_ops = NULL; xmlNode *template_ops = NULL; const char *template_ref = NULL; const char *id = NULL; if (xml_obj == NULL) { pcmk__config_err("No resource object for template unpacking"); return FALSE; } template_ref = crm_element_value(xml_obj, PCMK_XA_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = pcmk__xe_id(xml_obj); if (id == NULL) { pcmk__config_err("'%s' object must have a id", xml_obj->name); return FALSE; } if (pcmk__str_eq(template_ref, id, pcmk__str_none)) { pcmk__config_err("The resource object '%s' should not reference itself", id); return FALSE; } cib_resources = get_xpath_object("//" PCMK_XE_RESOURCES, scheduler->input, LOG_TRACE); if (cib_resources == NULL) { pcmk__config_err("No resources configured"); return FALSE; } template = pcmk__xe_first_child(cib_resources, PCMK_XE_TEMPLATE, PCMK_XA_ID, template_ref); if (template == NULL) { pcmk__config_err("No template named '%s'", template_ref); return FALSE; } new_xml = pcmk__xml_copy(NULL, template); xmlNodeSetName(new_xml, xml_obj->name); crm_xml_add(new_xml, PCMK_XA_ID, id); crm_xml_add(new_xml, PCMK__META_CLONE, crm_element_value(xml_obj, PCMK__META_CLONE)); template_ops = pcmk__xe_first_child(new_xml, PCMK_XE_OPERATIONS, NULL, NULL); for (child_xml = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); child_xml != NULL; child_xml = pcmk__xe_next(child_xml)) { xmlNode *new_child = pcmk__xml_copy(new_xml, child_xml); if (pcmk__xe_is(new_child, PCMK_XE_OPERATIONS)) { rsc_ops = new_child; } } if (template_ops && rsc_ops) { xmlNode *op = NULL; GHashTable *rsc_ops_hash = pcmk__strkey_table(free, NULL); for (op = pcmk__xe_first_child(rsc_ops, NULL, NULL, NULL); op != NULL; op = pcmk__xe_next(op)) { char *key = template_op_key(op); g_hash_table_insert(rsc_ops_hash, key, op); } for (op = pcmk__xe_first_child(template_ops, NULL, NULL, NULL); op != NULL; op = pcmk__xe_next(op)) { char *key = template_op_key(op); if (g_hash_table_lookup(rsc_ops_hash, key) == NULL) { pcmk__xml_copy(rsc_ops, op); } free(key); } if (rsc_ops_hash) { g_hash_table_destroy(rsc_ops_hash); } pcmk__xml_free(template_ops); } /*pcmk__xml_free(*expanded_xml); */ *expanded_xml = new_xml; #if 0 /* Disable multi-level templates for now */ if (!unpack_template(new_xml, expanded_xml, scheduler)) { pcmk__xml_free(*expanded_xml); *expanded_xml = NULL; return FALSE; } #endif return TRUE; } static gboolean add_template_rsc(xmlNode *xml_obj, pcmk_scheduler_t *scheduler) { const char *template_ref = NULL; const char *id = NULL; if (xml_obj == NULL) { pcmk__config_err("No resource object for processing resource list " "of template"); return FALSE; } template_ref = crm_element_value(xml_obj, PCMK_XA_TEMPLATE); if (template_ref == NULL) { return TRUE; } id = pcmk__xe_id(xml_obj); if (id == NULL) { pcmk__config_err("'%s' object must have a id", xml_obj->name); return FALSE; } if (pcmk__str_eq(template_ref, id, pcmk__str_none)) { pcmk__config_err("The resource object '%s' should not reference itself", id); return FALSE; } - pcmk__add_idref(scheduler->template_rsc_sets, template_ref, id); + pcmk__add_idref(scheduler->priv->templates, template_ref, id); return TRUE; } static bool detect_promotable(pcmk_resource_t *rsc) { const char *promotable = g_hash_table_lookup(rsc->priv->meta, PCMK_META_PROMOTABLE); if (crm_is_true(promotable)) { return TRUE; } // @COMPAT deprecated since 2.0.0 if (pcmk__xe_is(rsc->priv->xml, PCMK__XE_PROMOTABLE_LEGACY)) { pcmk__warn_once(pcmk__wo_master_element, "Support for <" PCMK__XE_PROMOTABLE_LEGACY "> (such " "as in %s) is deprecated and will be removed in a " "future release. Use <" PCMK_XE_CLONE "> with a " PCMK_META_PROMOTABLE " meta-attribute instead.", rsc->id); pcmk__insert_dup(rsc->priv->meta, PCMK_META_PROMOTABLE, PCMK_VALUE_TRUE); return TRUE; } return FALSE; } static void free_params_table(gpointer data) { g_hash_table_destroy((GHashTable *) data); } /*! * \brief Get a table of resource parameters * * \param[in,out] rsc Resource to query * \param[in] node Node for evaluating rules (NULL for defaults) * \param[in,out] scheduler Scheduler data * * \return Hash table containing resource parameter names and values * (or NULL if \p rsc or \p scheduler is NULL) * \note The returned table will be destroyed when the resource is freed, so * callers should not destroy it. */ GHashTable * pe_rsc_params(pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { GHashTable *params_on_node = NULL; /* A NULL node is used to request the resource's default parameters * (not evaluated for node), but we always want something non-NULL * as a hash table key. */ const char *node_name = ""; // Sanity check if ((rsc == NULL) || (scheduler == NULL)) { return NULL; } if ((node != NULL) && (node->priv->name != NULL)) { node_name = node->priv->name; } // Find the parameter table for given node if (rsc->priv->parameter_cache == NULL) { rsc->priv->parameter_cache = pcmk__strikey_table(free, free_params_table); } else { params_on_node = g_hash_table_lookup(rsc->priv->parameter_cache, node_name); } // If none exists yet, create one with parameters evaluated for node if (params_on_node == NULL) { params_on_node = pcmk__strkey_table(free, free); get_rsc_attributes(params_on_node, rsc, node, scheduler); g_hash_table_insert(rsc->priv->parameter_cache, strdup(node_name), params_on_node); } return params_on_node; } /*! * \internal * \brief Unpack a resource's \c PCMK_META_REQUIRES meta-attribute * * \param[in,out] rsc Resource being unpacked * \param[in] value Value of \c PCMK_META_REQUIRES meta-attribute * \param[in] is_default Whether \p value was selected by default */ static void unpack_requires(pcmk_resource_t *rsc, const char *value, bool is_default) { const pcmk_scheduler_t *scheduler = rsc->priv->scheduler; if (pcmk__str_eq(value, PCMK_VALUE_NOTHING, pcmk__str_casei)) { } else if (pcmk__str_eq(value, PCMK_VALUE_QUORUM, pcmk__str_casei)) { pcmk__set_rsc_flags(rsc, pcmk__rsc_needs_quorum); } else if (pcmk__str_eq(value, PCMK_VALUE_FENCING, pcmk__str_casei)) { pcmk__set_rsc_flags(rsc, pcmk__rsc_needs_fencing); if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("%s requires fencing but fencing is disabled", rsc->id); } } else if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) { pcmk__config_warn("Resetting \"" PCMK_META_REQUIRES "\" for %s " "to \"" PCMK_VALUE_QUORUM "\" because fencing " "devices cannot require unfencing", rsc->id); unpack_requires(rsc, PCMK_VALUE_QUORUM, true); return; } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Resetting \"" PCMK_META_REQUIRES "\" for %s " "to \"" PCMK_VALUE_QUORUM "\" because fencing is " "disabled", rsc->id); unpack_requires(rsc, PCMK_VALUE_QUORUM, true); return; } else { pcmk__set_rsc_flags(rsc, pcmk__rsc_needs_fencing |pcmk__rsc_needs_unfencing); } } else { const char *orig_value = value; if (pcmk_is_set(rsc->flags, pcmk__rsc_fence_device)) { value = PCMK_VALUE_QUORUM; } else if (pcmk__is_primitive(rsc) && xml_contains_remote_node(rsc->priv->xml)) { value = PCMK_VALUE_QUORUM; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_enable_unfencing)) { value = PCMK_VALUE_UNFENCING; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { value = PCMK_VALUE_FENCING; } else if (scheduler->no_quorum_policy == pcmk_no_quorum_ignore) { value = PCMK_VALUE_NOTHING; } else { value = PCMK_VALUE_QUORUM; } if (orig_value != NULL) { pcmk__config_err("Resetting '" PCMK_META_REQUIRES "' for %s " "to '%s' because '%s' is not valid", rsc->id, value, orig_value); } unpack_requires(rsc, value, true); return; } pcmk__rsc_trace(rsc, "\tRequired to start: %s%s", value, (is_default? " (default)" : "")); } static void warn_about_deprecated_classes(pcmk_resource_t *rsc) { const char *std = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_UPSTART, pcmk__str_none)) { pcmk__warn_once(pcmk__wo_upstart, "Support for Upstart resources (such as %s) is " "deprecated and will be removed in a future release", rsc->id); } else if (pcmk__str_eq(std, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_none)) { pcmk__warn_once(pcmk__wo_nagios, "Support for Nagios resources (such as %s) is " "deprecated and will be removed in a future release", rsc->id); } } /*! * \internal * \brief Unpack configuration XML for a given resource * * Unpack the XML object containing a resource's configuration into a new * \c pcmk_resource_t object. * * \param[in] xml_obj XML node containing the resource's configuration * \param[out] rsc Where to store the unpacked resource information * \param[in] parent Resource's parent, if any * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code * \note If pcmk_rc_ok is returned, \p *rsc is guaranteed to be non-NULL, and * the caller is responsible for freeing it using its variant-specific * free() method. Otherwise, \p *rsc is guaranteed to be NULL. */ int pe__unpack_resource(xmlNode *xml_obj, pcmk_resource_t **rsc, pcmk_resource_t *parent, pcmk_scheduler_t *scheduler) { xmlNode *expanded_xml = NULL; xmlNode *ops = NULL; const char *value = NULL; const char *id = NULL; bool guest_node = false; bool remote_node = false; pcmk__resource_private_t *rsc_private = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = NULL, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; CRM_CHECK(rsc != NULL, return EINVAL); CRM_CHECK((xml_obj != NULL) && (scheduler != NULL), *rsc = NULL; return EINVAL); rule_data.now = scheduler->priv->now; crm_log_xml_trace(xml_obj, "[raw XML]"); id = crm_element_value(xml_obj, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring <%s> configuration without " PCMK_XA_ID, xml_obj->name); return pcmk_rc_unpack_error; } if (unpack_template(xml_obj, &expanded_xml, scheduler) == FALSE) { return pcmk_rc_unpack_error; } *rsc = calloc(1, sizeof(pcmk_resource_t)); if (*rsc == NULL) { pcmk__sched_err(scheduler, "Unable to allocate memory for resource '%s'", id); return ENOMEM; } (*rsc)->priv = calloc(1, sizeof(pcmk__resource_private_t)); if ((*rsc)->priv == NULL) { pcmk__sched_err(scheduler, "Unable to allocate memory for resource '%s'", id); free(*rsc); return ENOMEM; } rsc_private = (*rsc)->priv; rsc_private->scheduler = scheduler; if (expanded_xml) { crm_log_xml_trace(expanded_xml, "[expanded XML]"); rsc_private->xml = expanded_xml; rsc_private->orig_xml = xml_obj; } else { rsc_private->xml = xml_obj; rsc_private->orig_xml = NULL; } /* Do not use xml_obj from here on, use (*rsc)->xml in case templates are involved */ rsc_private->parent = parent; ops = pcmk__xe_first_child(rsc_private->xml, PCMK_XE_OPERATIONS, NULL, NULL); rsc_private->ops_xml = pcmk__xe_resolve_idref(ops, scheduler->input); rsc_private->variant = get_resource_type((const char *) rsc_private->xml->name); if (rsc_private->variant == pcmk__rsc_variant_unknown) { pcmk__config_err("Ignoring resource '%s' of unknown type '%s'", id, rsc_private->xml->name); common_free(*rsc); *rsc = NULL; return pcmk_rc_unpack_error; } rsc_private->meta = pcmk__strkey_table(free, free); rsc_private->utilization = pcmk__strkey_table(free, free); rsc_private->probed_nodes = pcmk__strkey_table(NULL, free); rsc_private->allowed_nodes = pcmk__strkey_table(NULL, free); value = crm_element_value(rsc_private->xml, PCMK__META_CLONE); if (value) { (*rsc)->id = crm_strdup_printf("%s:%s", id, value); pcmk__insert_meta(rsc_private, PCMK__META_CLONE, value); } else { (*rsc)->id = strdup(id); } warn_about_deprecated_classes(*rsc); rsc_private->fns = &resource_class_functions[rsc_private->variant]; get_meta_attributes(rsc_private->meta, *rsc, NULL, scheduler); (*rsc)->flags = 0; pcmk__set_rsc_flags(*rsc, pcmk__rsc_unassigned); if (!pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_managed); } rsc_private->orig_role = pcmk_role_stopped; rsc_private->next_role = pcmk_role_unknown; rsc_private->ban_after_failures = PCMK_SCORE_INFINITY; value = g_hash_table_lookup(rsc_private->meta, PCMK_META_PRIORITY); rsc_private->priority = char2score(value); value = g_hash_table_lookup(rsc_private->meta, PCMK_META_CRITICAL); if ((value == NULL) || crm_is_true(value)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_critical); } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_NOTIFY); if (crm_is_true(value)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_notify); } if (xml_contains_remote_node(rsc_private->xml)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_is_remote_connection); if (g_hash_table_lookup(rsc_private->meta, PCMK__META_CONTAINER)) { guest_node = true; } else { remote_node = true; } } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_ALLOW_MIGRATE); if (crm_is_true(value)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_migratable); } else if ((value == NULL) && remote_node) { /* By default, we want remote nodes to be able * to float around the cluster without having to stop all the * resources within the remote-node before moving. Allowing * migration support enables this feature. If this ever causes * problems, migration support can be explicitly turned off with * PCMK_META_ALLOW_MIGRATE=false. */ pcmk__set_rsc_flags(*rsc, pcmk__rsc_migratable); } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_IS_MANAGED); if (value != NULL) { if (pcmk__str_eq(PCMK_VALUE_DEFAULT, value, pcmk__str_casei)) { // @COMPAT Deprecated since 2.1.8 pcmk__config_warn("Support for setting " PCMK_META_IS_MANAGED " to the explicit value '" PCMK_VALUE_DEFAULT "' is deprecated and will be removed in a " "future release (just leave it unset)"); } else if (crm_is_true(value)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_managed); } else { pcmk__clear_rsc_flags(*rsc, pcmk__rsc_managed); } } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_MAINTENANCE); if (crm_is_true(value)) { pcmk__clear_rsc_flags(*rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(*rsc, pcmk__rsc_maintenance); } if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { pcmk__clear_rsc_flags(*rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(*rsc, pcmk__rsc_maintenance); } if (pcmk__is_clone(pe__const_top_resource(*rsc, false))) { value = g_hash_table_lookup(rsc_private->meta, PCMK_META_GLOBALLY_UNIQUE); if (crm_is_true(value)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_unique); } if (detect_promotable(*rsc)) { pcmk__set_rsc_flags(*rsc, pcmk__rsc_promotable); } } else { pcmk__set_rsc_flags(*rsc, pcmk__rsc_unique); } // @COMPAT Deprecated meta-attribute value = g_hash_table_lookup(rsc_private->meta, PCMK__META_RESTART_TYPE); if (pcmk__str_eq(value, PCMK_VALUE_RESTART, pcmk__str_casei)) { rsc_private->restart_type = pcmk__restart_restart; pcmk__rsc_trace(*rsc, "%s dependency restart handling: restart", (*rsc)->id); pcmk__warn_once(pcmk__wo_restart_type, "Support for " PCMK__META_RESTART_TYPE " is deprecated " "and will be removed in a future release"); } else { rsc_private->restart_type = pcmk__restart_ignore; pcmk__rsc_trace(*rsc, "%s dependency restart handling: ignore", (*rsc)->id); } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_MULTIPLE_ACTIVE); if (pcmk__str_eq(value, PCMK_VALUE_STOP_ONLY, pcmk__str_casei)) { rsc_private->multiply_active_policy = pcmk__multiply_active_stop; pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: stop only", (*rsc)->id); } else if (pcmk__str_eq(value, PCMK_VALUE_BLOCK, pcmk__str_casei)) { rsc_private->multiply_active_policy = pcmk__multiply_active_block; pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: block", (*rsc)->id); } else if (pcmk__str_eq(value, PCMK_VALUE_STOP_UNEXPECTED, pcmk__str_casei)) { rsc_private->multiply_active_policy = pcmk__multiply_active_unexpected; pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: " "stop unexpected instances", (*rsc)->id); } else { // PCMK_VALUE_STOP_START if (!pcmk__str_eq(value, PCMK_VALUE_STOP_START, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_warn("%s is not a valid value for " PCMK_META_MULTIPLE_ACTIVE ", using default of " "\"" PCMK_VALUE_STOP_START "\"", value); } rsc_private->multiply_active_policy = pcmk__multiply_active_restart; pcmk__rsc_trace(*rsc, "%s multiple running resource recovery: stop/start", (*rsc)->id); } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_RESOURCE_STICKINESS); if (value != NULL) { if (pcmk__str_eq(PCMK_VALUE_DEFAULT, value, pcmk__str_casei)) { // @COMPAT Deprecated since 2.1.8 pcmk__config_warn("Support for setting " PCMK_META_RESOURCE_STICKINESS " to the explicit value '" PCMK_VALUE_DEFAULT "' is deprecated and will be removed in a " "future release (just leave it unset)"); } else { rsc_private->stickiness = char2score(value); } } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_MIGRATION_THRESHOLD); if (value != NULL) { if (pcmk__str_eq(PCMK_VALUE_DEFAULT, value, pcmk__str_casei)) { // @COMPAT Deprecated since 2.1.8 pcmk__config_warn("Support for setting " PCMK_META_MIGRATION_THRESHOLD " to the explicit value '" PCMK_VALUE_DEFAULT "' is deprecated and will be removed in a " "future release (just leave it unset)"); } else { rsc_private->ban_after_failures = char2score(value); if (rsc_private->ban_after_failures < 0) { /* @COMPAT We use 1 here to preserve previous behavior, but this * should probably use the default (INFINITY) or 0 (to disable) * instead. */ pcmk__warn_once(pcmk__wo_neg_threshold, PCMK_META_MIGRATION_THRESHOLD " must be non-negative, using 1 instead"); rsc_private->ban_after_failures = 1; } } } if (pcmk__str_eq(crm_element_value(rsc_private->xml, PCMK_XA_CLASS), PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing); pcmk__set_rsc_flags(*rsc, pcmk__rsc_fence_device); } value = g_hash_table_lookup(rsc_private->meta, PCMK_META_REQUIRES); unpack_requires(*rsc, value, false); value = g_hash_table_lookup(rsc_private->meta, PCMK_META_FAILURE_TIMEOUT); if (value != NULL) { pcmk_parse_interval_spec(value, &(rsc_private->failure_expiration_ms)); } if (remote_node) { GHashTable *params = pe_rsc_params(*rsc, NULL, scheduler); /* Grabbing the value now means that any rules based on node attributes * will evaluate to false, so such rules should not be used with * PCMK_REMOTE_RA_RECONNECT_INTERVAL. * * @TODO Evaluate per node before using */ value = g_hash_table_lookup(params, PCMK_REMOTE_RA_RECONNECT_INTERVAL); if (value) { /* reconnect delay works by setting failure_timeout and preventing the * connection from starting until the failure is cleared. */ pcmk_parse_interval_spec(value, &(rsc_private->remote_reconnect_ms)); /* We want to override any default failure_timeout in use when remote * PCMK_REMOTE_RA_RECONNECT_INTERVAL is in use. */ rsc_private->failure_expiration_ms = rsc_private->remote_reconnect_ms; } } get_target_role(*rsc, &(rsc_private->next_role)); pcmk__rsc_trace(*rsc, "%s desired next state: %s", (*rsc)->id, (rsc_private->next_role == pcmk_role_unknown)? "default" : pcmk_role_text(rsc_private->next_role)); if (rsc_private->fns->unpack(*rsc, scheduler) == FALSE) { rsc_private->fns->free(*rsc); *rsc = NULL; return pcmk_rc_unpack_error; } if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { // This tag must stay exactly the same because it is tested elsewhere resource_location(*rsc, NULL, 0, "symmetric_default", scheduler); } else if (guest_node) { /* remote resources tied to a container resource must always be allowed * to opt-in to the cluster. Whether the connection resource is actually * allowed to be placed on a node is dependent on the container resource */ resource_location(*rsc, NULL, 0, "remote_connection_default", scheduler); } pcmk__rsc_trace(*rsc, "%s action notification: %s", (*rsc)->id, pcmk_is_set((*rsc)->flags, pcmk__rsc_notify)? "required" : "not required"); pe__unpack_dataset_nvpairs(rsc_private->xml, PCMK_XE_UTILIZATION, &rule_data, rsc_private->utilization, NULL, FALSE, scheduler); if (expanded_xml) { if (add_template_rsc(xml_obj, scheduler) == FALSE) { rsc_private->fns->free(*rsc); *rsc = NULL; return pcmk_rc_unpack_error; } } return pcmk_rc_ok; } gboolean is_parent(pcmk_resource_t *child, pcmk_resource_t *rsc) { pcmk_resource_t *parent = child; if (parent == NULL || rsc == NULL) { return FALSE; } while (parent->priv->parent != NULL) { if (parent->priv->parent == rsc) { return TRUE; } parent = parent->priv->parent; } return FALSE; } pcmk_resource_t * uber_parent(pcmk_resource_t *rsc) { pcmk_resource_t *parent = rsc; if (parent == NULL) { return NULL; } while ((parent->priv->parent != NULL) && !pcmk__is_bundle(parent->priv->parent)) { parent = parent->priv->parent; } return parent; } /*! * \internal * \brief Get the topmost parent of a resource as a const pointer * * \param[in] rsc Resource to check * \param[in] include_bundle If true, go all the way to bundle * * \return \p NULL if \p rsc is NULL, \p rsc if \p rsc has no parent, * the bundle if \p rsc is bundled and \p include_bundle is true, * otherwise the topmost parent of \p rsc up to a clone */ const pcmk_resource_t * pe__const_top_resource(const pcmk_resource_t *rsc, bool include_bundle) { const pcmk_resource_t *parent = rsc; if (parent == NULL) { return NULL; } while (parent->priv->parent != NULL) { if (!include_bundle && pcmk__is_bundle(parent->priv->parent)) { break; } parent = parent->priv->parent; } return parent; } void common_free(pcmk_resource_t * rsc) { if (rsc == NULL) { return; } pcmk__rsc_trace(rsc, "Freeing %s", rsc->id); if (rsc->priv->parameter_cache != NULL) { g_hash_table_destroy(rsc->priv->parameter_cache); } if ((rsc->priv->parent == NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { pcmk__xml_free(rsc->priv->xml); rsc->priv->xml = NULL; pcmk__xml_free(rsc->priv->orig_xml); rsc->priv->orig_xml = NULL; } else if (rsc->priv->orig_xml != NULL) { // rsc->private->xml was expanded from a template pcmk__xml_free(rsc->priv->xml); rsc->priv->xml = NULL; } free(rsc->id); free(rsc->priv->variant_opaque); free(rsc->priv->history_id); free(rsc->priv->pending_action); free(rsc->priv->assigned_node); g_list_free(rsc->priv->actions); g_list_free(rsc->priv->active_nodes); g_list_free(rsc->priv->launched); g_list_free(rsc->priv->dangling_migration_sources); g_list_free(rsc->priv->with_this_colocations); g_list_free(rsc->priv->this_with_colocations); g_list_free(rsc->priv->location_constraints); g_list_free(rsc->priv->ticket_constraints); if (rsc->priv->meta != NULL) { g_hash_table_destroy(rsc->priv->meta); } if (rsc->priv->utilization != NULL) { g_hash_table_destroy(rsc->priv->utilization); } if (rsc->priv->probed_nodes != NULL) { g_hash_table_destroy(rsc->priv->probed_nodes); } if (rsc->priv->allowed_nodes != NULL) { g_hash_table_destroy(rsc->priv->allowed_nodes); } free(rsc->priv); free(rsc); } /*! * \internal * \brief Count a node and update most preferred to it as appropriate * * \param[in] rsc An active resource * \param[in] node A node that \p rsc is active on * \param[in,out] active This will be set to \p node if \p node is more * preferred than the current value * \param[in,out] count_all If not NULL, this will be incremented * \param[in,out] count_clean If not NULL, this will be incremented if \p node * is online and clean * * \return true if the count should continue, or false if sufficiently known */ bool pe__count_active_node(const pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_node_t **active, unsigned int *count_all, unsigned int *count_clean) { bool keep_looking = false; bool is_happy = false; CRM_CHECK((rsc != NULL) && (node != NULL) && (active != NULL), return false); is_happy = node->details->online && !node->details->unclean; if (count_all != NULL) { ++*count_all; } if ((count_clean != NULL) && is_happy) { ++*count_clean; } if ((count_all != NULL) || (count_clean != NULL)) { keep_looking = true; // We're counting, so go through entire list } if (rsc->priv->partial_migration_source != NULL) { if (pcmk__same_node(node, rsc->priv->partial_migration_source)) { *active = node; // This is the migration source } else { keep_looking = true; } } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) { if (is_happy && ((*active == NULL) || !(*active)->details->online || (*active)->details->unclean)) { *active = node; // This is the first clean node } else { keep_looking = true; } } if (*active == NULL) { *active = node; // This is the first node checked } return keep_looking; } // Shared implementation of pcmk__rsc_methods_t:active_node() static pcmk_node_t * active_node(const pcmk_resource_t *rsc, unsigned int *count_all, unsigned int *count_clean) { pcmk_node_t *active = NULL; if (count_all != NULL) { *count_all = 0; } if (count_clean != NULL) { *count_clean = 0; } if (rsc == NULL) { return NULL; } for (GList *iter = rsc->priv->active_nodes; iter != NULL; iter = iter->next) { if (!pe__count_active_node(rsc, (pcmk_node_t *) iter->data, &active, count_all, count_clean)) { break; // Don't waste time iterating if we don't have to } } return active; } /*! * \brief * \internal Find and count active nodes according to \c PCMK_META_REQUIRES * * \param[in] rsc Resource to check * \param[out] count If not NULL, will be set to count of active nodes * * \return An active node (or NULL if resource is not active anywhere) * * \note This is a convenience wrapper for active_node() where the count of all * active nodes or only clean active nodes is desired according to the * \c PCMK_META_REQUIRES meta-attribute. */ pcmk_node_t * pe__find_active_requires(const pcmk_resource_t *rsc, unsigned int *count) { if (rsc == NULL) { if (count != NULL) { *count = 0; } return NULL; } if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) { return rsc->priv->fns->active_node(rsc, count, NULL); } else { return rsc->priv->fns->active_node(rsc, NULL, count); } } void pe__count_common(pcmk_resource_t *rsc) { if (rsc->priv->children != NULL) { for (GList *item = rsc->priv->children; item != NULL; item = item->next) { pcmk_resource_t *child = item->data; child->priv->fns->count(item->data); } } else if (!pcmk_is_set(rsc->flags, pcmk__rsc_removed) || (rsc->priv->orig_role > pcmk_role_stopped)) { rsc->priv->scheduler->ninstances++; if (pe__resource_is_disabled(rsc)) { rsc->priv->scheduler->disabled_resources++; } if (pcmk_is_set(rsc->flags, pcmk__rsc_blocked)) { rsc->priv->scheduler->blocked_resources++; } } } /*! * \internal * \brief Update a resource's next role * * \param[in,out] rsc Resource to be updated * \param[in] role Resource's new next role * \param[in] why Human-friendly reason why role is changing (for logs) */ void pe__set_next_role(pcmk_resource_t *rsc, enum rsc_role_e role, const char *why) { CRM_ASSERT((rsc != NULL) && (why != NULL)); if (rsc->priv->next_role != role) { pcmk__rsc_trace(rsc, "Resetting next role for %s from %s to %s (%s)", rsc->id, pcmk_role_text(rsc->priv->next_role), pcmk_role_text(role), why); rsc->priv->next_role = role; } } diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c index 3dc312961a..ffede32120 100644 --- a/lib/pengine/pe_actions.c +++ b/lib/pengine/pe_actions.c @@ -1,1799 +1,1799 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "pe_status_private.h" static void unpack_operation(pcmk_action_t *action, const xmlNode *xml_obj, guint interval_ms); static void add_singleton(pcmk_scheduler_t *scheduler, pcmk_action_t *action) { if (scheduler->priv->singletons == NULL) { scheduler->priv->singletons = pcmk__strkey_table(NULL, NULL); } g_hash_table_insert(scheduler->priv->singletons, action->uuid, action); } static pcmk_action_t * lookup_singleton(pcmk_scheduler_t *scheduler, const char *action_uuid) { /* @TODO This is the only use of the pcmk_scheduler_t:singletons hash table. * Compare the performance of this approach to keeping the * pcmk_scheduler_t:actions list sorted by action key and just searching * that instead. */ if (scheduler->priv->singletons == NULL) { return NULL; } return g_hash_table_lookup(scheduler->priv->singletons, action_uuid); } /*! * \internal * \brief Find an existing action that matches arguments * * \param[in] key Action key to match * \param[in] rsc Resource to match (if any) * \param[in] node Node to match (if any) * \param[in] scheduler Scheduler data * * \return Existing action that matches arguments (or NULL if none) */ static pcmk_action_t * find_existing_action(const char *key, const pcmk_resource_t *rsc, const pcmk_node_t *node, const pcmk_scheduler_t *scheduler) { /* When rsc is NULL, it would be quicker to check * scheduler->priv->singletons, but checking all scheduler->priv->actions * takes the node into account. */ GList *actions = (rsc == NULL)? scheduler->priv->actions : rsc->priv->actions; GList *matches = find_actions(actions, key, node); pcmk_action_t *action = NULL; if (matches == NULL) { return NULL; } CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches)); action = matches->data; g_list_free(matches); return action; } /*! * \internal * \brief Find the XML configuration corresponding to a specific action key * * \param[in] rsc Resource to find action configuration for * \param[in] key "RSC_ACTION_INTERVAL" of action to find * \param[in] include_disabled If false, do not return disabled actions * * \return XML configuration of desired action if any, otherwise NULL */ static xmlNode * find_exact_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled) { for (xmlNode *operation = pcmk__xe_first_child(rsc->priv->ops_xml, PCMK_XE_OP, NULL, NULL); operation != NULL; operation = pcmk__xe_next_same(operation)) { bool enabled = false; const char *config_name = NULL; const char *interval_spec = NULL; guint tmp_ms = 0U; // @TODO This does not consider meta-attributes, rules, defaults, etc. if (!include_disabled && (pcmk__xe_get_bool_attr(operation, PCMK_META_ENABLED, &enabled) == pcmk_rc_ok) && !enabled) { continue; } interval_spec = crm_element_value(operation, PCMK_META_INTERVAL); pcmk_parse_interval_spec(interval_spec, &tmp_ms); if (tmp_ms != interval_ms) { continue; } config_name = crm_element_value(operation, PCMK_XA_NAME); if (pcmk__str_eq(action_name, config_name, pcmk__str_none)) { return operation; } } return NULL; } /*! * \internal * \brief Find the XML configuration of a resource action * * \param[in] rsc Resource to find action configuration for * \param[in] action_name Action name to search for * \param[in] interval_ms Action interval (in milliseconds) to search for * \param[in] include_disabled If false, do not return disabled actions * * \return XML configuration of desired action if any, otherwise NULL */ xmlNode * pcmk__find_action_config(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, bool include_disabled) { xmlNode *action_config = NULL; // Try requested action first action_config = find_exact_action_config(rsc, action_name, interval_ms, include_disabled); // For migrate_to and migrate_from actions, retry with "migrate" // @TODO This should be either documented or deprecated if ((action_config == NULL) && pcmk__str_any_of(action_name, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL)) { action_config = find_exact_action_config(rsc, "migrate", 0, include_disabled); } return action_config; } /*! * \internal * \brief Create a new action object * * \param[in] key Action key * \param[in] task Action name * \param[in,out] rsc Resource that action is for (if any) * \param[in] node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in,out] scheduler Scheduler data * * \return Newly allocated action * \note This function takes ownership of \p key. It is the caller's * responsibility to free the return value with pe_free_action(). */ static pcmk_action_t * new_action(char *key, const char *task, pcmk_resource_t *rsc, const pcmk_node_t *node, bool optional, pcmk_scheduler_t *scheduler) { pcmk_action_t *action = pcmk__assert_alloc(1, sizeof(pcmk_action_t)); action->rsc = rsc; action->task = pcmk__str_copy(task); action->uuid = key; action->scheduler = scheduler; if (node) { action->node = pe__copy_node(node); } if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_casei)) { // Resource history deletion for a node can be done on the DC pcmk__set_action_flags(action, pcmk__action_on_dc); } pcmk__set_action_flags(action, pcmk__action_runnable); if (optional) { pcmk__set_action_flags(action, pcmk__action_optional); } else { pcmk__clear_action_flags(action, pcmk__action_optional); } if (rsc == NULL) { action->meta = pcmk__strkey_table(free, free); } else { guint interval_ms = 0; parse_op_key(key, NULL, NULL, &interval_ms); action->op_entry = pcmk__find_action_config(rsc, task, interval_ms, true); /* If the given key is for one of the many notification pseudo-actions * (pre_notify_promote, etc.), the actual action name is "notify" */ if ((action->op_entry == NULL) && (strstr(key, "_notify_") != NULL)) { action->op_entry = find_exact_action_config(rsc, PCMK_ACTION_NOTIFY, 0, true); } unpack_operation(action, action->op_entry, interval_ms); } pcmk__rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s", (optional? "optional" : "required"), - scheduler->action_id, key, task, + scheduler->priv->next_action_id, key, task, ((rsc == NULL)? "no resource" : rsc->id), pcmk__node_name(node)); - action->id = scheduler->action_id++; + action->id = scheduler->priv->next_action_id++; scheduler->priv->actions = g_list_prepend(scheduler->priv->actions, action); if (rsc == NULL) { add_singleton(scheduler, action); } else { rsc->priv->actions = g_list_prepend(rsc->priv->actions, action); } return action; } /*! * \internal * \brief Unpack a resource's action-specific instance parameters * * \param[in] action_xml XML of action's configuration in CIB (if any) * \param[in,out] node_attrs Table of node attributes (for rule evaluation) * \param[in,out] scheduler Cluster working set (for rule evaluation) * * \return Newly allocated hash table of action-specific instance parameters */ GHashTable * pcmk__unpack_action_rsc_params(const xmlNode *action_xml, GHashTable *node_attrs, pcmk_scheduler_t *scheduler) { GHashTable *params = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = node_attrs, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pe__unpack_dataset_nvpairs(action_xml, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, params, NULL, FALSE, scheduler); return params; } /*! * \internal * \brief Update an action's optional flag * * \param[in,out] action Action to update * \param[in] optional Requested optional status */ static void update_action_optional(pcmk_action_t *action, gboolean optional) { // Force a non-recurring action to be optional if its resource is unmanaged if ((action->rsc != NULL) && (action->node != NULL) && !pcmk_is_set(action->flags, pcmk__action_pseudo) && !pcmk_is_set(action->rsc->flags, pcmk__rsc_managed) && (g_hash_table_lookup(action->meta, PCMK_META_INTERVAL) == NULL)) { pcmk__rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)", action->uuid, pcmk__node_name(action->node), action->rsc->id); pcmk__set_action_flags(action, pcmk__action_optional); // We shouldn't clear runnable here because ... something // Otherwise require the action if requested } else if (!optional) { pcmk__clear_action_flags(action, pcmk__action_optional); } } static enum pe_quorum_policy effective_quorum_policy(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { enum pe_quorum_policy policy = scheduler->no_quorum_policy; if (pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { policy = pcmk_no_quorum_ignore; } else if (scheduler->no_quorum_policy == pcmk_no_quorum_demote) { switch (rsc->priv->orig_role) { case pcmk_role_promoted: case pcmk_role_unpromoted: if (rsc->priv->next_role > pcmk_role_unpromoted) { pe__set_next_role(rsc, pcmk_role_unpromoted, PCMK_OPT_NO_QUORUM_POLICY "=demote"); } policy = pcmk_no_quorum_ignore; break; default: policy = pcmk_no_quorum_stop; break; } } return policy; } /*! * \internal * \brief Update a resource action's runnable flag * * \param[in,out] action Action to update * \param[in,out] scheduler Scheduler data * * \note This may also schedule fencing if a stop is unrunnable. */ static void update_resource_action_runnable(pcmk_action_t *action, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = action->rsc; if (pcmk_is_set(action->flags, pcmk__action_pseudo)) { return; } if (action->node == NULL) { pcmk__rsc_trace(rsc, "%s is unrunnable (unallocated)", action->uuid); pcmk__clear_action_flags(action, pcmk__action_runnable); } else if (!pcmk_is_set(action->flags, pcmk__action_on_dc) && !(action->node->details->online) && (!pcmk__is_guest_or_bundle_node(action->node) || pcmk_is_set(action->node->priv->flags, pcmk__node_remote_reset))) { pcmk__clear_action_flags(action, pcmk__action_runnable); do_crm_log(LOG_WARNING, "%s on %s is unrunnable (node is offline)", action->uuid, pcmk__node_name(action->node)); if (pcmk_is_set(rsc->flags, pcmk__rsc_managed) && pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei) && !(action->node->details->unclean)) { pe_fence_node(scheduler, action->node, "stop is unrunnable", false); } } else if (!pcmk_is_set(action->flags, pcmk__action_on_dc) && action->node->details->pending) { pcmk__clear_action_flags(action, pcmk__action_runnable); do_crm_log(LOG_WARNING, "Action %s on %s is unrunnable (node is pending)", action->uuid, pcmk__node_name(action->node)); } else if (action->needs == pcmk__requires_nothing) { pe_action_set_reason(action, NULL, TRUE); if (pcmk__is_guest_or_bundle_node(action->node) && !pe_can_fence(scheduler, action->node)) { /* An action that requires nothing usually does not require any * fencing in order to be runnable. However, there is an exception: * such an action cannot be completed if it is on a guest node whose * host is unclean and cannot be fenced. */ pcmk__rsc_debug(rsc, "%s on %s is unrunnable " "(node's host cannot be fenced)", action->uuid, pcmk__node_name(action->node)); pcmk__clear_action_flags(action, pcmk__action_runnable); } else { pcmk__rsc_trace(rsc, "%s on %s does not require fencing or quorum", action->uuid, pcmk__node_name(action->node)); pcmk__set_action_flags(action, pcmk__action_runnable); } } else { switch (effective_quorum_policy(rsc, scheduler)) { case pcmk_no_quorum_stop: pcmk__rsc_debug(rsc, "%s on %s is unrunnable (no quorum)", action->uuid, pcmk__node_name(action->node)); pcmk__clear_action_flags(action, pcmk__action_runnable); pe_action_set_reason(action, "no quorum", true); break; case pcmk_no_quorum_freeze: if (!rsc->priv->fns->active(rsc, TRUE) || (rsc->priv->next_role > rsc->priv->orig_role)) { pcmk__rsc_debug(rsc, "%s on %s is unrunnable (no quorum)", action->uuid, pcmk__node_name(action->node)); pcmk__clear_action_flags(action, pcmk__action_runnable); pe_action_set_reason(action, "quorum freeze", true); } break; default: //pe_action_set_reason(action, NULL, TRUE); pcmk__set_action_flags(action, pcmk__action_runnable); break; } } } static bool valid_stop_on_fail(const char *value) { return !pcmk__strcase_any_of(value, PCMK_VALUE_STANDBY, PCMK_VALUE_DEMOTE, PCMK_VALUE_STOP, NULL); } /*! * \internal * \brief Validate (and possibly reset) resource action's on_fail meta-attribute * * \param[in] rsc Resource that action is for * \param[in] action_name Action name * \param[in] action_config Action configuration XML from CIB (if any) * \param[in,out] meta Table of action meta-attributes */ static void validate_on_fail(const pcmk_resource_t *rsc, const char *action_name, const xmlNode *action_config, GHashTable *meta) { const char *name = NULL; const char *role = NULL; const char *interval_spec = NULL; const char *value = g_hash_table_lookup(meta, PCMK_META_ON_FAIL); guint interval_ms = 0U; // Stop actions can only use certain on-fail values if (pcmk__str_eq(action_name, PCMK_ACTION_STOP, pcmk__str_none) && !valid_stop_on_fail(value)) { pcmk__config_err("Resetting '" PCMK_META_ON_FAIL "' for %s stop " "action to default value because '%s' is not " "allowed for stop", rsc->id, value); g_hash_table_remove(meta, PCMK_META_ON_FAIL); return; } /* Demote actions default on-fail to the on-fail value for the first * recurring monitor for the promoted role (if any). */ if (pcmk__str_eq(action_name, PCMK_ACTION_DEMOTE, pcmk__str_none) && (value == NULL)) { /* @TODO This does not consider promote options set in a meta-attribute * block (which may have rules that need to be evaluated) rather than * XML properties. */ for (xmlNode *operation = pcmk__xe_first_child(rsc->priv->ops_xml, PCMK_XE_OP, NULL, NULL); operation != NULL; operation = pcmk__xe_next_same(operation)) { bool enabled = false; const char *promote_on_fail = NULL; /* We only care about explicit on-fail (if promote uses default, so * can demote) */ promote_on_fail = crm_element_value(operation, PCMK_META_ON_FAIL); if (promote_on_fail == NULL) { continue; } // We only care about recurring monitors for the promoted role name = crm_element_value(operation, PCMK_XA_NAME); role = crm_element_value(operation, PCMK_XA_ROLE); if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_none) || !pcmk__strcase_any_of(role, PCMK_ROLE_PROMOTED, PCMK__ROLE_PROMOTED_LEGACY, NULL)) { continue; } interval_spec = crm_element_value(operation, PCMK_META_INTERVAL); pcmk_parse_interval_spec(interval_spec, &interval_ms); if (interval_ms == 0U) { continue; } // We only care about enabled monitors if ((pcmk__xe_get_bool_attr(operation, PCMK_META_ENABLED, &enabled) == pcmk_rc_ok) && !enabled) { continue; } /* Demote actions can't default to * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE */ if (pcmk__str_eq(promote_on_fail, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { continue; } // Use value from first applicable promote action found pcmk__insert_dup(meta, PCMK_META_ON_FAIL, promote_on_fail); } return; } if (pcmk__str_eq(action_name, PCMK_ACTION_LRM_DELETE, pcmk__str_none) && !pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) { pcmk__insert_dup(meta, PCMK_META_ON_FAIL, PCMK_VALUE_IGNORE); return; } // PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE is allowed only for certain actions if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { name = crm_element_value(action_config, PCMK_XA_NAME); role = crm_element_value(action_config, PCMK_XA_ROLE); interval_spec = crm_element_value(action_config, PCMK_META_INTERVAL); pcmk_parse_interval_spec(interval_spec, &interval_ms); if (!pcmk__str_eq(name, PCMK_ACTION_PROMOTE, pcmk__str_none) && ((interval_ms == 0U) || !pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_none) || !pcmk__strcase_any_of(role, PCMK_ROLE_PROMOTED, PCMK__ROLE_PROMOTED_LEGACY, NULL))) { pcmk__config_err("Resetting '" PCMK_META_ON_FAIL "' for %s %s " "action to default value because 'demote' is not " "allowed for it", rsc->id, name); g_hash_table_remove(meta, PCMK_META_ON_FAIL); return; } } } static int unpack_timeout(const char *value) { long long timeout_ms = crm_get_msec(value); if (timeout_ms <= 0) { timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } return (int) QB_MIN(timeout_ms, INT_MAX); } // true if value contains valid, non-NULL interval origin for recurring op static bool unpack_interval_origin(const char *value, const xmlNode *xml_obj, guint interval_ms, const crm_time_t *now, long long *start_delay) { long long result = 0; guint interval_sec = interval_ms / 1000; crm_time_t *origin = NULL; // Ignore unspecified values and non-recurring operations if ((value == NULL) || (interval_ms == 0) || (now == NULL)) { return false; } // Parse interval origin from text origin = crm_time_new(value); if (origin == NULL) { pcmk__config_err("Ignoring '" PCMK_META_INTERVAL_ORIGIN "' for " "operation '%s' because '%s' is not valid", pcmk__s(pcmk__xe_id(xml_obj), "(missing ID)"), value); return false; } // Get seconds since origin (negative if origin is in the future) result = crm_time_get_seconds(now) - crm_time_get_seconds(origin); crm_time_free(origin); // Calculate seconds from closest interval to now result = result % interval_sec; // Calculate seconds remaining until next interval result = ((result <= 0)? 0 : interval_sec) - result; crm_info("Calculated a start delay of %llds for operation '%s'", result, pcmk__s(pcmk__xe_id(xml_obj), "(unspecified)")); if (start_delay != NULL) { *start_delay = result * 1000; // milliseconds } return true; } static int unpack_start_delay(const char *value, GHashTable *meta) { long long start_delay_ms = 0; if (value == NULL) { return 0; } start_delay_ms = crm_get_msec(value); start_delay_ms = QB_MIN(start_delay_ms, INT_MAX); if (start_delay_ms < 0) { start_delay_ms = 0; } if (meta != NULL) { g_hash_table_replace(meta, strdup(PCMK_META_START_DELAY), pcmk__itoa(start_delay_ms)); } return (int) start_delay_ms; } /*! * \internal * \brief Find a resource's most frequent recurring monitor * * \param[in] rsc Resource to check * * \return Operation XML configured for most frequent recurring monitor for * \p rsc (if any) */ static xmlNode * most_frequent_monitor(const pcmk_resource_t *rsc) { guint min_interval_ms = G_MAXUINT; xmlNode *op = NULL; for (xmlNode *operation = pcmk__xe_first_child(rsc->priv->ops_xml, PCMK_XE_OP, NULL, NULL); operation != NULL; operation = pcmk__xe_next_same(operation)) { bool enabled = false; guint interval_ms = 0U; const char *interval_spec = crm_element_value(operation, PCMK_META_INTERVAL); // We only care about enabled recurring monitors if (!pcmk__str_eq(crm_element_value(operation, PCMK_XA_NAME), PCMK_ACTION_MONITOR, pcmk__str_none)) { continue; } pcmk_parse_interval_spec(interval_spec, &interval_ms); if (interval_ms == 0U) { continue; } // @TODO This does not consider meta-attributes, rules, defaults, etc. if ((pcmk__xe_get_bool_attr(operation, PCMK_META_ENABLED, &enabled) == pcmk_rc_ok) && !enabled) { continue; } if (interval_ms < min_interval_ms) { min_interval_ms = interval_ms; op = operation; } } return op; } /*! * \internal * \brief Unpack action meta-attributes * * \param[in,out] rsc Resource that action is for * \param[in] node Node that action is on * \param[in] action_name Action name * \param[in] interval_ms Action interval (in milliseconds) * \param[in] action_config Action XML configuration from CIB (if any) * * Unpack a resource action's meta-attributes (normalizing the interval, * timeout, and start delay values as integer milliseconds) from its CIB XML * configuration (including defaults). * * \return Newly allocated hash table with normalized action meta-attributes */ GHashTable * pcmk__unpack_action_meta(pcmk_resource_t *rsc, const pcmk_node_t *node, const char *action_name, guint interval_ms, const xmlNode *action_config) { GHashTable *meta = NULL; const char *timeout_spec = NULL; const char *str = NULL; pe_rsc_eval_data_t rsc_rule_data = { .standard = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS), .provider = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER), .agent = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE), }; pe_op_eval_data_t op_rule_data = { .op_name = action_name, .interval = interval_ms, }; pe_rule_eval_data_t rule_data = { /* @COMPAT Support for node attribute expressions in operation * meta-attributes (whether in the operation configuration or operation * defaults) is deprecated. When we can break behavioral backward * compatibility, drop this line. */ .node_hash = (node == NULL)? NULL : node->priv->attrs, .now = rsc->priv->scheduler->priv->now, .match_data = NULL, .rsc_data = &rsc_rule_data, .op_data = &op_rule_data, }; meta = pcmk__strkey_table(free, free); // Cluster-wide pe__unpack_dataset_nvpairs(rsc->priv->scheduler->priv->op_defaults, PCMK_XE_META_ATTRIBUTES, &rule_data, meta, NULL, FALSE, rsc->priv->scheduler); // Derive default timeout for probes from recurring monitor timeouts if (pcmk_is_probe(action_name, interval_ms)) { xmlNode *min_interval_mon = most_frequent_monitor(rsc); if (min_interval_mon != NULL) { /* @TODO This does not consider timeouts set in * PCMK_XE_META_ATTRIBUTES blocks (which may also have rules that * need to be evaluated). */ timeout_spec = crm_element_value(min_interval_mon, PCMK_META_TIMEOUT); if (timeout_spec != NULL) { pcmk__rsc_trace(rsc, "Setting default timeout for %s probe to " "most frequent monitor's timeout '%s'", rsc->id, timeout_spec); pcmk__insert_dup(meta, PCMK_META_TIMEOUT, timeout_spec); } } } if (action_config != NULL) { // take precedence over defaults pe__unpack_dataset_nvpairs(action_config, PCMK_XE_META_ATTRIBUTES, &rule_data, meta, NULL, TRUE, rsc->priv->scheduler); /* Anything set as an XML property has highest precedence. * This ensures we use the name and interval from the tag. * (See below for the only exception, fence device start/probe timeout.) */ for (xmlAttrPtr attr = action_config->properties; attr != NULL; attr = attr->next) { pcmk__insert_dup(meta, (const char *) attr->name, pcmk__xml_attr_value(attr)); } } g_hash_table_remove(meta, PCMK_XA_ID); // Normalize interval to milliseconds if (interval_ms > 0) { g_hash_table_insert(meta, pcmk__str_copy(PCMK_META_INTERVAL), crm_strdup_printf("%u", interval_ms)); } else { g_hash_table_remove(meta, PCMK_META_INTERVAL); } /* Timeout order of precedence (highest to lowest): * 1. pcmk_monitor_timeout resource parameter (only for starts and probes * when rsc has pcmk_ra_cap_fence_params; this gets used for recurring * monitors via the executor instead) * 2. timeout configured in (with taking precedence over * ) * 3. timeout configured in * 4. PCMK_DEFAULT_ACTION_TIMEOUT_MS */ // Check for pcmk_monitor_timeout if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard), pcmk_ra_cap_fence_params) && (pcmk__str_eq(action_name, PCMK_ACTION_START, pcmk__str_none) || pcmk_is_probe(action_name, interval_ms))) { GHashTable *params = pe_rsc_params(rsc, node, rsc->priv->scheduler); timeout_spec = g_hash_table_lookup(params, "pcmk_monitor_timeout"); if (timeout_spec != NULL) { pcmk__rsc_trace(rsc, "Setting timeout for %s %s to " "pcmk_monitor_timeout (%s)", rsc->id, action_name, timeout_spec); pcmk__insert_dup(meta, PCMK_META_TIMEOUT, timeout_spec); } } // Normalize timeout to positive milliseconds timeout_spec = g_hash_table_lookup(meta, PCMK_META_TIMEOUT); g_hash_table_insert(meta, pcmk__str_copy(PCMK_META_TIMEOUT), pcmk__itoa(unpack_timeout(timeout_spec))); // Ensure on-fail has a valid value validate_on_fail(rsc, action_name, action_config, meta); // Normalize PCMK_META_START_DELAY str = g_hash_table_lookup(meta, PCMK_META_START_DELAY); if (str != NULL) { unpack_start_delay(str, meta); } else { long long start_delay = 0; str = g_hash_table_lookup(meta, PCMK_META_INTERVAL_ORIGIN); if (unpack_interval_origin(str, action_config, interval_ms, rsc->priv->scheduler->priv->now, &start_delay)) { g_hash_table_insert(meta, pcmk__str_copy(PCMK_META_START_DELAY), crm_strdup_printf("%lld", start_delay)); } } return meta; } /*! * \internal * \brief Determine an action's quorum and fencing dependency * * \param[in] rsc Resource that action is for * \param[in] action_name Name of action being unpacked * * \return Quorum and fencing dependency appropriate to action */ enum pcmk__requires pcmk__action_requires(const pcmk_resource_t *rsc, const char *action_name) { const char *value = NULL; enum pcmk__requires requires = pcmk__requires_nothing; CRM_CHECK((rsc != NULL) && (action_name != NULL), return requires); if (!pcmk__strcase_any_of(action_name, PCMK_ACTION_START, PCMK_ACTION_PROMOTE, NULL)) { value = "nothing (not start or promote)"; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing)) { requires = pcmk__requires_fencing; value = "fencing"; } else if (pcmk_is_set(rsc->flags, pcmk__rsc_needs_quorum)) { requires = pcmk__requires_quorum; value = "quorum"; } else { value = "nothing"; } pcmk__rsc_trace(rsc, "%s of %s requires %s", action_name, rsc->id, value); return requires; } /*! * \internal * \brief Parse action failure response from a user-provided string * * \param[in] rsc Resource that action is for * \param[in] action_name Name of action * \param[in] interval_ms Action interval (in milliseconds) * \param[in] value User-provided configuration value for on-fail * * \return Action failure response parsed from \p text */ enum pcmk__on_fail pcmk__parse_on_fail(const pcmk_resource_t *rsc, const char *action_name, guint interval_ms, const char *value) { const char *desc = NULL; bool needs_remote_reset = false; enum pcmk__on_fail on_fail = pcmk__on_fail_ignore; const pcmk_scheduler_t *scheduler = NULL; // There's no enum value for unknown or invalid, so assert CRM_ASSERT((rsc != NULL) && (action_name != NULL)); scheduler = rsc->priv->scheduler; if (value == NULL) { // Use default } else if (pcmk__str_eq(value, PCMK_VALUE_BLOCK, pcmk__str_casei)) { on_fail = pcmk__on_fail_block; desc = "block"; } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE, pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { on_fail = pcmk__on_fail_fence_node; desc = "node fencing"; } else { pcmk__config_err("Resetting '" PCMK_META_ON_FAIL "' for " "%s of %s to 'stop' because 'fence' is not " "valid when fencing is disabled", action_name, rsc->id); on_fail = pcmk__on_fail_stop; desc = "stop resource"; } } else if (pcmk__str_eq(value, PCMK_VALUE_STANDBY, pcmk__str_casei)) { on_fail = pcmk__on_fail_standby_node; desc = "node standby"; } else if (pcmk__strcase_any_of(value, PCMK_VALUE_IGNORE, PCMK_VALUE_NOTHING, NULL)) { desc = "ignore"; } else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) { on_fail = pcmk__on_fail_ban; desc = "force migration"; } else if (pcmk__str_eq(value, PCMK_VALUE_STOP, pcmk__str_casei)) { on_fail = pcmk__on_fail_stop; desc = "stop resource"; } else if (pcmk__str_eq(value, PCMK_VALUE_RESTART, pcmk__str_casei)) { on_fail = pcmk__on_fail_restart; desc = "restart (and possibly migrate)"; } else if (pcmk__str_eq(value, PCMK_VALUE_RESTART_CONTAINER, pcmk__str_casei)) { if (rsc->priv->launcher == NULL) { pcmk__rsc_debug(rsc, "Using default " PCMK_META_ON_FAIL " for %s " "of %s because it does not have a launcher", action_name, rsc->id); } else { on_fail = pcmk__on_fail_restart_container; desc = "restart container (and possibly migrate)"; } } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { on_fail = pcmk__on_fail_demote; desc = "demote instance"; } else { pcmk__config_err("Using default '" PCMK_META_ON_FAIL "' for " "%s of %s because '%s' is not valid", action_name, rsc->id, value); } /* Remote node connections are handled specially. Failures that result * in dropping an active connection must result in fencing. The only * failures that don't are probes and starts. The user can explicitly set * PCMK_META_ON_FAIL=PCMK_VALUE_FENCE to fence after start failures. */ if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection) && pcmk__is_remote_node(pcmk_find_node(scheduler, rsc->id)) && !pcmk_is_probe(action_name, interval_ms) && !pcmk__str_eq(action_name, PCMK_ACTION_START, pcmk__str_none)) { needs_remote_reset = true; if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { desc = NULL; // Force default for unmanaged connections } } if (desc != NULL) { // Explicit value used, default not needed } else if (rsc->priv->launcher != NULL) { on_fail = pcmk__on_fail_restart_container; desc = "restart container (and possibly migrate) (default)"; } else if (needs_remote_reset) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { desc = "fence remote node (default)"; } else { desc = "recover remote node connection (default)"; } on_fail = pcmk__on_fail_reset_remote; } else { on_fail = pcmk__on_fail_stop; desc = "stop unmanaged remote node (enforcing default)"; } } else if (pcmk__str_eq(action_name, PCMK_ACTION_STOP, pcmk__str_none)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { on_fail = pcmk__on_fail_fence_node; desc = "resource fence (default)"; } else { on_fail = pcmk__on_fail_block; desc = "resource block (default)"; } } else { on_fail = pcmk__on_fail_restart; desc = "restart (and possibly migrate) (default)"; } pcmk__rsc_trace(rsc, "Failure handling for %s-interval %s of %s: %s", pcmk__readable_interval(interval_ms), action_name, rsc->id, desc); return on_fail; } /*! * \internal * \brief Determine a resource's role after failure of an action * * \param[in] rsc Resource that action is for * \param[in] action_name Action name * \param[in] on_fail Failure handling for action * \param[in] meta Unpacked action meta-attributes * * \return Resource role that results from failure of action */ enum rsc_role_e pcmk__role_after_failure(const pcmk_resource_t *rsc, const char *action_name, enum pcmk__on_fail on_fail, GHashTable *meta) { const char *value = NULL; enum rsc_role_e role = pcmk_role_unknown; // Set default for role after failure specially in certain circumstances switch (on_fail) { case pcmk__on_fail_stop: role = pcmk_role_stopped; break; case pcmk__on_fail_reset_remote: if (rsc->priv->remote_reconnect_ms != 0U) { role = pcmk_role_stopped; } break; default: break; } // @COMPAT Check for explicitly configured role (deprecated) value = g_hash_table_lookup(meta, PCMK__META_ROLE_AFTER_FAILURE); if (value != NULL) { pcmk__warn_once(pcmk__wo_role_after, "Support for " PCMK__META_ROLE_AFTER_FAILURE " is " "deprecated and will be removed in a future release"); if (role == pcmk_role_unknown) { role = pcmk_parse_role(value); if (role == pcmk_role_unknown) { pcmk__config_err("Ignoring invalid value %s for " PCMK__META_ROLE_AFTER_FAILURE, value); } } } if (role == pcmk_role_unknown) { // Use default if (pcmk__str_eq(action_name, PCMK_ACTION_PROMOTE, pcmk__str_none)) { role = pcmk_role_unpromoted; } else { role = pcmk_role_started; } } pcmk__rsc_trace(rsc, "Role after %s %s failure is: %s", rsc->id, action_name, pcmk_role_text(role)); return role; } /*! * \internal * \brief Unpack action configuration * * Unpack a resource action's meta-attributes (normalizing the interval, * timeout, and start delay values as integer milliseconds), requirements, and * failure policy from its CIB XML configuration (including defaults). * * \param[in,out] action Resource action to unpack into * \param[in] xml_obj Action configuration XML (NULL for defaults only) * \param[in] interval_ms How frequently to perform the operation */ static void unpack_operation(pcmk_action_t *action, const xmlNode *xml_obj, guint interval_ms) { const char *value = NULL; action->meta = pcmk__unpack_action_meta(action->rsc, action->node, action->task, interval_ms, xml_obj); action->needs = pcmk__action_requires(action->rsc, action->task); value = g_hash_table_lookup(action->meta, PCMK_META_ON_FAIL); action->on_fail = pcmk__parse_on_fail(action->rsc, action->task, interval_ms, value); action->fail_role = pcmk__role_after_failure(action->rsc, action->task, action->on_fail, action->meta); } /*! * \brief Create or update an action object * * \param[in,out] rsc Resource that action is for (if any) * \param[in,out] key Action key (must be non-NULL) * \param[in] task Action name (must be non-NULL) * \param[in] on_node Node that action is on (if any) * \param[in] optional Whether action should be considered optional * \param[in,out] scheduler Scheduler data * * \return Action object corresponding to arguments (guaranteed not to be * \c NULL) * \note This function takes ownership of (and might free) \p key, and * \p scheduler takes ownership of the returned action (the caller should * not free it). */ pcmk_action_t * custom_action(pcmk_resource_t *rsc, char *key, const char *task, const pcmk_node_t *on_node, gboolean optional, pcmk_scheduler_t *scheduler) { pcmk_action_t *action = NULL; CRM_ASSERT((key != NULL) && (task != NULL) && (scheduler != NULL)); action = find_existing_action(key, rsc, on_node, scheduler); if (action == NULL) { action = new_action(key, task, rsc, on_node, optional, scheduler); } else { free(key); } update_action_optional(action, optional); if (rsc != NULL) { /* An action can be initially created with a NULL node, and later have * the node added via find_existing_action() (above) -> find_actions(). * That is why the extra parameters are unpacked here rather than in * new_action(). */ if ((action->node != NULL) && (action->op_entry != NULL) && !pcmk_is_set(action->flags, pcmk__action_attrs_evaluated)) { GHashTable *attrs = action->node->priv->attrs; if (action->extra != NULL) { g_hash_table_destroy(action->extra); } action->extra = pcmk__unpack_action_rsc_params(action->op_entry, attrs, scheduler); pcmk__set_action_flags(action, pcmk__action_attrs_evaluated); } update_resource_action_runnable(action, scheduler); } if (action->extra == NULL) { action->extra = pcmk__strkey_table(free, free); } return action; } pcmk_action_t * get_pseudo_op(const char *name, pcmk_scheduler_t *scheduler) { pcmk_action_t *op = lookup_singleton(scheduler, name); if (op == NULL) { op = custom_action(NULL, strdup(name), name, NULL, TRUE, scheduler); pcmk__set_action_flags(op, pcmk__action_pseudo|pcmk__action_runnable); } return op; } static GList * find_unfencing_devices(GList *candidates, GList *matches) { for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *candidate = gIter->data; if (candidate->priv->children != NULL) { matches = find_unfencing_devices(candidate->priv->children, matches); } else if (!pcmk_is_set(candidate->flags, pcmk__rsc_fence_device)) { continue; } else if (pcmk_is_set(candidate->flags, pcmk__rsc_needs_unfencing)) { matches = g_list_prepend(matches, candidate); } else if (pcmk__str_eq(g_hash_table_lookup(candidate->priv->meta, PCMK_STONITH_PROVIDES), PCMK_VALUE_UNFENCING, pcmk__str_casei)) { matches = g_list_prepend(matches, candidate); } } return matches; } static int node_priority_fencing_delay(const pcmk_node_t *node, const pcmk_scheduler_t *scheduler) { int member_count = 0; int online_count = 0; int top_priority = 0; int lowest_priority = 0; GList *gIter = NULL; // PCMK_OPT_PRIORITY_FENCING_DELAY is disabled if (scheduler->priority_fencing_delay <= 0) { return 0; } /* No need to request a delay if the fencing target is not a normal cluster * member, for example if it's a remote node or a guest node. */ if (node->priv->variant != pcmk__node_variant_cluster) { return 0; } // No need to request a delay if the fencing target is in our partition if (node->details->online) { return 0; } for (gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *n = gIter->data; if (n->priv->variant != pcmk__node_variant_cluster) { continue; } member_count ++; if (n->details->online) { online_count++; } if (member_count == 1 || n->priv->priority > top_priority) { top_priority = n->priv->priority; } if (member_count == 1 || n->priv->priority < lowest_priority) { lowest_priority = n->priv->priority; } } // No need to delay if we have more than half of the cluster members if (online_count > member_count / 2) { return 0; } /* All the nodes have equal priority. * Any configured corresponding `pcmk_delay_base/max` will be applied. */ if (lowest_priority == top_priority) { return 0; } if (node->priv->priority < top_priority) { return 0; } return scheduler->priority_fencing_delay; } pcmk_action_t * pe_fence_op(pcmk_node_t *node, const char *op, bool optional, const char *reason, bool priority_delay, pcmk_scheduler_t *scheduler) { char *op_key = NULL; pcmk_action_t *stonith_op = NULL; if(op == NULL) { op = scheduler->priv->fence_action; } op_key = crm_strdup_printf("%s-%s-%s", PCMK_ACTION_STONITH, node->priv->name, op); stonith_op = lookup_singleton(scheduler, op_key); if(stonith_op == NULL) { stonith_op = custom_action(NULL, op_key, PCMK_ACTION_STONITH, node, TRUE, scheduler); pcmk__insert_meta(stonith_op, PCMK__META_ON_NODE, node->priv->name); pcmk__insert_meta(stonith_op, PCMK__META_ON_NODE_UUID, node->priv->id); pcmk__insert_meta(stonith_op, PCMK__META_STONITH_ACTION, op); if (pcmk_is_set(scheduler->flags, pcmk__sched_enable_unfencing)) { /* Extra work to detect device changes */ GString *digests_all = g_string_sized_new(1024); GString *digests_secure = g_string_sized_new(1024); GList *matches = find_unfencing_devices(scheduler->priv->resources, NULL); for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *match = gIter->data; const char *agent = g_hash_table_lookup(match->priv->meta, PCMK_XA_TYPE); pcmk__op_digest_t *data = NULL; data = pe__compare_fencing_digest(match, agent, node, scheduler); if (data->rc == pcmk__digest_mismatch) { optional = FALSE; crm_notice("Unfencing node %s because the definition of " "%s changed", pcmk__node_name(node), match->id); if (!pcmk__is_daemon && (scheduler->priv->out != NULL)) { pcmk__output_t *out = scheduler->priv->out; out->info(out, "notice: Unfencing node %s because the " "definition of %s changed", pcmk__node_name(node), match->id); } } pcmk__g_strcat(digests_all, match->id, ":", agent, ":", data->digest_all_calc, ",", NULL); pcmk__g_strcat(digests_secure, match->id, ":", agent, ":", data->digest_secure_calc, ",", NULL); } pcmk__insert_dup(stonith_op->meta, PCMK__META_DIGESTS_ALL, digests_all->str); g_string_free(digests_all, TRUE); pcmk__insert_dup(stonith_op->meta, PCMK__META_DIGESTS_SECURE, digests_secure->str); g_string_free(digests_secure, TRUE); } } else { free(op_key); } if (scheduler->priority_fencing_delay > 0 /* It's a suitable case where PCMK_OPT_PRIORITY_FENCING_DELAY * applies. At least add PCMK_OPT_PRIORITY_FENCING_DELAY field as * an indicator. */ && (priority_delay /* The priority delay needs to be recalculated if this function has * been called by schedule_fencing_and_shutdowns() after node * priority has already been calculated by native_add_running(). */ || g_hash_table_lookup(stonith_op->meta, PCMK_OPT_PRIORITY_FENCING_DELAY) != NULL)) { /* Add PCMK_OPT_PRIORITY_FENCING_DELAY to the fencing op even if * it's 0 for the targeting node. So that it takes precedence over * any possible `pcmk_delay_base/max`. */ char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, scheduler)); g_hash_table_insert(stonith_op->meta, strdup(PCMK_OPT_PRIORITY_FENCING_DELAY), delay_s); } if(optional == FALSE && pe_can_fence(scheduler, node)) { pcmk__clear_action_flags(stonith_op, pcmk__action_optional); pe_action_set_reason(stonith_op, reason, false); } else if(reason && stonith_op->reason == NULL) { stonith_op->reason = strdup(reason); } return stonith_op; } void pe_free_action(pcmk_action_t *action) { if (action == NULL) { return; } g_list_free_full(action->actions_before, free); g_list_free_full(action->actions_after, free); if (action->extra) { g_hash_table_destroy(action->extra); } if (action->meta) { g_hash_table_destroy(action->meta); } free(action->cancel_task); free(action->reason); free(action->task); free(action->uuid); free(action->node); free(action); } enum pcmk__action_type get_complex_task(const pcmk_resource_t *rsc, const char *name) { enum pcmk__action_type task = pcmk__parse_action(name); if (pcmk__is_primitive(rsc)) { switch (task) { case pcmk__action_stopped: case pcmk__action_started: case pcmk__action_demoted: case pcmk__action_promoted: crm_trace("Folding %s back into its atomic counterpart for %s", name, rsc->id); --task; break; default: break; } } return task; } /*! * \internal * \brief Find first matching action in a list * * \param[in] input List of actions to search * \param[in] uuid If not NULL, action must have this UUID * \param[in] task If not NULL, action must have this action name * \param[in] on_node If not NULL, action must be on this node * * \return First action in list that matches criteria, or NULL if none */ pcmk_action_t * find_first_action(const GList *input, const char *uuid, const char *task, const pcmk_node_t *on_node) { CRM_CHECK(uuid || task, return NULL); for (const GList *gIter = input; gIter != NULL; gIter = gIter->next) { pcmk_action_t *action = (pcmk_action_t *) gIter->data; if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) { continue; } else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) { continue; } else if (on_node == NULL) { return action; } else if (action->node == NULL) { continue; } else if (pcmk__same_node(on_node, action->node)) { return action; } } return NULL; } GList * find_actions(GList *input, const char *key, const pcmk_node_t *on_node) { GList *gIter = input; GList *result = NULL; CRM_CHECK(key != NULL, return NULL); for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *action = (pcmk_action_t *) gIter->data; if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) { continue; } else if (on_node == NULL) { crm_trace("Action %s matches (ignoring node)", key); result = g_list_prepend(result, action); } else if (action->node == NULL) { crm_trace("Action %s matches (unallocated, assigning to %s)", key, pcmk__node_name(on_node)); action->node = pe__copy_node(on_node); result = g_list_prepend(result, action); } else if (pcmk__same_node(on_node, action->node)) { crm_trace("Action %s on %s matches", key, pcmk__node_name(on_node)); result = g_list_prepend(result, action); } } return result; } GList * find_actions_exact(GList *input, const char *key, const pcmk_node_t *on_node) { GList *result = NULL; CRM_CHECK(key != NULL, return NULL); if (on_node == NULL) { return NULL; } for (GList *gIter = input; gIter != NULL; gIter = gIter->next) { pcmk_action_t *action = (pcmk_action_t *) gIter->data; if ((action->node != NULL) && pcmk__str_eq(key, action->uuid, pcmk__str_casei) && pcmk__same_node(on_node, action->node)) { crm_trace("Action %s on %s matches", key, pcmk__node_name(on_node)); result = g_list_prepend(result, action); } } return result; } /*! * \brief Find all actions of given type for a resource * * \param[in] rsc Resource to search * \param[in] node Find only actions scheduled on this node * \param[in] task Action name to search for * \param[in] require_node If TRUE, NULL node or action node will not match * * \return List of actions found (or NULL if none) * \note If node is not NULL and require_node is FALSE, matching actions * without a node will be assigned to node. */ GList * pe__resource_actions(const pcmk_resource_t *rsc, const pcmk_node_t *node, const char *task, bool require_node) { GList *result = NULL; char *key = pcmk__op_key(rsc->id, task, 0); if (require_node) { result = find_actions_exact(rsc->priv->actions, key, node); } else { result = find_actions(rsc->priv->actions, key, node); } free(key); return result; } /*! * \internal * \brief Create an action reason string based on the action itself * * \param[in] action Action to create reason string for * \param[in] flag Action flag that was cleared * * \return Newly allocated string suitable for use as action reason * \note It is the caller's responsibility to free() the result. */ char * pe__action2reason(const pcmk_action_t *action, enum pcmk__action_flags flag) { const char *change = NULL; switch (flag) { case pcmk__action_runnable: change = "unrunnable"; break; case pcmk__action_migratable: change = "unmigrateable"; break; case pcmk__action_optional: change = "required"; break; default: // Bug: caller passed unsupported flag CRM_CHECK(change != NULL, change = ""); break; } return crm_strdup_printf("%s%s%s %s", change, (action->rsc == NULL)? "" : " ", (action->rsc == NULL)? "" : action->rsc->id, action->task); } void pe_action_set_reason(pcmk_action_t *action, const char *reason, bool overwrite) { if (action->reason != NULL && overwrite) { pcmk__rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'", action->uuid, action->reason, pcmk__s(reason, "(none)")); } else if (action->reason == NULL) { pcmk__rsc_trace(action->rsc, "Set %s reason to '%s'", action->uuid, pcmk__s(reason, "(none)")); } else { // crm_assert(action->reason != NULL && !overwrite); return; } pcmk__str_update(&action->reason, reason); } /*! * \internal * \brief Create an action to clear a resource's history from CIB * * \param[in,out] rsc Resource to clear * \param[in] node Node to clear history on */ void pe__clear_resource_history(pcmk_resource_t *rsc, const pcmk_node_t *node) { CRM_ASSERT((rsc != NULL) && (node != NULL)); custom_action(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_LRM_DELETE, 0), PCMK_ACTION_LRM_DELETE, node, FALSE, rsc->priv->scheduler); } #define sort_return(an_int, why) do { \ free(a_uuid); \ free(b_uuid); \ crm_trace("%s (%d) %c %s (%d) : %s", \ a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \ b_xml_id, b_call_id, why); \ return an_int; \ } while(0) int pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b) { int a_call_id = -1; int b_call_id = -1; char *a_uuid = NULL; char *b_uuid = NULL; const char *a_xml_id = crm_element_value(xml_a, PCMK_XA_ID); const char *b_xml_id = crm_element_value(xml_b, PCMK_XA_ID); const char *a_node = crm_element_value(xml_a, PCMK__META_ON_NODE); const char *b_node = crm_element_value(xml_b, PCMK__META_ON_NODE); bool same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei); if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) { /* We have duplicate PCMK__XE_LRM_RSC_OP entries in the status * section which is unlikely to be a good thing * - we can handle it easily enough, but we need to get * to the bottom of why it's happening. */ pcmk__config_err("Duplicate " PCMK__XE_LRM_RSC_OP " entries named %s", a_xml_id); sort_return(0, "duplicate"); } crm_element_value_int(xml_a, PCMK__XA_CALL_ID, &a_call_id); crm_element_value_int(xml_b, PCMK__XA_CALL_ID, &b_call_id); if (a_call_id == -1 && b_call_id == -1) { /* both are pending ops so it doesn't matter since * stops are never pending */ sort_return(0, "pending"); } else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) { sort_return(-1, "call id"); } else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) { sort_return(1, "call id"); } else if (a_call_id >= 0 && b_call_id >= 0 && (!same_node || a_call_id == b_call_id)) { /* The op and last_failed_op are the same. Order on * PCMK_XA_LAST_RC_CHANGE. */ time_t last_a = -1; time_t last_b = -1; crm_element_value_epoch(xml_a, PCMK_XA_LAST_RC_CHANGE, &last_a); crm_element_value_epoch(xml_b, PCMK_XA_LAST_RC_CHANGE, &last_b); crm_trace("rc-change: %lld vs %lld", (long long) last_a, (long long) last_b); if (last_a >= 0 && last_a < last_b) { sort_return(-1, "rc-change"); } else if (last_b >= 0 && last_a > last_b) { sort_return(1, "rc-change"); } sort_return(0, "rc-change"); } else { /* One of the inputs is a pending operation. * Attempt to use PCMK__XA_TRANSITION_MAGIC to determine its age relative * to the other. */ int a_id = -1; int b_id = -1; const char *a_magic = crm_element_value(xml_a, PCMK__XA_TRANSITION_MAGIC); const char *b_magic = crm_element_value(xml_b, PCMK__XA_TRANSITION_MAGIC); CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic")); if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic a"); } if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL, NULL)) { sort_return(0, "bad magic b"); } /* try to determine the relative age of the operation... * some pending operations (e.g. a start) may have been superseded * by a subsequent stop * * [a|b]_id == -1 means it's a shutdown operation and _always_ comes last */ if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) { /* * some of the logic in here may be redundant... * * if the UUID from the TE doesn't match then one better * be a pending operation. * pending operations don't survive between elections and joins * because we query the LRM directly */ if (b_call_id == -1) { sort_return(-1, "transition + call"); } else if (a_call_id == -1) { sort_return(1, "transition + call"); } } else if ((a_id >= 0 && a_id < b_id) || b_id == -1) { sort_return(-1, "transition"); } else if ((b_id >= 0 && a_id > b_id) || a_id == -1) { sort_return(1, "transition"); } } /* we should never end up here */ CRM_CHECK(FALSE, sort_return(0, "default")); } gint sort_op_by_callid(gconstpointer a, gconstpointer b) { return pe__is_newer_op((const xmlNode *) a, (const xmlNode *) b); } /*! * \internal * \brief Create a new pseudo-action for a resource * * \param[in,out] rsc Resource to create action for * \param[in] task Action name * \param[in] optional Whether action should be considered optional * \param[in] runnable Whethe action should be considered runnable * * \return New action object corresponding to arguments */ pcmk_action_t * pe__new_rsc_pseudo_action(pcmk_resource_t *rsc, const char *task, bool optional, bool runnable) { pcmk_action_t *action = NULL; CRM_ASSERT((rsc != NULL) && (task != NULL)); action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL, optional, rsc->priv->scheduler); pcmk__set_action_flags(action, pcmk__action_pseudo); if (runnable) { pcmk__set_action_flags(action, pcmk__action_runnable); } return action; } /*! * \internal * \brief Add the expected result to an action * * \param[in,out] action Action to add expected result to * \param[in] expected_result Expected result to add * * \note This is more efficient than calling pcmk__insert_meta(). */ void pe__add_action_expected_result(pcmk_action_t *action, int expected_result) { CRM_ASSERT((action != NULL) && (action->meta != NULL)); g_hash_table_insert(action->meta, pcmk__str_copy(PCMK__META_OP_TARGET_RC), pcmk__itoa(expected_result)); } diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c index 7573deda57..bf2389b1fd 100644 --- a/lib/pengine/pe_output.c +++ b/lib/pengine/pe_output.c @@ -1,3464 +1,3464 @@ /* * Copyright 2019-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include const char * pe__resource_description(const pcmk_resource_t *rsc, uint32_t show_opts) { const char * desc = NULL; // User-supplied description if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)) { desc = crm_element_value(rsc->priv->xml, PCMK_XA_DESCRIPTION); } return desc; } /* Never display node attributes whose name starts with one of these prefixes */ #define FILTER_STR { PCMK__FAIL_COUNT_PREFIX, PCMK__LAST_FAILURE_PREFIX, \ PCMK__NODE_ATTR_SHUTDOWN, PCMK_NODE_ATTR_TERMINATE, \ PCMK_NODE_ATTR_STANDBY, "#", NULL } static int compare_attribute(gconstpointer a, gconstpointer b) { int rc; rc = strcmp((const char *)a, (const char *)b); return rc; } /*! * \internal * \brief Determine whether extended information about an attribute should be added. * * \param[in] node Node that ran this resource * \param[in,out] rsc_list List of resources for this node * \param[in,out] scheduler Scheduler data * \param[in] attrname Attribute to find * \param[out] expected_score Expected value for this attribute * * \return true if extended information should be printed, false otherwise * \note Currently, extended information is only supported for ping/pingd * resources, for which a message will be printed if connectivity is lost * or degraded. */ static bool add_extra_info(const pcmk_node_t *node, GList *rsc_list, pcmk_scheduler_t *scheduler, const char *attrname, int *expected_score) { GList *gIter = NULL; for (gIter = rsc_list; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; const char *type = g_hash_table_lookup(rsc->priv->meta, PCMK_XA_TYPE); const char *name = NULL; GHashTable *params = NULL; if (rsc->priv->children != NULL) { if (add_extra_info(node, rsc->priv->children, scheduler, attrname, expected_score)) { return true; } } if (!pcmk__strcase_any_of(type, "ping", "pingd", NULL)) { continue; } params = pe_rsc_params(rsc, node, scheduler); name = g_hash_table_lookup(params, PCMK_XA_NAME); if (name == NULL) { name = "pingd"; } /* To identify the resource with the attribute name. */ if (pcmk__str_eq(name, attrname, pcmk__str_casei)) { int host_list_num = 0; const char *hosts = g_hash_table_lookup(params, "host_list"); const char *multiplier = g_hash_table_lookup(params, "multiplier"); int multiplier_i; if (hosts) { char **host_list = g_strsplit(hosts, " ", 0); host_list_num = g_strv_length(host_list); g_strfreev(host_list); } if ((multiplier == NULL) || (pcmk__scan_min_int(multiplier, &multiplier_i, INT_MIN) != pcmk_rc_ok)) { /* The ocf:pacemaker:ping resource agent defaults multiplier to * 1. The agent currently does not handle invalid text, but it * should, and this would be a reasonable choice ... */ multiplier_i = 1; } *expected_score = host_list_num * multiplier_i; return true; } } return false; } static GList * filter_attr_list(GList *attr_list, char *name) { int i; const char *filt_str[] = FILTER_STR; CRM_CHECK(name != NULL, return attr_list); /* filtering automatic attributes */ for (i = 0; filt_str[i] != NULL; i++) { if (g_str_has_prefix(name, filt_str[i])) { return attr_list; } } return g_list_insert_sorted(attr_list, name, compare_attribute); } static GList * get_operation_list(xmlNode *rsc_entry) { GList *op_list = NULL; xmlNode *rsc_op = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { const char *task = crm_element_value(rsc_op, PCMK_XA_OPERATION); const char *interval_ms_s = crm_element_value(rsc_op, PCMK_META_INTERVAL); const char *op_rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* Ignore notifies and some probes */ if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none) || (pcmk__str_eq(task, "probe", pcmk__str_none) && (op_rc_i == CRM_EX_NOT_RUNNING))) { continue; } if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) { op_list = g_list_append(op_list, rsc_op); } } op_list = g_list_sort(op_list, sort_op_by_callid); return op_list; } static void add_dump_node(gpointer key, gpointer value, gpointer user_data) { xmlNodePtr node = user_data; node = pcmk__xe_create(node, (const char *) key); pcmk__xe_set_content(node, "%s", (const char *) value); } static void append_dump_text(gpointer key, gpointer value, gpointer user_data) { char **dump_text = user_data; char *new_text = crm_strdup_printf("%s %s=%s", *dump_text, (char *)key, (char *)value); free(*dump_text); *dump_text = new_text; } #define XPATH_STACK "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" \ PCMK_OPT_CLUSTER_INFRASTRUCTURE "']" static const char * get_cluster_stack(pcmk_scheduler_t *scheduler) { xmlNode *stack = get_xpath_object(XPATH_STACK, scheduler->input, LOG_DEBUG); if (stack != NULL) { return crm_element_value(stack, PCMK_XA_VALUE); } return PCMK_VALUE_UNKNOWN; } static char * last_changed_string(const char *last_written, const char *user, const char *client, const char *origin) { if (last_written != NULL || user != NULL || client != NULL || origin != NULL) { return crm_strdup_printf("%s%s%s%s%s%s%s", last_written ? last_written : "", user ? " by " : "", user ? user : "", client ? " via " : "", client ? client : "", origin ? " on " : "", origin ? origin : ""); } else { return strdup(""); } } static char * op_history_string(xmlNode *xml_op, const char *task, const char *interval_ms_s, int rc, bool print_timing) { const char *call = crm_element_value(xml_op, PCMK__XA_CALL_ID); char *interval_str = NULL; char *buf = NULL; if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *pair = pcmk__format_nvpair(PCMK_XA_INTERVAL, interval_ms_s, "ms"); interval_str = crm_strdup_printf(" %s", pair); free(pair); } if (print_timing) { char *last_change_str = NULL; char *exec_str = NULL; char *queue_str = NULL; const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *epoch_str = pcmk__epoch2str(&epoch, 0); last_change_str = crm_strdup_printf(" %s=\"%s\"", PCMK_XA_LAST_RC_CHANGE, pcmk__s(epoch_str, "")); free(epoch_str); } value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); if (value) { char *pair = pcmk__format_nvpair(PCMK_XA_EXEC_TIME, value, "ms"); exec_str = crm_strdup_printf(" %s", pair); free(pair); } value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); if (value) { char *pair = pcmk__format_nvpair(PCMK_XA_QUEUE_TIME, value, "ms"); queue_str = crm_strdup_printf(" %s", pair); free(pair); } buf = crm_strdup_printf("(%s) %s:%s%s%s%s rc=%d (%s)", call, task, interval_str ? interval_str : "", last_change_str ? last_change_str : "", exec_str ? exec_str : "", queue_str ? queue_str : "", rc, services_ocf_exitcode_str(rc)); if (last_change_str) { free(last_change_str); } if (exec_str) { free(exec_str); } if (queue_str) { free(queue_str); } } else { buf = crm_strdup_printf("(%s) %s%s%s", call, task, interval_str ? ":" : "", interval_str ? interval_str : ""); } if (interval_str) { free(interval_str); } return buf; } static char * resource_history_string(pcmk_resource_t *rsc, const char *rsc_id, bool all, int failcount, time_t last_failure) { char *buf = NULL; if (rsc == NULL) { buf = crm_strdup_printf("%s: orphan", rsc_id); } else if (all || failcount || last_failure > 0) { char *failcount_s = NULL; char *lastfail_s = NULL; if (failcount > 0) { failcount_s = crm_strdup_printf(" %s=%d", PCMK_XA_FAIL_COUNT, failcount); } else { failcount_s = strdup(""); } if (last_failure > 0) { buf = pcmk__epoch2str(&last_failure, 0); lastfail_s = crm_strdup_printf(" %s='%s'", PCMK_XA_LAST_FAILURE, buf); free(buf); } buf = crm_strdup_printf("%s: " PCMK_META_MIGRATION_THRESHOLD "=%d%s%s", rsc_id, rsc->priv->ban_after_failures, failcount_s, pcmk__s(lastfail_s, "")); free(failcount_s); free(lastfail_s); } else { buf = crm_strdup_printf("%s:", rsc_id); } return buf; } /*! * \internal * \brief Get a node's feature set for status display purposes * * \param[in] node Node to check * * \return String representation of feature set if the node is fully up (using * "<3.15.1" for older nodes that don't set the #feature-set attribute), * otherwise NULL */ static const char * get_node_feature_set(const pcmk_node_t *node) { if (node->details->online && pcmk_is_set(node->priv->flags, pcmk__node_expected_up) && !pcmk__is_pacemaker_remote_node(node)) { const char *feature_set = g_hash_table_lookup(node->priv->attrs, CRM_ATTR_FEATURE_SET); /* The feature set attribute is present since 3.15.1. If it is missing, * then the node must be running an earlier version. */ return pcmk__s(feature_set, "<3.15.1"); } return NULL; } static bool is_mixed_version(pcmk_scheduler_t *scheduler) { const char *feature_set = NULL; for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = gIter->data; const char *node_feature_set = get_node_feature_set(node); if (node_feature_set != NULL) { if (feature_set == NULL) { feature_set = node_feature_set; } else if (strcmp(feature_set, node_feature_set) != 0) { return true; } } } return false; } static void formatted_xml_buf(const pcmk_resource_t *rsc, GString *xml_buf, bool raw) { if (raw && (rsc->priv->orig_xml != NULL)) { pcmk__xml_string(rsc->priv->orig_xml, pcmk__xml_fmt_pretty, xml_buf, 0); } else { pcmk__xml_string(rsc->priv->xml, pcmk__xml_fmt_pretty, xml_buf, 0); } } #define XPATH_DC_VERSION "//" PCMK_XE_NVPAIR \ "[@" PCMK_XA_NAME "='" PCMK_OPT_DC_VERSION "']" PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(scheduler); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } if (pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION, scheduler->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, PCMK_XA_VALUE) : NULL; const char *quorum = crm_element_value(scheduler->input, PCMK_XA_HAVE_QUORUM); char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(scheduler); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", scheduler->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(scheduler->input, PCMK_XA_CIB_LAST_WRITTEN); const char *user = crm_element_value(scheduler->input, PCMK_XA_UPDATE_USER); const char *client = crm_element_value(scheduler->input, PCMK_XA_UPDATE_CLIENT); const char *origin = crm_element_value(scheduler->input, PCMK_XA_UPDATE_ORIGIN); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); - out->message(out, "cluster-times", - scheduler->localhost, last_written, user, client, origin); + out->message(out, "cluster-times", scheduler->priv->local_node_name, + last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(scheduler->nodes), scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-options", scheduler); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } PCMK__OUTPUT_ARGS("cluster-summary", "pcmk_scheduler_t *", "enum pcmk_pacemakerd_state", "uint32_t", "uint32_t") static int cluster_summary_html(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); int rc = pcmk_rc_no_output; const char *stack_s = get_cluster_stack(scheduler); if (pcmk_is_set(section_opts, pcmk_section_stack)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-stack", stack_s, pcmkd_state); } /* Always print DC if none, even if not requested */ if ((scheduler->dc_node == NULL) || pcmk_is_set(section_opts, pcmk_section_dc)) { xmlNode *dc_version = get_xpath_object(XPATH_DC_VERSION, scheduler->input, LOG_DEBUG); const char *dc_version_s = dc_version? crm_element_value(dc_version, PCMK_XA_VALUE) : NULL; const char *quorum = crm_element_value(scheduler->input, PCMK_XA_HAVE_QUORUM); char *dc_name = scheduler->dc_node? pe__node_display_name(scheduler->dc_node, pcmk_is_set(show_opts, pcmk_show_node_id)) : NULL; bool mixed_version = is_mixed_version(scheduler); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-dc", scheduler->dc_node, quorum, dc_version_s, dc_name, mixed_version); free(dc_name); } if (pcmk_is_set(section_opts, pcmk_section_times)) { const char *last_written = crm_element_value(scheduler->input, PCMK_XA_CIB_LAST_WRITTEN); const char *user = crm_element_value(scheduler->input, PCMK_XA_UPDATE_USER); const char *client = crm_element_value(scheduler->input, PCMK_XA_UPDATE_CLIENT); const char *origin = crm_element_value(scheduler->input, PCMK_XA_UPDATE_ORIGIN); PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); - out->message(out, "cluster-times", - scheduler->localhost, last_written, user, client, origin); + out->message(out, "cluster-times", scheduler->priv->local_node_name, + last_written, user, client, origin); } if (pcmk_is_set(section_opts, pcmk_section_counts)) { PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Cluster Summary"); out->message(out, "cluster-counts", g_list_length(scheduler->nodes), scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } if (pcmk_is_set(section_opts, pcmk_section_options)) { /* Kind of a hack - close the list we may have opened earlier in this * function so we can put all the options into their own list. We * only want to do this on HTML output, though. */ PCMK__OUTPUT_LIST_FOOTER(out, rc); out->begin_list(out, NULL, NULL, "Config Options"); out->message(out, "cluster-options", scheduler); } PCMK__OUTPUT_LIST_FOOTER(out, rc); if (pcmk_is_set(section_opts, pcmk_section_maint_mode)) { if (out->message(out, "maint-mode", scheduler->flags) == pcmk_rc_ok) { rc = pcmk_rc_ok; } } return rc; } char * pe__node_display_name(pcmk_node_t *node, bool print_detail) { char *node_name; const char *node_host = NULL; const char *node_id = NULL; int name_len; CRM_ASSERT((node != NULL) && (node->priv->name != NULL)); /* Host is displayed only if this is a guest node and detail is requested */ if (print_detail && pcmk__is_guest_or_bundle_node(node)) { const pcmk_resource_t *launcher = NULL; const pcmk_node_t *host_node = NULL; launcher = node->priv->remote->priv->launcher; host_node = pcmk__current_node(launcher); if (host_node && host_node->details) { node_host = host_node->priv->name; } if (node_host == NULL) { node_host = ""; /* so we at least get "uname@" to indicate guest */ } } /* Node ID is displayed if different from uname and detail is requested */ if (print_detail && !pcmk__str_eq(node->priv->name, node->priv->id, pcmk__str_casei)) { node_id = node->priv->id; } /* Determine name length */ name_len = strlen(node->priv->name) + 1; if (node_host) { name_len += strlen(node_host) + 1; /* "@node_host" */ } if (node_id) { name_len += strlen(node_id) + 3; /* + " (node_id)" */ } /* Allocate and populate display name */ node_name = pcmk__assert_alloc(name_len, sizeof(char)); strcpy(node_name, node->priv->name); if (node_host) { strcat(node_name, "@"); strcat(node_name, node_host); } if (node_id) { strcat(node_name, " ("); strcat(node_name, node_id); strcat(node_name, ")"); } return node_name; } int pe__name_and_nvpairs_xml(pcmk__output_t *out, bool is_list, const char *tag_name, ...) { xmlNodePtr xml_node = NULL; va_list pairs; CRM_ASSERT(tag_name != NULL); xml_node = pcmk__output_xml_peek_parent(out); CRM_ASSERT(xml_node != NULL); xml_node = pcmk__xe_create(xml_node, tag_name); va_start(pairs, tag_name); pcmk__xe_set_propv(xml_node, pairs); va_end(pairs); if (is_list) { pcmk__output_xml_push_parent(out, xml_node); } return pcmk_rc_ok; } static const char * role_desc(enum rsc_role_e role) { if (role == pcmk_role_promoted) { return "in " PCMK_ROLE_PROMOTED " role "; } return ""; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_html(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); char *buf = crm_strdup_printf("%s\tprevents %s from running %son %s", location->id, location->rsc->id, role_desc(location->role_filter), node_name); pcmk__output_create_html_node(out, "li", NULL, NULL, buf); free(node_name); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_text(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts = va_arg(args, uint32_t); char *node_name = pe__node_display_name(pe_node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->list_item(out, NULL, "%s\tprevents %s from running %son %s", location->id, location->rsc->id, role_desc(location->role_filter), node_name); free(node_name); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban", "pcmk_node_t *", "pcmk__location_t *", "uint32_t") static int ban_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *pe_node = va_arg(args, pcmk_node_t *); pcmk__location_t *location = va_arg(args, pcmk__location_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); const char *promoted_only = pcmk__btoa(location->role_filter == pcmk_role_promoted); char *weight_s = pcmk__itoa(pe_node->assign->score); pcmk__output_create_xml_node(out, PCMK_XE_BAN, PCMK_XA_ID, location->id, PCMK_XA_RESOURCE, location->rsc->id, PCMK_XA_NODE, pe_node->priv->name, PCMK_XA_WEIGHT, weight_s, PCMK_XA_PROMOTED_ONLY, promoted_only, /* This is a deprecated alias for * promoted_only. Removing it will break * backward compatibility of the API schema, * which will require an API schema major * version bump. */ PCMK__XA_PROMOTED_ONLY_LEGACY, promoted_only, NULL); free(weight_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ban-list", "pcmk_scheduler_t *", "const char *", "GList *", "uint32_t", "bool") static int ban_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); const char *prefix = va_arg(args, const char *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *gIter, *gIter2; int rc = pcmk_rc_no_output; /* Print each ban */ for (gIter = scheduler->priv->location_constraints; gIter != NULL; gIter = gIter->next) { pcmk__location_t *location = gIter->data; const pcmk_resource_t *rsc = location->rsc; if (prefix != NULL && !g_str_has_prefix(location->id, prefix)) { continue; } if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(pe__const_top_resource(rsc, false)), only_rsc, pcmk__str_star_matches)) { continue; } for (gIter2 = location->nodes; gIter2 != NULL; gIter2 = gIter2->next) { pcmk_node_t *node = (pcmk_node_t *) gIter2->data; if (node->assign->score < 0) { PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Negative Location Constraints"); out->message(out, "ban", node, location, show_opts); } } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_html(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr resources_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; child = pcmk__html_create(nodes_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "DISABLED"); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, ", %d ", nblocked); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "BLOCKED"); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " from further action due to failure)"); } else if (ndisabled && !nblocked) { child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), ndisabled); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "DISABLED"); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, ")"); } else if (!ndisabled && nblocked) { child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%d resource instance%s configured (%d ", nresources, pcmk__plural_s(nresources), nblocked); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "BLOCKED"); child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " from further action due to failure)"); } else { child = pcmk__html_create(resources_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_text(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); out->list_item(out, NULL, "%d node%s configured", nnodes, pcmk__plural_s(nnodes)); if (ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED, %d BLOCKED from " "further action due to failure)", nresources, pcmk__plural_s(nresources), ndisabled, nblocked); } else if (ndisabled && !nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d DISABLED)", nresources, pcmk__plural_s(nresources), ndisabled); } else if (!ndisabled && nblocked) { out->list_item(out, NULL, "%d resource instance%s configured " "(%d BLOCKED from further action " "due to failure)", nresources, pcmk__plural_s(nresources), nblocked); } else { out->list_item(out, NULL, "%d resource instance%s configured", nresources, pcmk__plural_s(nresources)); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-counts", "unsigned int", "int", "int", "int") static int cluster_counts_xml(pcmk__output_t *out, va_list args) { unsigned int nnodes = va_arg(args, unsigned int); int nresources = va_arg(args, int); int ndisabled = va_arg(args, int); int nblocked = va_arg(args, int); xmlNodePtr nodes_node = NULL; xmlNodePtr resources_node = NULL; char *s = NULL; nodes_node = pcmk__output_create_xml_node(out, PCMK_XE_NODES_CONFIGURED, NULL); resources_node = pcmk__output_create_xml_node(out, PCMK_XE_RESOURCES_CONFIGURED, NULL); s = pcmk__itoa(nnodes); crm_xml_add(nodes_node, PCMK_XA_NUMBER, s); free(s); s = pcmk__itoa(nresources); crm_xml_add(resources_node, PCMK_XA_NUMBER, s); free(s); s = pcmk__itoa(ndisabled); crm_xml_add(resources_node, PCMK_XA_DISABLED, s); free(s); s = pcmk__itoa(nblocked); crm_xml_add(resources_node, PCMK_XA_BLOCKED, s); free(s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_html(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "Current DC: "); if (dc) { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%s (version %s) -", dc_name, pcmk__s(dc_version_s, "unknown")); if (mixed_version) { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_WARNING); pcmk__xe_set_content(child, " MIXED-VERSION"); } child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " partition"); if (crm_is_true(quorum)) { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " with"); } else { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_WARNING); pcmk__xe_set_content(child, " WITHOUT"); } child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " quorum"); } else { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_WARNING); pcmk__xe_set_content(child, "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_text(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { out->list_item(out, "Current DC", "%s (version %s) - %spartition %s quorum", dc_name, dc_version_s ? dc_version_s : "unknown", mixed_version ? "MIXED-VERSION " : "", crm_is_true(quorum) ? "with" : "WITHOUT"); } else { out->list_item(out, "Current DC", "NONE"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-dc", "pcmk_node_t *", "const char *", "const char *", "char *", "int") static int cluster_dc_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *dc = va_arg(args, pcmk_node_t *); const char *quorum = va_arg(args, const char *); const char *dc_version_s = va_arg(args, const char *); char *dc_name G_GNUC_UNUSED = va_arg(args, char *); bool mixed_version = va_arg(args, int); if (dc) { const char *with_quorum = pcmk__btoa(crm_is_true(quorum)); const char *mixed_version_s = pcmk__btoa(mixed_version); pcmk__output_create_xml_node(out, PCMK_XE_CURRENT_DC, PCMK_XA_PRESENT, PCMK_VALUE_TRUE, PCMK_XA_VERSION, pcmk__s(dc_version_s, ""), PCMK_XA_NAME, dc->priv->name, PCMK_XA_ID, dc->priv->id, PCMK_XA_WITH_QUORUM, with_quorum, PCMK_XA_MIXED_VERSION, mixed_version_s, NULL); } else { pcmk__output_create_xml_node(out, PCMK_XE_CURRENT_DC, PCMK_XA_PRESENT, PCMK_VALUE_FALSE, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("maint-mode", "unsigned long long int") static int cluster_maint_mode_text(pcmk__output_t *out, va_list args) { unsigned long long flags = va_arg(args, unsigned long long); if (pcmk_is_set(flags, pcmk__sched_in_maintenance)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will not attempt to start, stop or recover services\n"); return pcmk_rc_ok; } else if (pcmk_is_set(flags, pcmk__sched_stop_all)) { pcmk__formatted_printf(out, "\n *** Resource management is DISABLED ***\n"); pcmk__formatted_printf(out, " The cluster will keep all resources stopped\n"); return pcmk_rc_ok; } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_html(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { out->list_item(out, NULL, "STONITH of failed nodes enabled"); } else { out->list_item(out, NULL, "STONITH of failed nodes disabled"); } if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { out->list_item(out, NULL, "Cluster is symmetric"); } else { out->list_item(out, NULL, "Cluster is asymmetric"); } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case pcmk_no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case pcmk_no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case pcmk_no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case pcmk_no_quorum_fence: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "Resource management: "); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "DISABLED"); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " (the cluster will not attempt to start, stop," " or recover services)"); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) { xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "Resource management: "); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "STOPPED"); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " (the cluster will keep all resources stopped)"); } else { out->list_item(out, NULL, "Resource management: enabled"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_log(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk__sched_in_maintenance)) { return out->info(out, "Resource management is DISABLED. The cluster will not attempt to start, stop or recover services."); } else if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_all)) { return out->info(out, "Resource management is DISABLED. The cluster has stopped all resources."); } else { return pcmk_rc_no_output; } } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_text(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { out->list_item(out, NULL, "STONITH of failed nodes enabled"); } else { out->list_item(out, NULL, "STONITH of failed nodes disabled"); } if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { out->list_item(out, NULL, "Cluster is symmetric"); } else { out->list_item(out, NULL, "Cluster is asymmetric"); } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: out->list_item(out, NULL, "No quorum policy: Freeze resources"); break; case pcmk_no_quorum_stop: out->list_item(out, NULL, "No quorum policy: Stop ALL resources"); break; case pcmk_no_quorum_demote: out->list_item(out, NULL, "No quorum policy: Demote promotable " "resources and stop all other resources"); break; case pcmk_no_quorum_ignore: out->list_item(out, NULL, "No quorum policy: Ignore"); break; case pcmk_no_quorum_fence: out->list_item(out, NULL, "No quorum policy: Suicide"); break; } return pcmk_rc_ok; } /*! * \internal * \brief Get readable string representation of a no-quorum policy * * \param[in] policy No-quorum policy * * \return String representation of \p policy */ static const char * no_quorum_policy_text(enum pe_quorum_policy policy) { switch (policy) { case pcmk_no_quorum_freeze: return PCMK_VALUE_FREEZE; case pcmk_no_quorum_stop: return PCMK_VALUE_STOP; case pcmk_no_quorum_demote: return PCMK_VALUE_DEMOTE; case pcmk_no_quorum_ignore: return PCMK_VALUE_IGNORE; case pcmk_no_quorum_fence: return PCMK_VALUE_FENCE_LEGACY; default: return PCMK_VALUE_UNKNOWN; } } PCMK__OUTPUT_ARGS("cluster-options", "pcmk_scheduler_t *") static int cluster_options_xml(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); const char *stonith_enabled = pcmk__flag_text(scheduler->flags, pcmk__sched_fencing_enabled); const char *symmetric_cluster = pcmk__flag_text(scheduler->flags, pcmk__sched_symmetric_cluster); const char *no_quorum_policy = no_quorum_policy_text(scheduler->no_quorum_policy); const char *maintenance_mode = pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance); const char *stop_all_resources = pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all); char *stonith_timeout_ms_s = pcmk__itoa(scheduler->priv->fence_timeout_ms); char *priority_fencing_delay_ms_s = pcmk__itoa(scheduler->priority_fencing_delay * 1000); pcmk__output_create_xml_node(out, PCMK_XE_CLUSTER_OPTIONS, PCMK_XA_STONITH_ENABLED, stonith_enabled, PCMK_XA_SYMMETRIC_CLUSTER, symmetric_cluster, PCMK_XA_NO_QUORUM_POLICY, no_quorum_policy, PCMK_XA_MAINTENANCE_MODE, maintenance_mode, PCMK_XA_STOP_ALL_RESOURCES, stop_all_resources, PCMK_XA_STONITH_TIMEOUT_MS, stonith_timeout_ms_s, PCMK_XA_PRIORITY_FENCING_DELAY_MS, priority_fencing_delay_ms_s, NULL); free(stonith_timeout_ms_s); free(priority_fencing_delay_ms_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_html(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "Stack: "); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%s", stack_s); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " ("); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%s", pcmk__pcmkd_state_enum2friendly(pcmkd_state)); child = pcmk__html_create(node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, ")"); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_text(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (pcmkd_state != pcmk_pacemakerd_state_invalid) { out->list_item(out, "Stack", "%s (%s)", stack_s, pcmk__pcmkd_state_enum2friendly(pcmkd_state)); } else { out->list_item(out, "Stack", "%s", stack_s); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-stack", "const char *", "enum pcmk_pacemakerd_state") static int cluster_stack_xml(pcmk__output_t *out, va_list args) { const char *stack_s = va_arg(args, const char *); enum pcmk_pacemakerd_state pcmkd_state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (pcmkd_state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(pcmkd_state); } pcmk__output_create_xml_node(out, PCMK_XE_STACK, PCMK_XA_TYPE, stack_s, PCMK_XA_PACEMAKERD_STATE, state_s, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_html(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); xmlNodePtr updated_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNodePtr changed_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; char *time_s = NULL; child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "Last updated: "); child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL); time_s = pcmk__epoch2str(NULL, 0); pcmk__xe_set_content(child, "%s", time_s); free(time_s); if (our_nodename != NULL) { child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, " on "); child = pcmk__html_create(updated_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%s", our_nodename); } child = pcmk__html_create(changed_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "Last change: "); child = pcmk__html_create(changed_node, PCMK__XE_SPAN, NULL, NULL); time_s = last_changed_string(last_written, user, client, origin); pcmk__xe_set_content(child, "%s", time_s); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_xml(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); pcmk__output_create_xml_node(out, PCMK_XE_LAST_UPDATE, PCMK_XA_TIME, time_s, PCMK_XA_ORIGIN, our_nodename, NULL); pcmk__output_create_xml_node(out, PCMK_XE_LAST_CHANGE, PCMK_XA_TIME, pcmk__s(last_written, ""), PCMK_XA_USER, pcmk__s(user, ""), PCMK_XA_CLIENT, pcmk__s(client, ""), PCMK_XA_ORIGIN, pcmk__s(origin, ""), NULL); free(time_s); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("cluster-times", "const char *", "const char *", "const char *", "const char *", "const char *") static int cluster_times_text(pcmk__output_t *out, va_list args) { const char *our_nodename = va_arg(args, const char *); const char *last_written = va_arg(args, const char *); const char *user = va_arg(args, const char *); const char *client = va_arg(args, const char *); const char *origin = va_arg(args, const char *); char *time_s = pcmk__epoch2str(NULL, 0); out->list_item(out, "Last updated", "%s%s%s", time_s, (our_nodename != NULL)? " on " : "", pcmk__s(our_nodename, "")); free(time_s); time_s = last_changed_string(last_written, user, client, origin); out->list_item(out, "Last change", " %s", time_s); free(time_s); return pcmk_rc_ok; } /*! * \internal * \brief Display a failed action in less-technical natural language * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_friendly(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { char *rsc_id = NULL; char *task = NULL; guint interval_ms = 0; time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key) || !parse_op_key(op_key, &rsc_id, &task, &interval_ms)) { pcmk__str_update(&rsc_id, "unknown resource"); pcmk__str_update(&task, "unknown action"); interval_ms = 0; } CRM_ASSERT((rsc_id != NULL) && (task != NULL)); str = g_string_sized_new(256); // Should be sufficient for most messages pcmk__g_strcat(str, rsc_id, " ", NULL); if (interval_ms != 0) { pcmk__g_strcat(str, pcmk__readable_interval(interval_ms), "-interval ", NULL); } pcmk__g_strcat(str, pcmk__readable_action(task, interval_ms), " on ", node_name, NULL); if (status == PCMK_EXEC_DONE) { pcmk__g_strcat(str, " returned '", services_ocf_exitcode_str(rc), "'", NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, " (", exit_reason, ")", NULL); } } else { pcmk__g_strcat(str, " could not be executed (", pcmk_exec_status_str(status), NULL); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ": ", exit_reason, NULL); } g_string_append_c(str, ')'); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change_epoch) == pcmk_ok) { char *s = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, " at ", s, NULL); free(s); } if (!pcmk__str_empty(exec_time)) { int exec_time_ms = 0; if ((pcmk__scan_min_int(exec_time, &exec_time_ms, 0) == pcmk_rc_ok) && (exec_time_ms > 0)) { pcmk__g_strcat(str, " after ", pcmk__readable_interval(exec_time_ms), NULL); } } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); free(rsc_id); free(task); } /*! * \internal * \brief Display a failed action with technical details * * \param[in,out] out Output object to use for display * \param[in] xml_op XML containing failed action * \param[in] op_key Operation key of failed action * \param[in] node_name Where failed action occurred * \param[in] rc OCF exit code of failed action * \param[in] status Execution status of failed action * \param[in] exit_reason Exit reason given for failed action * \param[in] exec_time String containing execution time in milliseconds */ static void failed_action_technical(pcmk__output_t *out, const xmlNode *xml_op, const char *op_key, const char *node_name, int rc, int status, const char *exit_reason, const char *exec_time) { const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); const char *exit_status = services_ocf_exitcode_str(rc); const char *lrm_status = pcmk_exec_status_str(status); time_t last_change_epoch = 0; GString *str = NULL; if (pcmk__str_empty(op_key)) { op_key = "unknown operation"; } if (pcmk__str_empty(exit_status)) { exit_status = "unknown exit status"; } if (pcmk__str_empty(call_id)) { call_id = "unknown"; } str = g_string_sized_new(256); g_string_append_printf(str, "%s on %s '%s' (%d): call=%s, status='%s'", op_key, node_name, exit_status, rc, call_id, lrm_status); if (!pcmk__str_empty(exit_reason)) { pcmk__g_strcat(str, ", exitreason='", exit_reason, "'", NULL); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change_epoch) == pcmk_ok) { char *last_change_str = pcmk__epoch2str(&last_change_epoch, 0); pcmk__g_strcat(str, ", " PCMK_XA_LAST_RC_CHANGE "=" "'", last_change_str, "'", NULL); free(last_change_str); } if (!pcmk__str_empty(queue_time)) { pcmk__g_strcat(str, ", queued=", queue_time, "ms", NULL); } if (!pcmk__str_empty(exec_time)) { pcmk__g_strcat(str, ", exec=", exec_time, "ms", NULL); } out->list_item(out, NULL, "%s", str->str); g_string_free(str, TRUE); } PCMK__OUTPUT_ARGS("failed-action", "xmlNode *", "uint32_t") static int failed_action_default(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts = va_arg(args, uint32_t); const char *op_key = pcmk__xe_history_key(xml_op); const char *node_name = crm_element_value(xml_op, PCMK_XA_UNAME); const char *exit_reason = crm_element_value(xml_op, PCMK_XA_EXIT_REASON); const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); int rc; int status; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, 0); if (pcmk__str_empty(node_name)) { node_name = "unknown node"; } if (pcmk_is_set(show_opts, pcmk_show_failed_detail)) { failed_action_technical(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } else { failed_action_friendly(out, xml_op, op_key, node_name, rc, status, exit_reason, exec_time); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action", "xmlNode *", "uint32_t") static int failed_action_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); const char *op_key = pcmk__xe_history_key(xml_op); const char *op_key_name = PCMK_XA_OP_KEY; int rc; int status; const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *exitstatus = NULL; const char *exit_reason = pcmk__s(crm_element_value(xml_op, PCMK_XA_EXIT_REASON), "none"); const char *status_s = NULL; time_t epoch = 0; gchar *exit_reason_esc = NULL; char *rc_s = NULL; xmlNodePtr node = NULL; if (pcmk__xml_needs_escape(exit_reason, pcmk__xml_escape_attr)) { exit_reason_esc = pcmk__xml_escape(exit_reason, pcmk__xml_escape_attr); exit_reason = exit_reason_esc; } pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_RC_CODE), &rc, 0); pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, 0); if (crm_element_value(xml_op, PCMK__XA_OPERATION_KEY) == NULL) { op_key_name = PCMK_XA_ID; } exitstatus = services_ocf_exitcode_str(rc); rc_s = pcmk__itoa(rc); status_s = pcmk_exec_status_str(status); node = pcmk__output_create_xml_node(out, PCMK_XE_FAILURE, op_key_name, op_key, PCMK_XA_NODE, uname, PCMK_XA_EXITSTATUS, exitstatus, PCMK_XA_EXITREASON, exit_reason, PCMK_XA_EXITCODE, rc_s, PCMK_XA_CALL, call_id, PCMK_XA_STATUS, status_s, NULL); free(rc_s); if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { const char *queue_time = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); const char *exec = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); const char *task = crm_element_value(xml_op, PCMK_XA_OPERATION); guint interval_ms = 0; char *interval_ms_s = NULL; char *rc_change = pcmk__epoch2str(&epoch, crm_time_log_date |crm_time_log_timeofday |crm_time_log_with_timezone); crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &interval_ms); interval_ms_s = crm_strdup_printf("%u", interval_ms); pcmk__xe_set_props(node, PCMK_XA_LAST_RC_CHANGE, rc_change, PCMK_XA_QUEUED, queue_time, PCMK_XA_EXEC, exec, PCMK_XA_INTERVAL, interval_ms_s, PCMK_XA_TASK, task, NULL); free(interval_ms_s); free(rc_change); } g_free(exit_reason_esc); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("failed-action-list", "pcmk_scheduler_t *", "GList *", "GList *", "uint32_t", "bool") static int failed_action_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *xml_op = NULL; int rc = pcmk_rc_no_output; if (xmlChildElementCount(scheduler->priv->failed) == 0) { return rc; } for (xml_op = pcmk__xe_first_child(scheduler->priv->failed, NULL, NULL, NULL); xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) { char *rsc = NULL; if (!pcmk__str_in_list(crm_element_value(xml_op, PCMK_XA_UNAME), only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } if (pcmk_xe_mask_probe_failure(xml_op)) { continue; } if (!parse_op_key(pcmk__xe_history_key(xml_op), &rsc, NULL, NULL)) { continue; } if (!pcmk__str_in_list(rsc, only_rsc, pcmk__str_star_matches)) { free(rsc); continue; } free(rsc); PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Failed Resource Actions"); out->message(out, "failed-action", xml_op, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } static void status_node(pcmk_node_t *node, xmlNodePtr parent, uint32_t show_opts) { int health = pe__node_health(node); xmlNode *child = NULL; // Cluster membership if (node->details->online) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK_VALUE_ONLINE); pcmk__xe_set_content(child, " online"); } else { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK_VALUE_OFFLINE); pcmk__xe_set_content(child, " OFFLINE"); } // Standby mode if (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby)) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK_VALUE_STANDBY); if (node->details->running_rsc == NULL) { pcmk__xe_set_content(child, " (in standby due to " PCMK_META_ON_FAIL ")"); } else { pcmk__xe_set_content(child, " (in standby due to " PCMK_META_ON_FAIL "," " with active resources)"); } } else if (pcmk_is_set(node->priv->flags, pcmk__node_standby)) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK_VALUE_STANDBY); if (node->details->running_rsc == NULL) { pcmk__xe_set_content(child, " (in standby)"); } else { pcmk__xe_set_content(child, " (in standby, with active resources)"); } } // Maintenance mode if (node->details->maintenance) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK__VALUE_MAINT); pcmk__xe_set_content(child, " (in maintenance mode)"); } // Node health if (health < 0) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK__VALUE_HEALTH_RED); pcmk__xe_set_content(child, " (health is RED)"); } else if (health == 0) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, PCMK__VALUE_HEALTH_YELLOW); pcmk__xe_set_content(child, " (health is YELLOW)"); } // Feature set if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { child = pcmk__html_create(parent, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, ", feature set %s", feature_set); } } } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_html(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (full) { xmlNode *item_node = NULL; xmlNode *child = NULL; if (pcmk_all_flags_set(show_opts, pcmk_show_brief | pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "Status:"); status_node(node, item_node, show_opts); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); } pcmk__output_xml_pop_parent(out); out->end_list(out); } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc2 = NULL; int rc = pcmk_rc_no_output; out->begin_list(out, NULL, NULL, "%s:", node_name); item_node = pcmk__output_xml_create_parent(out, "li", NULL); child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "Status:"); status_node(node, item_node, show_opts); for (lpc2 = node->details->running_rsc; lpc2 != NULL; lpc2 = lpc2->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) lpc2->data; PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources"); show_opts |= pcmk_show_rsc_only; out->message(out, pcmk__map_element_name(rsc->priv->xml), show_opts, rsc, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); pcmk__output_xml_pop_parent(out); out->end_list(out); } else { item_node = pcmk__output_create_xml_node(out, "li", NULL); child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "%s:", node_name); status_node(node, item_node, show_opts); } } else { out->begin_list(out, NULL, NULL, "%s:", node_name); } free(node_name); return pcmk_rc_ok; } /*! * \internal * \brief Get a human-friendly textual description of a node's status * * \param[in] node Node to check * * \return String representation of node's status */ static const char * node_text_status(const pcmk_node_t *node) { if (node->details->unclean) { if (node->details->online) { return "UNCLEAN (online)"; } else if (node->details->pending) { return "UNCLEAN (pending)"; } else { return "UNCLEAN (offline)"; } } else if (node->details->pending) { return "pending"; } else if (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby) && node->details->online) { return "standby (" PCMK_META_ON_FAIL ")"; } else if (pcmk_is_set(node->priv->flags, pcmk__node_standby)) { if (!node->details->online) { return "OFFLINE (standby)"; } else if (node->details->running_rsc == NULL) { return "standby"; } else { return "standby (with active resources)"; } } else if (node->details->maintenance) { if (node->details->online) { return "maintenance"; } else { return "OFFLINE (maintenance)"; } } else if (node->details->online) { return "online"; } return "OFFLINE"; } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_text(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); GString *str = g_string_sized_new(64); int health = pe__node_health(node); // Create a summary line with node type, name, and status if (pcmk__is_guest_or_bundle_node(node)) { g_string_append(str, "GuestNode"); } else if (pcmk__is_remote_node(node)) { g_string_append(str, "RemoteNode"); } else { g_string_append(str, "Node"); } pcmk__g_strcat(str, " ", node_name, ": ", node_text_status(node), NULL); if (health < 0) { g_string_append(str, " (health is RED)"); } else if (health == 0) { g_string_append(str, " (health is YELLOW)"); } if (pcmk_is_set(show_opts, pcmk_show_feature_set)) { const char *feature_set = get_node_feature_set(node); if (feature_set != NULL) { pcmk__g_strcat(str, ", feature set ", feature_set, NULL); } } /* If we're grouping by node, print its resources */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (pcmk_is_set(show_opts, pcmk_show_brief)) { GList *rscs = pe__filter_rsc_list(node->details->running_rsc, only_rsc); if (rscs != NULL) { uint32_t new_show_opts = (show_opts | pcmk_show_rsc_only) & ~pcmk_show_inactive_rscs; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); pe__rscs_brief_output(out, rscs, new_show_opts); out->end_list(out); out->end_list(out); g_list_free(rscs); } } else { GList *gIter2 = NULL; out->begin_list(out, NULL, NULL, "%s", str->str); out->begin_list(out, NULL, NULL, "Resources"); for (gIter2 = node->details->running_rsc; gIter2 != NULL; gIter2 = gIter2->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter2->data; show_opts |= pcmk_show_rsc_only; out->message(out, pcmk__map_element_name(rsc->priv->xml), show_opts, rsc, only_node, only_rsc); } out->end_list(out); out->end_list(out); } } else { out->list_item(out, NULL, "%s", str->str); } g_string_free(str, TRUE); free(node_name); } else { char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); out->begin_list(out, NULL, NULL, "Node: %s", node_name); free(node_name); } return pcmk_rc_ok; } /*! * \internal * \brief Convert an integer health value to a string representation * * \param[in] health Integer health value * * \retval \c PCMK_VALUE_RED if \p health is less than 0 * \retval \c PCMK_VALUE_YELLOW if \p health is equal to 0 * \retval \c PCMK_VALUE_GREEN if \p health is greater than 0 */ static const char * health_text(int health) { if (health < 0) { return PCMK_VALUE_RED; } else if (health == 0) { return PCMK_VALUE_YELLOW; } else { return PCMK_VALUE_GREEN; } } /*! * \internal * \brief Convert a node variant to a string representation * * \param[in] variant Node variant * * \retval \c PCMK_VALUE_MEMBER if \p node_type is \c pcmk__node_variant_cluster * \retval \c PCMK_VALUE_REMOTE if \p node_type is \c pcmk__node_variant_remote * \retval \c PCMK__VALUE_PING if \p node_type is \c pcmk__node_variant_ping * \retval \c PCMK_VALUE_UNKNOWN otherwise */ static const char * node_variant_text(enum pcmk__node_variant variant) { switch (variant) { case pcmk__node_variant_cluster: return PCMK_VALUE_MEMBER; case pcmk__node_variant_remote: return PCMK_VALUE_REMOTE; case pcmk__node_variant_ping: return PCMK__VALUE_PING; default: return PCMK_VALUE_UNKNOWN; } } PCMK__OUTPUT_ARGS("node", "pcmk_node_t *", "uint32_t", "bool", "GList *", "GList *") static int node_xml(pcmk__output_t *out, va_list args) { pcmk_node_t *node = va_arg(args, pcmk_node_t *); uint32_t show_opts G_GNUC_UNUSED = va_arg(args, uint32_t); bool full = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); if (full) { const char *online = pcmk__btoa(node->details->online); const char *standby = pcmk__flag_text(node->priv->flags, pcmk__node_standby); const char *standby_onfail = pcmk__flag_text(node->priv->flags, pcmk__node_fail_standby); const char *maintenance = pcmk__btoa(node->details->maintenance); const char *pending = pcmk__btoa(node->details->pending); const char *unclean = pcmk__btoa(node->details->unclean); const char *health = health_text(pe__node_health(node)); const char *feature_set = get_node_feature_set(node); const char *shutdown = pcmk__btoa(node->details->shutdown); const char *expected_up = pcmk__flag_text(node->priv->flags, pcmk__node_expected_up); const bool is_dc = pcmk__same_node(node, node->priv->scheduler->dc_node); int length = g_list_length(node->details->running_rsc); char *resources_running = pcmk__itoa(length); const char *node_type = node_variant_text(node->priv->variant); int rc = pcmk_rc_ok; rc = pe__name_and_nvpairs_xml(out, true, PCMK_XE_NODE, PCMK_XA_NAME, node->priv->name, PCMK_XA_ID, node->priv->id, PCMK_XA_ONLINE, online, PCMK_XA_STANDBY, standby, PCMK_XA_STANDBY_ONFAIL, standby_onfail, PCMK_XA_MAINTENANCE, maintenance, PCMK_XA_PENDING, pending, PCMK_XA_UNCLEAN, unclean, PCMK_XA_HEALTH, health, PCMK_XA_FEATURE_SET, feature_set, PCMK_XA_SHUTDOWN, shutdown, PCMK_XA_EXPECTED_UP, expected_up, PCMK_XA_IS_DC, pcmk__btoa(is_dc), PCMK_XA_RESOURCES_RUNNING, resources_running, PCMK_XA_TYPE, node_type, NULL); free(resources_running); CRM_ASSERT(rc == pcmk_rc_ok); if (pcmk__is_guest_or_bundle_node(node)) { xmlNodePtr xml_node = pcmk__output_xml_peek_parent(out); crm_xml_add(xml_node, PCMK_XA_ID_AS_RESOURCE, node->priv->remote->priv->launcher->id); } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *lpc = NULL; for (lpc = node->details->running_rsc; lpc != NULL; lpc = lpc->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) lpc->data; show_opts |= pcmk_show_rsc_only; out->message(out, pcmk__map_element_name(rsc->priv->xml), show_opts, rsc, only_node, only_rsc); } } out->end_list(out); } else { pcmk__output_xml_create_parent(out, PCMK_XE_NODE, PCMK_XA_NAME, node->priv->name, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_text(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v; if (value == NULL) { v = 0; } else { pcmk__scan_min_int(value, &v, INT_MIN); } if (v <= 0) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is lost", name, value); } else if (v < expected_score) { out->list_item(out, NULL, "%-32s\t: %-10s\t: Connectivity is degraded (Expected=%d)", name, value, expected_score); } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } } else { out->list_item(out, NULL, "%-32s\t: %-10s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_html(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); if (add_extra) { int v = 0; xmlNodePtr item_node = pcmk__output_create_xml_node(out, "li", NULL); xmlNode *child = NULL; if (value != NULL) { pcmk__scan_min_int(value, &v, INT_MIN); } child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, NULL); pcmk__xe_set_content(child, "%s: %s", name, value); if (v <= 0) { child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "(connectivity is lost)"); } else if (v < expected_score) { child = pcmk__html_create(item_node, PCMK__XE_SPAN, NULL, PCMK__VALUE_BOLD); pcmk__xe_set_content(child, "(connectivity is degraded -- expected %d)", expected_score); } } else { out->list_item(out, NULL, "%s: %s", name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNode *") static int node_and_op(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pcmk_resource_t *rsc = NULL; gchar *node_str = NULL; char *last_change_str = NULL; const char *op_rsc = crm_element_value(xml_op, PCMK_XA_RESOURCE); int status; time_t last_change = 0; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, PCMK_EXEC_UNKNOWN); rsc = pe_find_resource(scheduler->priv->resources, op_rsc); if (rsc) { const pcmk_node_t *node = pcmk__current_node(rsc); const char *target_role = g_hash_table_lookup(rsc->priv->meta, PCMK_META_TARGET_ROLE); uint32_t show_opts = pcmk_show_rsc_only | pcmk_show_pending; if (node == NULL) { node = rsc->priv->pending_node; } node_str = pcmk__native_output_string(rsc, rsc_printable_id(rsc), node, show_opts, target_role, false); } else { node_str = crm_strdup_printf("Unknown resource %s", op_rsc); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change) == pcmk_ok) { const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); last_change_str = crm_strdup_printf(", %s='%s', exec=%sms", PCMK_XA_LAST_RC_CHANGE, pcmk__trim(ctime(&last_change)), exec_time); } out->list_item(out, NULL, "%s: %s (node=%s, call=%s, rc=%s%s): %s", node_str, pcmk__xe_history_key(xml_op), crm_element_value(xml_op, PCMK_XA_UNAME), crm_element_value(xml_op, PCMK__XA_CALL_ID), crm_element_value(xml_op, PCMK__XA_RC_CODE), last_change_str ? last_change_str : "", pcmk_exec_status_str(status)); g_free(node_str); free(last_change_str); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-and-op", "pcmk_scheduler_t *", "xmlNode *") static int node_and_op_xml(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); xmlNodePtr xml_op = va_arg(args, xmlNodePtr); pcmk_resource_t *rsc = NULL; const char *uname = crm_element_value(xml_op, PCMK_XA_UNAME); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); const char *rc_s = crm_element_value(xml_op, PCMK__XA_RC_CODE); const char *status_s = NULL; const char *op_rsc = crm_element_value(xml_op, PCMK_XA_RESOURCE); int status; time_t last_change = 0; xmlNode *node = NULL; pcmk__scan_min_int(crm_element_value(xml_op, PCMK__XA_OP_STATUS), &status, PCMK_EXEC_UNKNOWN); status_s = pcmk_exec_status_str(status); node = pcmk__output_create_xml_node(out, PCMK_XE_OPERATION, PCMK_XA_OP, pcmk__xe_history_key(xml_op), PCMK_XA_NODE, uname, PCMK_XA_CALL, call_id, PCMK_XA_RC, rc_s, PCMK_XA_STATUS, status_s, NULL); rsc = pe_find_resource(scheduler->priv->resources, op_rsc); if (rsc) { const char *class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); const char *provider = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER); const char *kind = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE); bool has_provider = pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider); char *agent_tuple = crm_strdup_printf("%s:%s:%s", class, (has_provider? provider : ""), kind); pcmk__xe_set_props(node, PCMK_XA_RSC, rsc_printable_id(rsc), PCMK_XA_AGENT, agent_tuple, NULL); free(agent_tuple); } if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &last_change) == pcmk_ok) { const char *last_rc_change = pcmk__trim(ctime(&last_change)); const char *exec_time = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); pcmk__xe_set_props(node, PCMK_XA_LAST_RC_CHANGE, last_rc_change, PCMK_XA_EXEC_TIME, exec_time, NULL); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute", "const char *", "const char *", "bool", "int") static int node_attribute_xml(pcmk__output_t *out, va_list args) { const char *name = va_arg(args, const char *); const char *value = va_arg(args, const char *); bool add_extra = va_arg(args, int); int expected_score = va_arg(args, int); xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_ATTRIBUTE, PCMK_XA_NAME, name, PCMK_XA_VALUE, value, NULL); if (add_extra) { char *buf = pcmk__itoa(expected_score); crm_xml_add(node, PCMK_XA_EXPECTED, buf); free(buf); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-attribute-list", "pcmk_scheduler_t *", "uint32_t", "bool", "GList *", "GList *") static int node_attribute_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); int rc = pcmk_rc_no_output; /* Display each node's attributes */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = gIter->data; GList *attr_list = NULL; GHashTableIter iter; gpointer key; if (!node || !node->details || !node->details->online) { continue; } g_hash_table_iter_init(&iter, node->priv->attrs); while (g_hash_table_iter_next (&iter, &key, NULL)) { attr_list = filter_attr_list(attr_list, key); } if (attr_list == NULL) { continue; } if (!pcmk__str_in_list(node->priv->name, only_node, pcmk__str_star_matches|pcmk__str_casei)) { g_list_free(attr_list); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node Attributes"); out->message(out, "node", node, show_opts, false, only_node, only_rsc); for (GList *aIter = attr_list; aIter != NULL; aIter = aIter->next) { const char *name = aIter->data; const char *value = NULL; int expected_score = 0; bool add_extra = false; value = pcmk__node_attr(node, name, NULL, pcmk__rsc_node_current); add_extra = add_extra_info(node, node->details->running_rsc, scheduler, name, &expected_score); /* Print attribute name and value */ out->message(out, "node-attribute", name, value, add_extra, expected_score); } g_list_free(attr_list); out->end_list(out); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *") static int node_capacity(pcmk__output_t *out, va_list args) { const pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *comment = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s capacity:", comment, pcmk__node_name(node)); g_hash_table_foreach(node->priv->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-capacity", "const pcmk_node_t *", "const char *") static int node_capacity_xml(pcmk__output_t *out, va_list args) { const pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *uname = node->priv->name; const char *comment = va_arg(args, const char *); xmlNodePtr xml_node = pcmk__output_create_xml_node(out, PCMK_XE_CAPACITY, PCMK_XA_NODE, uname, PCMK_XA_COMMENT, comment, NULL); g_hash_table_foreach(node->priv->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-history-list", "pcmk_scheduler_t *", "pcmk_node_t *", "xmlNode *", "GList *", "GList *", "uint32_t", "uint32_t") static int node_history_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); xmlNode *node_state = va_arg(args, xmlNode *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); xmlNode *lrm_rsc = NULL; xmlNode *rsc_entry = NULL; int rc = pcmk_rc_no_output; lrm_rsc = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL); lrm_rsc = pcmk__xe_first_child(lrm_rsc, PCMK__XE_LRM_RESOURCES, NULL, NULL); /* Print history of each of the node's resources */ for (rsc_entry = pcmk__xe_first_child(lrm_rsc, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) { const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); pcmk_resource_t *rsc = pe_find_resource(scheduler->priv->resources, rsc_id); const pcmk_resource_t *parent = pe__const_top_resource(rsc, false); /* We can't use is_filtered here to filter group resources. For is_filtered, * we have to decide whether to check the parent or not. If we check the * parent, all elements of a group will always be printed because that's how * is_filtered works for groups. If we do not check the parent, sometimes * this will filter everything out. * * For other resource types, is_filtered is okay. */ if (pcmk__is_group(parent)) { if (!pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) && !pcmk__str_in_list(rsc_printable_id(parent), only_rsc, pcmk__str_star_matches)) { continue; } } else if (rsc->priv->fns->is_filtered(rsc, only_rsc, TRUE)) { continue; } if (!pcmk_is_set(section_opts, pcmk_section_operations)) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pcmk__fc_default, NULL); if (failcount <= 0) { continue; } if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-history", rsc, rsc_id, false, failcount, last_failure, false); } else { GList *op_list = get_operation_list(rsc_entry); pcmk_resource_t *rsc = NULL; if (op_list == NULL) { continue; } rsc = pe_find_resource(scheduler->priv->resources, crm_element_value(rsc_entry, PCMK_XA_ID)); if (rc == pcmk_rc_no_output) { rc = pcmk_rc_ok; out->message(out, "node", node, show_opts, false, only_node, only_rsc); } out->message(out, "resource-operation-list", scheduler, rsc, node, op_list, show_opts); } } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_html(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; if (!pcmk__str_in_list(node->priv->name, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Node List"); out->message(out, "node", node, show_opts, true, only_node, only_rsc); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_text(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); /* space-separated lists of node names */ GString *online_nodes = NULL; GString *online_remote_nodes = NULL; GString *online_guest_nodes = NULL; GString *offline_nodes = NULL; GString *offline_remote_nodes = NULL; int rc = pcmk_rc_no_output; for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; char *node_name = pe__node_display_name(node, pcmk_is_set(show_opts, pcmk_show_node_id)); if (!pcmk__str_in_list(node->priv->name, only_node, pcmk__str_star_matches|pcmk__str_casei)) { free(node_name); continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, "Node List"); // Determine whether to display node individually or in a list if (node->details->unclean || node->details->pending || (pcmk_is_set(node->priv->flags, pcmk__node_fail_standby) && node->details->online) || pcmk_is_set(node->priv->flags, pcmk__node_standby) || node->details->maintenance || pcmk_is_set(show_opts, pcmk_show_rscs_by_node) || pcmk_is_set(show_opts, pcmk_show_feature_set) || (pe__node_health(node) <= 0)) { // Display node individually } else if (node->details->online) { // Display online node in a list if (pcmk__is_guest_or_bundle_node(node)) { pcmk__add_word(&online_guest_nodes, 1024, node_name); } else if (pcmk__is_remote_node(node)) { pcmk__add_word(&online_remote_nodes, 1024, node_name); } else { pcmk__add_word(&online_nodes, 1024, node_name); } free(node_name); continue; } else { // Display offline node in a list if (pcmk__is_remote_node(node)) { pcmk__add_word(&offline_remote_nodes, 1024, node_name); } else if (pcmk__is_guest_or_bundle_node(node)) { /* ignore offline guest nodes */ } else { pcmk__add_word(&offline_nodes, 1024, node_name); } free(node_name); continue; } /* If we get here, node is in bad state, or we're grouping by node */ out->message(out, "node", node, show_opts, true, only_node, only_rsc); free(node_name); } /* If we're not grouping by node, summarize nodes by status */ if (online_nodes != NULL) { out->list_item(out, "Online", "[ %s ]", (const char *) online_nodes->str); g_string_free(online_nodes, TRUE); } if (offline_nodes != NULL) { out->list_item(out, "OFFLINE", "[ %s ]", (const char *) offline_nodes->str); g_string_free(offline_nodes, TRUE); } if (online_remote_nodes) { out->list_item(out, "RemoteOnline", "[ %s ]", (const char *) online_remote_nodes->str); g_string_free(online_remote_nodes, TRUE); } if (offline_remote_nodes) { out->list_item(out, "RemoteOFFLINE", "[ %s ]", (const char *) offline_remote_nodes->str); g_string_free(offline_remote_nodes, TRUE); } if (online_guest_nodes != NULL) { out->list_item(out, "GuestOnline", "[ %s ]", (const char *) online_guest_nodes->str); g_string_free(online_guest_nodes, TRUE); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-list", "GList *", "GList *", "GList *", "uint32_t", "bool") static int node_list_xml(pcmk__output_t *out, va_list args) { GList *nodes = va_arg(args, GList *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer G_GNUC_UNUSED = va_arg(args, int); /* PCMK_XE_NODES acts as the list's element name for CLI tools that use * pcmk__output_enable_list_element. Otherwise PCMK_XE_NODES is the * value of the list's PCMK_XA_NAME attribute. */ out->begin_list(out, NULL, NULL, PCMK_XE_NODES); for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *node = (pcmk_node_t *) gIter->data; if (!pcmk__str_in_list(node->priv->name, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } out->message(out, "node", node, show_opts, true, only_node, only_rsc); } out->end_list(out); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-summary", "pcmk_scheduler_t *", "GList *", "GList *", "uint32_t", "uint32_t", "bool") static int node_summary(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); uint32_t section_opts = va_arg(args, uint32_t); uint32_t show_opts = va_arg(args, uint32_t); bool print_spacer = va_arg(args, int); xmlNode *node_state = NULL; xmlNode *cib_status = pcmk_find_cib_element(scheduler->input, PCMK_XE_STATUS); int rc = pcmk_rc_no_output; if (xmlChildElementCount(cib_status) == 0) { return rc; } for (node_state = pcmk__xe_first_child(cib_status, PCMK__XE_NODE_STATE, NULL, NULL); node_state != NULL; node_state = pcmk__xe_next_same(node_state)) { pcmk_node_t *node = pe_find_node_id(scheduler->nodes, pcmk__xe_id(node_state)); if (!node || !node->details || !node->details->online) { continue; } if (!pcmk__str_in_list(node->priv->name, only_node, pcmk__str_star_matches|pcmk__str_casei)) { continue; } PCMK__OUTPUT_LIST_HEADER(out, print_spacer, rc, pcmk_is_set(section_opts, pcmk_section_operations) ? "Operations" : "Migration Summary"); out->message(out, "node-history-list", scheduler, node, node_state, only_node, only_rsc, section_opts, show_opts); } PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *", "const char *", "const char *") static int node_weight(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char *score = va_arg(args, const char *); if (rsc) { out->list_item(out, NULL, "%s: %s allocation score on %s: %s", prefix, rsc->id, uname, score); } else { out->list_item(out, NULL, "%s: %s = %s", prefix, uname, score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("node-weight", "const pcmk_resource_t *", "const char *", "const char *", "const char *") static int node_weight_xml(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); const char *prefix = va_arg(args, const char *); const char *uname = va_arg(args, const char *); const char *score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_NODE_WEIGHT, PCMK_XA_FUNCTION, prefix, PCMK_XA_NODE, uname, PCMK_XA_SCORE, score, NULL); if (rsc) { crm_xml_add(node, PCMK_XA_ID, rsc->id); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNode *", "const char *", "const char *", "int", "uint32_t") static int op_history_text(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); char *buf = op_history_string(xml_op, task, interval_ms_s, rc, pcmk_is_set(show_opts, pcmk_show_timing)); out->list_item(out, NULL, "%s", buf); free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("op-history", "xmlNode *", "const char *", "const char *", "int", "uint32_t") static int op_history_xml(pcmk__output_t *out, va_list args) { xmlNodePtr xml_op = va_arg(args, xmlNodePtr); const char *task = va_arg(args, const char *); const char *interval_ms_s = va_arg(args, const char *); int rc = va_arg(args, int); uint32_t show_opts = va_arg(args, uint32_t); const char *call_id = crm_element_value(xml_op, PCMK__XA_CALL_ID); char *rc_s = pcmk__itoa(rc); const char *rc_text = services_ocf_exitcode_str(rc); xmlNodePtr node = NULL; node = pcmk__output_create_xml_node(out, PCMK_XE_OPERATION_HISTORY, PCMK_XA_CALL, call_id, PCMK_XA_TASK, task, PCMK_XA_RC, rc_s, PCMK_XA_RC_TEXT, rc_text, NULL); free(rc_s); if (interval_ms_s && !pcmk__str_eq(interval_ms_s, "0", pcmk__str_casei)) { char *s = crm_strdup_printf("%sms", interval_ms_s); crm_xml_add(node, PCMK_XA_INTERVAL, s); free(s); } if (pcmk_is_set(show_opts, pcmk_show_timing)) { const char *value = NULL; time_t epoch = 0; if ((crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &epoch) == pcmk_ok) && (epoch > 0)) { char *s = pcmk__epoch2str(&epoch, 0); crm_xml_add(node, PCMK_XA_LAST_RC_CHANGE, s); free(s); } value = crm_element_value(xml_op, PCMK_XA_EXEC_TIME); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, PCMK_XA_EXEC_TIME, s); free(s); } value = crm_element_value(xml_op, PCMK_XA_QUEUE_TIME); if (value) { char *s = crm_strdup_printf("%sms", value); crm_xml_add(node, PCMK_XA_QUEUE_TIME, s); free(s); } } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int promotion_score(pcmk__output_t *out, va_list args) { pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *chosen = va_arg(args, pcmk_node_t *); const char *score = va_arg(args, const char *); if (chosen == NULL) { out->list_item(out, NULL, "%s promotion score (inactive): %s", child_rsc->id, score); } else { out->list_item(out, NULL, "%s promotion score on %s: %s", child_rsc->id, pcmk__node_name(chosen), score); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("promotion-score", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int promotion_score_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *child_rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *chosen = va_arg(args, pcmk_node_t *); const char *score = va_arg(args, const char *); xmlNodePtr node = pcmk__output_create_xml_node(out, PCMK_XE_PROMOTION_SCORE, PCMK_XA_ID, child_rsc->id, PCMK_XA_SCORE, score, NULL); if (chosen) { crm_xml_add(node, PCMK_XA_NODE, chosen->priv->name); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool") static int resource_config(pcmk__output_t *out, va_list args) { const pcmk_resource_t *rsc = va_arg(args, const pcmk_resource_t *); GString *xml_buf = g_string_sized_new(1024); bool raw = va_arg(args, int); formatted_xml_buf(rsc, xml_buf, raw); out->output_xml(out, PCMK_XE_XML, xml_buf->str); g_string_free(xml_buf, TRUE); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-config", "const pcmk_resource_t *", "bool") static int resource_config_text(pcmk__output_t *out, va_list args) { pcmk__formatted_printf(out, "Resource XML:\n"); return resource_config(out, args); } PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *", "bool", "int", "time_t", "bool") static int resource_history_text(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); char *buf = resource_history_string(rsc, rsc_id, all, failcount, last_failure); if (as_header) { out->begin_list(out, NULL, NULL, "%s", buf); } else { out->list_item(out, NULL, "%s", buf); } free(buf); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-history", "pcmk_resource_t *", "const char *", "bool", "int", "time_t", "bool") static int resource_history_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); const char *rsc_id = va_arg(args, const char *); bool all = va_arg(args, int); int failcount = va_arg(args, int); time_t last_failure = va_arg(args, time_t); bool as_header = va_arg(args, int); xmlNodePtr node = pcmk__output_xml_create_parent(out, PCMK_XE_RESOURCE_HISTORY, PCMK_XA_ID, rsc_id, NULL); if (rsc == NULL) { pcmk__xe_set_bool_attr(node, PCMK_XA_ORPHAN, true); } else if (all || failcount || last_failure > 0) { char *migration_s = pcmk__itoa(rsc->priv->ban_after_failures); pcmk__xe_set_props(node, PCMK_XA_ORPHAN, PCMK_VALUE_FALSE, PCMK_META_MIGRATION_THRESHOLD, migration_s, NULL); free(migration_s); if (failcount > 0) { char *s = pcmk__itoa(failcount); crm_xml_add(node, PCMK_XA_FAIL_COUNT, s); free(s); } if (last_failure > 0) { char *s = pcmk__epoch2str(&last_failure, 0); crm_xml_add(node, PCMK_XA_LAST_FAILURE, s); free(s); } } if (!as_header) { pcmk__output_xml_pop_parent(out); } return pcmk_rc_ok; } static void print_resource_header(pcmk__output_t *out, uint32_t show_opts) { if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { /* Active resources have already been printed by node */ out->begin_list(out, NULL, NULL, "Inactive Resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->begin_list(out, NULL, NULL, "Full List of Resources"); } else { out->begin_list(out, NULL, NULL, "Active Resources"); } } PCMK__OUTPUT_ARGS("resource-list", "pcmk_scheduler_t *", "uint32_t", "bool", "GList *", "GList *", "bool") static int resource_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler = va_arg(args, pcmk_scheduler_t *); uint32_t show_opts = va_arg(args, uint32_t); bool print_summary = va_arg(args, int); GList *only_node = va_arg(args, GList *); GList *only_rsc = va_arg(args, GList *); bool print_spacer = va_arg(args, int); GList *rsc_iter; int rc = pcmk_rc_no_output; bool printed_header = false; /* If we already showed active resources by node, and * we're not showing inactive resources, we have nothing to do */ if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node) && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { return rc; } /* If we haven't already printed resources grouped by node, * and brief output was requested, print resource summary */ if (pcmk_is_set(show_opts, pcmk_show_brief) && !pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { GList *rscs = pe__filter_rsc_list(scheduler->priv->resources, only_rsc); PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; rc = pe__rscs_brief_output(out, rscs, show_opts); g_list_free(rscs); } /* For each resource, display it if appropriate */ for (rsc_iter = scheduler->priv->resources; rsc_iter != NULL; rsc_iter = rsc_iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) rsc_iter->data; int x; /* Complex resources may have some sub-resources active and some inactive */ gboolean is_active = rsc->priv->fns->active(rsc, TRUE); gboolean partially_active = rsc->priv->fns->active(rsc, FALSE); /* Skip inactive orphans (deleted but still in CIB) */ if (pcmk_is_set(rsc->flags, pcmk__rsc_removed) && !is_active) { continue; /* Skip active resources if we already displayed them by node */ } else if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { if (is_active) { continue; } /* Skip primitives already counted in a brief summary */ } else if (pcmk_is_set(show_opts, pcmk_show_brief) && pcmk__is_primitive(rsc)) { continue; /* Skip resources that aren't at least partially active, * unless we're displaying inactive resources */ } else if (!partially_active && !pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { continue; } else if (partially_active && !pe__rsc_running_on_any(rsc, only_node)) { continue; } if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } /* Print this resource */ x = out->message(out, pcmk__map_element_name(rsc->priv->xml), show_opts, rsc, only_node, only_rsc); if (x == pcmk_rc_ok) { rc = pcmk_rc_ok; } } if (print_summary && rc != pcmk_rc_ok) { if (!printed_header) { PCMK__OUTPUT_SPACER_IF(out, print_spacer); print_resource_header(out, show_opts); printed_header = true; } if (pcmk_is_set(show_opts, pcmk_show_rscs_by_node)) { out->list_item(out, NULL, "No inactive resources"); } else if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) { out->list_item(out, NULL, "No resources"); } else { out->list_item(out, NULL, "No active resources"); } } if (printed_header) { out->end_list(out); } return rc; } PCMK__OUTPUT_ARGS("resource-operation-list", "pcmk_scheduler_t *", "pcmk_resource_t *", "pcmk_node_t *", "GList *", "uint32_t") static int resource_operation_list(pcmk__output_t *out, va_list args) { pcmk_scheduler_t *scheduler G_GNUC_UNUSED = va_arg(args, pcmk_scheduler_t *); pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); GList *op_list = va_arg(args, GList *); uint32_t show_opts = va_arg(args, uint32_t); GList *gIter = NULL; int rc = pcmk_rc_no_output; /* Print each operation */ for (gIter = op_list; gIter != NULL; gIter = gIter->next) { xmlNode *xml_op = (xmlNode *) gIter->data; const char *task = crm_element_value(xml_op, PCMK_XA_OPERATION); const char *interval_ms_s = crm_element_value(xml_op, PCMK_META_INTERVAL); const char *op_rc = crm_element_value(xml_op, PCMK__XA_RC_CODE); int op_rc_i; pcmk__scan_min_int(op_rc, &op_rc_i, 0); /* Display 0-interval monitors as "probe" */ if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei) && pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches | pcmk__str_casei)) { task = "probe"; } /* If this is the first printed operation, print heading for resource */ if (rc == pcmk_rc_no_output) { time_t last_failure = 0; int failcount = pe_get_failcount(node, rsc, &last_failure, pcmk__fc_default, NULL); out->message(out, "resource-history", rsc, rsc_printable_id(rsc), true, failcount, last_failure, true); rc = pcmk_rc_ok; } /* Print the operation */ out->message(out, "op-history", xml_op, task, interval_ms_s, op_rc_i, show_opts); } /* Free the list we created (no need to free the individual items) */ g_list_free(op_list); PCMK__OUTPUT_LIST_FOOTER(out, rc); return rc; } PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int resource_util(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *fn = va_arg(args, const char *); char *dump_text = crm_strdup_printf("%s: %s utilization on %s:", fn, rsc->id, pcmk__node_name(node)); g_hash_table_foreach(rsc->priv->utilization, append_dump_text, &dump_text); out->list_item(out, NULL, "%s", dump_text); free(dump_text); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("resource-util", "pcmk_resource_t *", "pcmk_node_t *", "const char *") static int resource_util_xml(pcmk__output_t *out, va_list args) { pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *); pcmk_node_t *node = va_arg(args, pcmk_node_t *); const char *uname = node->priv->name; const char *fn = va_arg(args, const char *); xmlNodePtr xml_node = NULL; xml_node = pcmk__output_create_xml_node(out, PCMK_XE_UTILIZATION, PCMK_XA_RESOURCE, rsc->id, PCMK_XA_NODE, uname, PCMK_XA_FUNCTION, fn, NULL); g_hash_table_foreach(rsc->priv->utilization, add_dump_node, xml_node); return pcmk_rc_ok; } static inline const char * ticket_status(pcmk__ticket_t *ticket) { if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) { return PCMK_VALUE_GRANTED; } return PCMK_VALUE_REVOKED; } static inline const char * ticket_standby_text(pcmk__ticket_t *ticket) { return pcmk_is_set(ticket->flags, pcmk__ticket_standby)? " [standby]" : ""; } PCMK__OUTPUT_ARGS("ticket", "pcmk__ticket_t *", "bool", "bool") static int ticket_default(pcmk__output_t *out, va_list args) { pcmk__ticket_t *ticket = va_arg(args, pcmk__ticket_t *); bool raw = va_arg(args, int); bool details = va_arg(args, int); GString *detail_str = NULL; if (raw) { out->list_item(out, ticket->id, "%s", ticket->id); return pcmk_rc_ok; } if (details && g_hash_table_size(ticket->state) > 0) { GHashTableIter iter; const char *name = NULL; const char *value = NULL; bool already_added = false; detail_str = g_string_sized_new(100); pcmk__g_strcat(detail_str, "\t(", NULL); g_hash_table_iter_init(&iter, ticket->state); while (g_hash_table_iter_next(&iter, (void **) &name, (void **) &value)) { if (already_added) { g_string_append_printf(detail_str, ", %s=", name); } else { g_string_append_printf(detail_str, "%s=", name); already_added = true; } if (pcmk__str_any_of(name, PCMK_XA_LAST_GRANTED, "expires", NULL)) { char *epoch_str = NULL; long long time_ll; pcmk__scan_ll(value, &time_ll, 0); epoch_str = pcmk__epoch2str((const time_t *) &time_ll, 0); pcmk__g_strcat(detail_str, epoch_str, NULL); free(epoch_str); } else { pcmk__g_strcat(detail_str, value, NULL); } } pcmk__g_strcat(detail_str, ")", NULL); } if (ticket->last_granted > -1) { /* Prior to the introduction of the details & raw arguments to this * function, last-granted would always be added in this block. We need * to preserve that behavior. At the same time, we also need to preserve * the existing behavior from crm_ticket, which would include last-granted * as part of the (...) detail string. * * Luckily we can check detail_str - if it's NULL, either there were no * details, or we are preserving the previous behavior of this function. * If it's not NULL, we are either preserving the previous behavior of * crm_ticket or we were given details=true as an argument. */ if (detail_str == NULL) { char *epoch_str = pcmk__epoch2str(&(ticket->last_granted), 0); out->list_item(out, NULL, "%s\t%s%s last-granted=\"%s\"", ticket->id, ticket_status(ticket), ticket_standby_text(ticket), pcmk__s(epoch_str, "")); free(epoch_str); } else { out->list_item(out, NULL, "%s\t%s%s %s", ticket->id, ticket_status(ticket), ticket_standby_text(ticket), detail_str->str); } } else { out->list_item(out, NULL, "%s\t%s%s%s", ticket->id, ticket_status(ticket), ticket_standby_text(ticket), detail_str != NULL ? detail_str->str : ""); } if (detail_str != NULL) { g_string_free(detail_str, TRUE); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket", "pcmk__ticket_t *", "bool", "bool") static int ticket_xml(pcmk__output_t *out, va_list args) { pcmk__ticket_t *ticket = va_arg(args, pcmk__ticket_t *); bool raw G_GNUC_UNUSED = va_arg(args, int); bool details G_GNUC_UNUSED = va_arg(args, int); const char *standby = pcmk__flag_text(ticket->flags, pcmk__ticket_standby); xmlNodePtr node = NULL; GHashTableIter iter; const char *name = NULL; const char *value = NULL; node = pcmk__output_create_xml_node(out, PCMK_XE_TICKET, PCMK_XA_ID, ticket->id, PCMK_XA_STATUS, ticket_status(ticket), PCMK_XA_STANDBY, standby, NULL); if (ticket->last_granted > -1) { char *buf = pcmk__epoch2str(&ticket->last_granted, 0); crm_xml_add(node, PCMK_XA_LAST_GRANTED, buf); free(buf); } g_hash_table_iter_init(&iter, ticket->state); while (g_hash_table_iter_next(&iter, (void **) &name, (void **) &value)) { /* PCMK_XA_LAST_GRANTED and "expires" are already added by the check * for ticket->last_granted above. */ if (pcmk__str_any_of(name, PCMK_XA_LAST_GRANTED, PCMK_XA_EXPIRES, NULL)) { continue; } crm_xml_add(node, name, value); } return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("ticket-list", "GHashTable *", "bool", "bool", "bool") static int ticket_list(pcmk__output_t *out, va_list args) { GHashTable *tickets = va_arg(args, GHashTable *); bool print_spacer = va_arg(args, int); bool raw = va_arg(args, int); bool details = va_arg(args, int); GHashTableIter iter; gpointer value; if (g_hash_table_size(tickets) == 0) { return pcmk_rc_no_output; } PCMK__OUTPUT_SPACER_IF(out, print_spacer); /* Print section heading */ out->begin_list(out, NULL, NULL, "Tickets"); /* Print each ticket */ g_hash_table_iter_init(&iter, tickets); while (g_hash_table_iter_next(&iter, NULL, &value)) { pcmk__ticket_t *ticket = (pcmk__ticket_t *) value; out->message(out, "ticket", ticket, raw, details); } /* Close section */ out->end_list(out); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "ban", "default", ban_text }, { "ban", "html", ban_html }, { "ban", "xml", ban_xml }, { "ban-list", "default", ban_list }, { "bundle", "default", pe__bundle_text }, { "bundle", "xml", pe__bundle_xml }, { "bundle", "html", pe__bundle_html }, { "clone", "default", pe__clone_default }, { "clone", "xml", pe__clone_xml }, { "cluster-counts", "default", cluster_counts_text }, { "cluster-counts", "html", cluster_counts_html }, { "cluster-counts", "xml", cluster_counts_xml }, { "cluster-dc", "default", cluster_dc_text }, { "cluster-dc", "html", cluster_dc_html }, { "cluster-dc", "xml", cluster_dc_xml }, { "cluster-options", "default", cluster_options_text }, { "cluster-options", "html", cluster_options_html }, { "cluster-options", "log", cluster_options_log }, { "cluster-options", "xml", cluster_options_xml }, { "cluster-summary", "default", cluster_summary }, { "cluster-summary", "html", cluster_summary_html }, { "cluster-stack", "default", cluster_stack_text }, { "cluster-stack", "html", cluster_stack_html }, { "cluster-stack", "xml", cluster_stack_xml }, { "cluster-times", "default", cluster_times_text }, { "cluster-times", "html", cluster_times_html }, { "cluster-times", "xml", cluster_times_xml }, { "failed-action", "default", failed_action_default }, { "failed-action", "xml", failed_action_xml }, { "failed-action-list", "default", failed_action_list }, { "group", "default", pe__group_default}, { "group", "xml", pe__group_xml }, { "maint-mode", "text", cluster_maint_mode_text }, { "node", "default", node_text }, { "node", "html", node_html }, { "node", "xml", node_xml }, { "node-and-op", "default", node_and_op }, { "node-and-op", "xml", node_and_op_xml }, { "node-capacity", "default", node_capacity }, { "node-capacity", "xml", node_capacity_xml }, { "node-history-list", "default", node_history_list }, { "node-list", "default", node_list_text }, { "node-list", "html", node_list_html }, { "node-list", "xml", node_list_xml }, { "node-weight", "default", node_weight }, { "node-weight", "xml", node_weight_xml }, { "node-attribute", "default", node_attribute_text }, { "node-attribute", "html", node_attribute_html }, { "node-attribute", "xml", node_attribute_xml }, { "node-attribute-list", "default", node_attribute_list }, { "node-summary", "default", node_summary }, { "op-history", "default", op_history_text }, { "op-history", "xml", op_history_xml }, { "primitive", "default", pe__resource_text }, { "primitive", "xml", pe__resource_xml }, { "primitive", "html", pe__resource_html }, { "promotion-score", "default", promotion_score }, { "promotion-score", "xml", promotion_score_xml }, { "resource-config", "default", resource_config }, { "resource-config", "text", resource_config_text }, { "resource-history", "default", resource_history_text }, { "resource-history", "xml", resource_history_xml }, { "resource-list", "default", resource_list }, { "resource-operation-list", "default", resource_operation_list }, { "resource-util", "default", resource_util }, { "resource-util", "xml", resource_util_xml }, { "ticket", "default", ticket_default }, { "ticket", "xml", ticket_xml }, { "ticket-list", "default", ticket_list }, { NULL, NULL, NULL } }; void pe__register_messages(pcmk__output_t *out) { pcmk__register_messages(out, fmt_functions); } diff --git a/lib/pengine/status.c b/lib/pengine/status.c index 2be388b15c..2e9e6938f9 100644 --- a/lib/pengine/status.c +++ b/lib/pengine/status.c @@ -1,528 +1,531 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include /*! * \brief Create a new object to hold scheduler data * * \return New, initialized scheduler data on success, else NULL (and set errno) * \note Only pcmk_scheduler_t objects created with this function (as opposed * to statically declared or directly allocated) should be used with the * functions in this library, to allow for future extensions to the * data type. The caller is responsible for freeing the memory with * pe_free_working_set() when the instance is no longer needed. */ pcmk_scheduler_t * pe_new_working_set(void) { pcmk_scheduler_t *scheduler = calloc(1, sizeof(pcmk_scheduler_t)); if (scheduler == NULL) { return NULL; } scheduler->priv = calloc(1, sizeof(pcmk__scheduler_private_t)); if (scheduler->priv == NULL) { free(scheduler); return NULL; } set_working_set_defaults(scheduler); return scheduler; } /*! * \brief Free scheduler data * * \param[in,out] scheduler Scheduler data to free */ void pe_free_working_set(pcmk_scheduler_t *scheduler) { if (scheduler != NULL) { pe_reset_working_set(scheduler); + free(scheduler->priv->local_node_name); free(scheduler->priv); free(scheduler); } } #define XPATH_DEPRECATED_RULES \ "//" PCMK_XE_OP_DEFAULTS "//" PCMK_XE_EXPRESSION \ "|//" PCMK_XE_OP "//" PCMK_XE_EXPRESSION /*! * \internal * \brief Log a warning for deprecated rule syntax in operations * * \param[in] scheduler Scheduler data */ static void check_for_deprecated_rules(pcmk_scheduler_t *scheduler) { // @COMPAT Drop this function when support for the syntax is dropped xmlNode *deprecated = get_xpath_object(XPATH_DEPRECATED_RULES, scheduler->input, LOG_NEVER); if (deprecated != NULL) { pcmk__warn_once(pcmk__wo_op_attr_expr, "Support for rules with node attribute expressions in " PCMK_XE_OP " or " PCMK_XE_OP_DEFAULTS " is deprecated " "and will be dropped in a future release"); } } /* * Unpack everything * At the end you'll have: * - A list of nodes * - A list of resources (each with any dependencies on other resources) * - A list of constraints between resources and nodes * - A list of constraints between start/stop actions * - A list of nodes that need to be stonith'd * - A list of nodes that need to be shutdown * - A list of the possible stop/start actions (without dependencies) */ gboolean cluster_status(pcmk_scheduler_t * scheduler) { const char *new_version = NULL; xmlNode *section = NULL; if ((scheduler == NULL) || (scheduler->input == NULL)) { return FALSE; } new_version = crm_element_value(scheduler->input, PCMK_XA_CRM_FEATURE_SET); if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) { pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'", new_version, CRM_FEATURE_SET); return FALSE; } crm_trace("Beginning unpack"); if (scheduler->priv->failed != NULL) { pcmk__xml_free(scheduler->priv->failed); } scheduler->priv->failed = pcmk__xe_create(NULL, "failed-ops"); if (scheduler->priv->now == NULL) { scheduler->priv->now = crm_time_new(NULL); } if (pcmk__xe_attr_is_true(scheduler->input, PCMK_XA_HAVE_QUORUM)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_quorate); } else { pcmk__clear_scheduler_flags(scheduler, pcmk__sched_quorate); } scheduler->priv->op_defaults = get_xpath_object("//" PCMK_XE_OP_DEFAULTS, scheduler->input, LOG_NEVER); check_for_deprecated_rules(scheduler); scheduler->priv->rsc_defaults = get_xpath_object("//" PCMK_XE_RSC_DEFAULTS, scheduler->input, LOG_NEVER); section = get_xpath_object("//" PCMK_XE_CRM_CONFIG, scheduler->input, LOG_TRACE); unpack_config(section, scheduler); if (!pcmk_any_flags_set(scheduler->flags, pcmk__sched_location_only|pcmk__sched_quorate) && (scheduler->no_quorum_policy != pcmk_no_quorum_ignore)) { pcmk__sched_warn(scheduler, "Fencing and resource management disabled " "due to lack of quorum"); } section = get_xpath_object("//" PCMK_XE_NODES, scheduler->input, LOG_TRACE); unpack_nodes(section, scheduler); section = get_xpath_object("//" PCMK_XE_RESOURCES, scheduler->input, LOG_TRACE); if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { unpack_remote_nodes(section, scheduler); } unpack_resources(section, scheduler); section = get_xpath_object("//" PCMK_XE_FENCING_TOPOLOGY, scheduler->input, LOG_TRACE); pcmk__unpack_fencing_topology(section, scheduler); section = get_xpath_object("//" PCMK_XE_TAGS, scheduler->input, LOG_NEVER); unpack_tags(section, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { section = get_xpath_object("//" PCMK_XE_STATUS, scheduler->input, LOG_TRACE); unpack_status(section, scheduler); } if (!pcmk_is_set(scheduler->flags, pcmk__sched_no_counts)) { for (GList *item = scheduler->priv->resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = item->data; rsc->priv->fns->count(item->data); } crm_trace("Cluster resource count: %d (%d disabled, %d blocked)", scheduler->ninstances, scheduler->disabled_resources, scheduler->blocked_resources); } pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_status); return TRUE; } /*! * \internal * \brief Free a list of pcmk_resource_t * * \param[in,out] resources List to free * * \note When the scheduler's resource list is freed, that includes the original * storage for the uname and id of any Pacemaker Remote nodes in the * scheduler's node list, so take care not to use those afterward. * \todo Refactor pcmk_node_t to strdup() the node name. */ static void pe_free_resources(GList *resources) { pcmk_resource_t *rsc = NULL; GList *iterator = resources; while (iterator != NULL) { rsc = (pcmk_resource_t *) iterator->data; iterator = iterator->next; rsc->priv->fns->free(rsc); } if (resources != NULL) { g_list_free(resources); } } static void pe_free_actions(GList *actions) { GList *iterator = actions; while (iterator != NULL) { pe_free_action(iterator->data); iterator = iterator->next; } if (actions != NULL) { g_list_free(actions); } } static void pe_free_nodes(GList *nodes) { for (GList *iterator = nodes; iterator != NULL; iterator = iterator->next) { pcmk_node_t *node = (pcmk_node_t *) iterator->data; // Shouldn't be possible, but to be safe ... if (node == NULL) { continue; } if (node->details == NULL) { free(node); continue; } /* This is called after pe_free_resources(), which means that we can't * use node->private->name for Pacemaker Remote nodes. */ crm_trace("Freeing node %s", (pcmk__is_pacemaker_remote_node(node)? "(guest or remote)" : pcmk__node_name(node))); if (node->priv->attrs != NULL) { g_hash_table_destroy(node->priv->attrs); } if (node->priv->utilization != NULL) { g_hash_table_destroy(node->priv->utilization); } if (node->priv->digest_cache != NULL) { g_hash_table_destroy(node->priv->digest_cache); } g_list_free(node->details->running_rsc); g_list_free(node->priv->assigned_resources); free(node->priv); free(node->details); free(node->assign); free(node); } if (nodes != NULL) { g_list_free(nodes); } } static void pe__free_ordering(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pcmk__action_relation_t *order = iterator->data; iterator = iterator->next; free(order->task1); free(order->task2); free(order); } if (constraints != NULL) { g_list_free(constraints); } } static void pe__free_location(GList *constraints) { GList *iterator = constraints; while (iterator != NULL) { pcmk__location_t *cons = iterator->data; iterator = iterator->next; g_list_free_full(cons->nodes, free); free(cons->id); free(cons); } if (constraints != NULL) { g_list_free(constraints); } } /*! * \brief Reset scheduler data to defaults without freeing it or constraints * * \param[in,out] scheduler Scheduler data to reset * * \deprecated This function is deprecated as part of the API; * pe_reset_working_set() should be used instead. */ void cleanup_calculations(pcmk_scheduler_t *scheduler) { if (scheduler == NULL) { return; } pcmk__clear_scheduler_flags(scheduler, pcmk__sched_have_status); if (scheduler->priv->options != NULL) { g_hash_table_destroy(scheduler->priv->options); } if (scheduler->priv->singletons != NULL) { g_hash_table_destroy(scheduler->priv->singletons); } if (scheduler->priv->ticket_constraints != NULL) { g_hash_table_destroy(scheduler->priv->ticket_constraints); } - if (scheduler->template_rsc_sets) { - g_hash_table_destroy(scheduler->template_rsc_sets); + if (scheduler->priv->templates != NULL) { + g_hash_table_destroy(scheduler->priv->templates); } if (scheduler->tags) { g_hash_table_destroy(scheduler->tags); } crm_trace("deleting resources"); pe_free_resources(scheduler->priv->resources); crm_trace("deleting actions"); pe_free_actions(scheduler->priv->actions); crm_trace("deleting nodes"); pe_free_nodes(scheduler->nodes); pe__free_param_checks(scheduler); g_list_free(scheduler->stop_needed); - pcmk__xml_free(scheduler->graph); crm_time_free(scheduler->priv->now); pcmk__xml_free(scheduler->input); pcmk__xml_free(scheduler->priv->failed); + pcmk__xml_free(scheduler->priv->graph); set_working_set_defaults(scheduler); CRM_LOG_ASSERT((scheduler->priv->location_constraints == NULL) && (scheduler->priv->ordering_constraints == NULL)); } /*! * \brief Reset scheduler data to default state without freeing it * * \param[in,out] scheduler Scheduler data to reset */ void pe_reset_working_set(pcmk_scheduler_t *scheduler) { if (scheduler == NULL) { return; } crm_trace("Deleting %d ordering constraints", g_list_length(scheduler->priv->ordering_constraints)); pe__free_ordering(scheduler->priv->ordering_constraints); scheduler->priv->ordering_constraints = NULL; crm_trace("Deleting %d location constraints", g_list_length(scheduler->priv->location_constraints)); pe__free_location(scheduler->priv->location_constraints); scheduler->priv->location_constraints = NULL; crm_trace("Deleting %d colocation constraints", g_list_length(scheduler->priv->colocation_constraints)); g_list_free_full(scheduler->priv->colocation_constraints, free); scheduler->priv->colocation_constraints = NULL; cleanup_calculations(scheduler); } void set_working_set_defaults(pcmk_scheduler_t *scheduler) { // These members must be preserved pcmk__scheduler_private_t *priv = scheduler->priv; pcmk__output_t *out = priv->out; + char *local_node_name = scheduler->priv->local_node_name; // Wipe the main structs (any other members must have previously been freed) memset(scheduler, 0, sizeof(pcmk_scheduler_t)); memset(priv, 0, sizeof(pcmk__scheduler_private_t)); // Restore the members to preserve scheduler->priv = priv; scheduler->priv->out = out; + scheduler->priv->local_node_name = local_node_name; // Set defaults for everything else scheduler->priv->next_ordering_id = 1; - scheduler->action_id = 1; + scheduler->priv->next_action_id = 1; scheduler->no_quorum_policy = pcmk_no_quorum_stop; pcmk__set_scheduler_flags(scheduler, pcmk__sched_symmetric_cluster |pcmk__sched_stop_removed_resources |pcmk__sched_cancel_removed_actions); if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, PCMK_VALUE_TRUE)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_concurrent_fencing); } } pcmk_resource_t * pe_find_resource(GList *rsc_list, const char *id) { return pe_find_resource_with_flags(rsc_list, id, pcmk_rsc_match_history); } pcmk_resource_t * pe_find_resource_with_flags(GList *rsc_list, const char *id, enum pe_find flags) { GList *rIter = NULL; for (rIter = rsc_list; id && rIter; rIter = rIter->next) { pcmk_resource_t *parent = rIter->data; pcmk_resource_t *match = parent->priv->fns->find_rsc(parent, id, NULL, flags); if (match != NULL) { return match; } } crm_trace("No match for %s", id); return NULL; } /*! * \brief Find a node by name or ID in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] id If not NULL, ID of node to find * \param[in] node_name If not NULL, name of node to find * * \return Node from \p nodes that matches \p id if any, * otherwise node from \p nodes that matches \p uname if any, * otherwise NULL */ pcmk_node_t * pe_find_node_any(const GList *nodes, const char *id, const char *uname) { pcmk_node_t *match = NULL; if (id != NULL) { match = pe_find_node_id(nodes, id); } if ((match == NULL) && (uname != NULL)) { match = pcmk__find_node_in_list(nodes, uname); } return match; } /*! * \brief Find a node by ID in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] id ID of node to find * * \return Node from \p nodes that matches \p id if any, otherwise NULL */ pcmk_node_t * pe_find_node_id(const GList *nodes, const char *id) { for (const GList *iter = nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; /* @TODO Whether node IDs should be considered case-sensitive should * probably depend on the node type, so functionizing the comparison * would be worthwhile */ if (pcmk__str_eq(node->priv->id, id, pcmk__str_casei)) { return node; } } return NULL; } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START #include /*! * \brief Find a node by name in a list of nodes * * \param[in] nodes List of nodes (as pcmk_node_t*) * \param[in] node_name Name of node to find * * \return Node from \p nodes that matches \p node_name if any, otherwise NULL */ pcmk_node_t * pe_find_node(const GList *nodes, const char *node_name) { return pcmk__find_node_in_list(nodes, node_name); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/pengine/tests/native/native_find_rsc_test.c b/lib/pengine/tests/native/native_find_rsc_test.c index d1facc428e..676d22a88d 100644 --- a/lib/pengine/tests/native/native_find_rsc_test.c +++ b/lib/pengine/tests/native/native_find_rsc_test.c @@ -1,934 +1,933 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include xmlNode *input = NULL; pcmk_scheduler_t *scheduler = NULL; pcmk_node_t *cluster01, *cluster02, *httpd_bundle_0; pcmk_resource_t *exim_group, *inactive_group; pcmk_resource_t *promotable_clone, *inactive_clone; pcmk_resource_t *httpd_bundle, *mysql_clone_group; static int setup(void **state) { char *path = NULL; pcmk__xml_init(); path = crm_strdup_printf("%s/crm_mon.xml", getenv("PCMK_CTS_CLI_DIR")); input = pcmk__xml_read(path); free(path); if (input == NULL) { return 1; } scheduler = pe_new_working_set(); if (scheduler == NULL) { return 1; } - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); scheduler->input = input; cluster_status(scheduler); /* Get references to the cluster nodes so we don't have to find them repeatedly. */ cluster01 = pcmk_find_node(scheduler, "cluster01"); cluster02 = pcmk_find_node(scheduler, "cluster02"); httpd_bundle_0 = pcmk_find_node(scheduler, "httpd-bundle-0"); /* Get references to several resources we use frequently. */ for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "exim-group") == 0) { exim_group = rsc; } else if (strcmp(rsc->id, "httpd-bundle") == 0) { httpd_bundle = rsc; } else if (strcmp(rsc->id, "inactive-clone") == 0) { inactive_clone = rsc; } else if (strcmp(rsc->id, "inactive-group") == 0) { inactive_group = rsc; } else if (strcmp(rsc->id, "mysql-clone-group") == 0) { mysql_clone_group = rsc; } else if (strcmp(rsc->id, "promotable-clone") == 0) { promotable_clone = rsc; } } return 0; } static int teardown(void **state) { pe_free_working_set(scheduler); pcmk__xml_cleanup(); return 0; } static void bad_args(void **state) { pcmk_resource_t *rsc = g_list_first(scheduler->priv->resources)->data; char *id = rsc->id; char *name = NULL; assert_non_null(rsc); assert_null(native_find_rsc(NULL, "dummy", NULL, 0)); assert_null(native_find_rsc(rsc, NULL, NULL, 0)); /* No resources exist with these names. */ name = crm_strdup_printf("%sX", rsc->id); assert_null(native_find_rsc(rsc, name, NULL, 0)); free(name); name = crm_strdup_printf("x%s", rsc->id); assert_null(native_find_rsc(rsc, name, NULL, 0)); free(name); name = g_ascii_strup(rsc->id, -1); assert_null(native_find_rsc(rsc, name, NULL, 0)); g_free(name); /* Fails because resource ID is NULL. */ rsc->id = NULL; assert_null(native_find_rsc(rsc, id, NULL, 0)); rsc->id = id; } static void primitive_rsc(void **state) { pcmk_resource_t *dummy = NULL; /* Find the "dummy" resource, which is the only one with that ID in the set. */ for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "dummy") == 0) { dummy = rsc; break; } } assert_non_null(dummy); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(dummy, native_find_rsc(dummy, "dummy", NULL, 0)); assert_ptr_equal(dummy, native_find_rsc(dummy, "dummy", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(dummy, "dummy", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(dummy, "dummy", cluster02, pcmk_rsc_match_clone_only)); /* Fails because dummy is not running on cluster01, even with the right flags. */ assert_null(native_find_rsc(dummy, "dummy", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(dummy, "dummy", cluster02, 0)); /* Passes because dummy is running on cluster02. */ assert_ptr_equal(dummy, native_find_rsc(dummy, "dummy", cluster02, pcmk_rsc_match_current_node)); } static void group_rsc(void **state) { assert_non_null(exim_group); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(exim_group, native_find_rsc(exim_group, "exim-group", NULL, 0)); assert_ptr_equal(exim_group, native_find_rsc(exim_group, "exim-group", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(exim_group, "exim-group", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(exim_group, "exim-group", cluster01, pcmk_rsc_match_clone_only)); /* Fails because none of exim-group's children are running on cluster01, even with the right flags. */ assert_null(native_find_rsc(exim_group, "exim-group", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(exim_group, "exim-group", cluster01, 0)); /* Passes because one of exim-group's children is running on cluster02. */ assert_ptr_equal(exim_group, native_find_rsc(exim_group, "exim-group", cluster02, pcmk_rsc_match_current_node)); } static void inactive_group_rsc(void **state) { assert_non_null(inactive_group); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(inactive_group, native_find_rsc(inactive_group, "inactive-group", NULL, 0)); assert_ptr_equal(inactive_group, native_find_rsc(inactive_group, "inactive-group", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(inactive_group, "inactive-group", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(inactive_group, "inactive-group", cluster01, pcmk_rsc_match_clone_only)); /* Fails because none of inactive-group's children are running. */ assert_null(native_find_rsc(inactive_group, "inactive-group", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(inactive_group, "inactive-group", cluster02, pcmk_rsc_match_current_node)); } static void group_member_rsc(void **state) { pcmk_resource_t *public_ip = NULL; /* Find the "Public-IP" resource, a member of "exim-group". */ for (GList *iter = exim_group->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "Public-IP") == 0) { public_ip = rsc; break; } } assert_non_null(public_ip); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(public_ip, native_find_rsc(public_ip, "Public-IP", NULL, 0)); assert_ptr_equal(public_ip, native_find_rsc(public_ip, "Public-IP", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(public_ip, "Public-IP", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(public_ip, "Public-IP", cluster02, pcmk_rsc_match_clone_only)); /* Fails because Public-IP is not running on cluster01, even with the right flags. */ assert_null(native_find_rsc(public_ip, "Public-IP", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(public_ip, "Public-IP", cluster02, 0)); /* Passes because Public-IP is running on cluster02. */ assert_ptr_equal(public_ip, native_find_rsc(public_ip, "Public-IP", cluster02, pcmk_rsc_match_current_node)); } static void inactive_group_member_rsc(void **state) { pcmk_resource_t *inactive_dummy_1 = NULL; /* Find the "inactive-dummy-1" resource, a member of "inactive-group". */ for (GList *iter = inactive_group->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "inactive-dummy-1") == 0) { inactive_dummy_1 = rsc; break; } } assert_non_null(inactive_dummy_1); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(inactive_dummy_1, native_find_rsc(inactive_dummy_1, "inactive-dummy-1", NULL, 0)); assert_ptr_equal(inactive_dummy_1, native_find_rsc(inactive_dummy_1, "inactive-dummy-1", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(inactive_dummy_1, "inactive-dummy-1", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(inactive_dummy_1, "inactive-dummy-1", cluster01, pcmk_rsc_match_clone_only)); /* Fails because inactive-dummy-1 is not running. */ assert_null(native_find_rsc(inactive_dummy_1, "inactive-dummy-1", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(inactive_dummy_1, "inactive-dummy-1", cluster02, pcmk_rsc_match_current_node)); } static void clone_rsc(void **state) { assert_non_null(promotable_clone); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", NULL, 0)); assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", NULL, pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", NULL, pcmk_rsc_match_clone_only)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(promotable_clone, "promotable-clone", cluster01, 0)); /* Passes because one of ping-clone's children is running on cluster01. */ assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(promotable_clone, "promotable-clone", cluster02, 0)); /* Passes because one of ping_clone's children is running on cluster02. */ assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", cluster02, pcmk_rsc_match_current_node)); // Passes for previous reasons, plus includes pcmk_rsc_match_clone_only assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", cluster01, pcmk_rsc_match_clone_only |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_clone, native_find_rsc(promotable_clone, "promotable-clone", cluster02, pcmk_rsc_match_clone_only |pcmk_rsc_match_current_node)); } static void inactive_clone_rsc(void **state) { assert_non_null(inactive_clone); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(inactive_clone, native_find_rsc(inactive_clone, "inactive-clone", NULL, 0)); assert_ptr_equal(inactive_clone, native_find_rsc(inactive_clone, "inactive-clone", NULL, pcmk_rsc_match_current_node)); assert_ptr_equal(inactive_clone, native_find_rsc(inactive_clone, "inactive-clone", NULL, pcmk_rsc_match_clone_only)); /* Fails because none of inactive-clone's children are running. */ assert_null(native_find_rsc(inactive_clone, "inactive-clone", cluster01, pcmk_rsc_match_current_node |pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(inactive_clone, "inactive-clone", cluster02, pcmk_rsc_match_current_node |pcmk_rsc_match_clone_only)); } static void clone_instance_rsc(void **state) { pcmk_resource_t *promotable_0 = NULL; pcmk_resource_t *promotable_1 = NULL; /* Find the "promotable-rsc:0" and "promotable-rsc:1" resources, members of "promotable-clone". */ for (GList *iter = promotable_clone->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "promotable-rsc:0") == 0) { promotable_0 = rsc; } else if (strcmp(rsc->id, "promotable-rsc:1") == 0) { promotable_1 = rsc; } } assert_non_null(promotable_0); assert_non_null(promotable_1); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc:0", NULL, 0)); assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc:0", NULL, pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc:1", NULL, 0)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc:1", NULL, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(promotable_0, "promotable-rsc:0", cluster02, 0)); assert_null(native_find_rsc(promotable_1, "promotable-rsc:1", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc:0", cluster02, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_0, "promotable-rsc:0", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc:1", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_1, "promotable-rsc:1", cluster02, pcmk_rsc_match_current_node)); /* Passes because NULL was passed for node and primitive name was given, with correct flags. */ assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", NULL, pcmk_rsc_match_clone_only)); // Passes because pcmk_rsc_match_basename matches any instance's base name assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", NULL, pcmk_rsc_match_basename)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc", NULL, pcmk_rsc_match_basename)); // Passes because pcmk_rsc_match_anon_basename matches assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", NULL, pcmk_rsc_match_anon_basename)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc", NULL, pcmk_rsc_match_anon_basename)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_0, "promotable-rsc", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_0, "promotable-rsc", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_1, "promotable-rsc", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_1, "promotable-rsc", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); /* Fails because incorrect flags were given along with primitive name. */ assert_null(native_find_rsc(promotable_0, "promotable-rsc", NULL, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(promotable_1, "promotable-rsc", NULL, pcmk_rsc_match_current_node)); /* And then we check failure possibilities again, except passing promotable_clone * instead of promotable_X as the first argument to native_find_rsc. */ // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(promotable_clone, "promotable-rsc:0", cluster02, 0)); assert_null(native_find_rsc(promotable_clone, "promotable-rsc:1", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(promotable_0, native_find_rsc(promotable_clone, "promotable-rsc:0", cluster02, pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_0, native_find_rsc(promotable_clone, "promotable-rsc", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_0, native_find_rsc(promotable_clone, "promotable-rsc", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_clone, "promotable-rsc:1", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_clone, "promotable-rsc", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_clone, "promotable-rsc", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); } static void renamed_rsc(void **state) { pcmk_resource_t *promotable_0 = NULL; pcmk_resource_t *promotable_1 = NULL; /* Find the "promotable-rsc:0" and "promotable-rsc:1" resources, members of "promotable-clone". */ for (GList *iter = promotable_clone->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "promotable-rsc:0") == 0) { promotable_0 = rsc; } else if (strcmp(rsc->id, "promotable-rsc:1") == 0) { promotable_1 = rsc; } } assert_non_null(promotable_0); assert_non_null(promotable_1); // Passes because pcmk_rsc_match_history means base name matches history_id assert_ptr_equal(promotable_0, native_find_rsc(promotable_0, "promotable-rsc", NULL, pcmk_rsc_match_history)); assert_ptr_equal(promotable_1, native_find_rsc(promotable_1, "promotable-rsc", NULL, pcmk_rsc_match_history)); } static void bundle_rsc(void **state) { assert_non_null(httpd_bundle); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(httpd_bundle, native_find_rsc(httpd_bundle, "httpd-bundle", NULL, 0)); assert_ptr_equal(httpd_bundle, native_find_rsc(httpd_bundle, "httpd-bundle", NULL, pcmk_rsc_match_current_node)); /* Fails because resource is not a clone (nor cloned). */ assert_null(native_find_rsc(httpd_bundle, "httpd-bundle", NULL, pcmk_rsc_match_clone_only)); assert_null(native_find_rsc(httpd_bundle, "httpd-bundle", cluster01, pcmk_rsc_match_clone_only)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(httpd_bundle, "httpd-bundle", cluster01, 0)); /* Passes because one of httpd_bundle's children is running on cluster01. */ assert_ptr_equal(httpd_bundle, native_find_rsc(httpd_bundle, "httpd-bundle", cluster01, pcmk_rsc_match_current_node)); } static bool bundle_first_replica(pcmk__bundle_replica_t *replica, void *user_data) { pcmk_resource_t *ip_0 = replica->ip; pcmk_resource_t *child_0 = replica->child; pcmk_resource_t *container_0 = replica->container; pcmk_resource_t *remote_0 = replica->remote; assert_non_null(ip_0); assert_non_null(child_0); assert_non_null(container_0); assert_non_null(remote_0); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(ip_0, native_find_rsc(ip_0, "httpd-bundle-ip-192.168.122.131", NULL, 0)); assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd:0", NULL, 0)); assert_ptr_equal(container_0, native_find_rsc(container_0, "httpd-bundle-docker-0", NULL, 0)); assert_ptr_equal(remote_0, native_find_rsc(remote_0, "httpd-bundle-0", NULL, 0)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(ip_0, "httpd-bundle-ip-192.168.122.131", cluster01, 0)); assert_null(native_find_rsc(child_0, "httpd:0", httpd_bundle_0, 0)); assert_null(native_find_rsc(container_0, "httpd-bundle-docker-0", cluster01, 0)); assert_null(native_find_rsc(remote_0, "httpd-bundle-0", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(ip_0, native_find_rsc(ip_0, "httpd-bundle-ip-192.168.122.131", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(ip_0, "httpd-bundle-ip-192.168.122.131", cluster02, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(ip_0, "httpd-bundle-ip-192.168.122.131", httpd_bundle_0, pcmk_rsc_match_current_node)); assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd:0", httpd_bundle_0, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd:0", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd:0", cluster02, pcmk_rsc_match_current_node)); assert_ptr_equal(container_0, native_find_rsc(container_0, "httpd-bundle-docker-0", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(container_0, "httpd-bundle-docker-0", cluster02, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(container_0, "httpd-bundle-docker-0", httpd_bundle_0, pcmk_rsc_match_current_node)); assert_ptr_equal(remote_0, native_find_rsc(remote_0, "httpd-bundle-0", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(remote_0, "httpd-bundle-0", cluster02, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(remote_0, "httpd-bundle-0", httpd_bundle_0, pcmk_rsc_match_current_node)); // Passes because pcmk_rsc_match_basename matches any replica's base name assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd", NULL, pcmk_rsc_match_basename)); // Passes because pcmk_rsc_match_anon_basename matches assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd", NULL, pcmk_rsc_match_anon_basename)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd", httpd_bundle_0, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(child_0, native_find_rsc(child_0, "httpd", httpd_bundle_0, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(child_0, "httpd", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); /* Fails because incorrect flags were given along with base name. */ assert_null(native_find_rsc(child_0, "httpd", NULL, pcmk_rsc_match_current_node)); /* And then we check failure possibilities again, except passing httpd-bundle * instead of X_0 as the first argument to native_find_rsc. */ // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(httpd_bundle, "httpd-bundle-ip-192.168.122.131", cluster01, 0)); assert_null(native_find_rsc(httpd_bundle, "httpd:0", httpd_bundle_0, 0)); assert_null(native_find_rsc(httpd_bundle, "httpd-bundle-docker-0", cluster01, 0)); assert_null(native_find_rsc(httpd_bundle, "httpd-bundle-0", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(ip_0, native_find_rsc(httpd_bundle, "httpd-bundle-ip-192.168.122.131", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(child_0, native_find_rsc(httpd_bundle, "httpd:0", httpd_bundle_0, pcmk_rsc_match_current_node)); assert_ptr_equal(container_0, native_find_rsc(httpd_bundle, "httpd-bundle-docker-0", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(remote_0, native_find_rsc(httpd_bundle, "httpd-bundle-0", cluster01, pcmk_rsc_match_current_node)); return false; // Do not iterate through any further replicas } static void bundle_replica_rsc(void **state) { pe__foreach_bundle_replica(httpd_bundle, bundle_first_replica, NULL); } static void clone_group_rsc(void **rsc) { assert_non_null(mysql_clone_group); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", NULL, 0)); assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", NULL, pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", NULL, pcmk_rsc_match_clone_only)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster01, 0)); /* Passes because one of mysql-clone-group's children is running on cluster01. */ assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster02, 0)); /* Passes because one of mysql-clone-group's children is running on cluster02. */ assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster02, pcmk_rsc_match_current_node)); // Passes for previous reasons, plus includes pcmk_rsc_match_clone_only assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster01, pcmk_rsc_match_clone_only |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_clone_group, native_find_rsc(mysql_clone_group, "mysql-clone-group", cluster02, pcmk_rsc_match_clone_only |pcmk_rsc_match_current_node)); } static void clone_group_instance_rsc(void **rsc) { pcmk_resource_t *mysql_group_0 = NULL; pcmk_resource_t *mysql_group_1 = NULL; /* Find the "mysql-group:0" and "mysql-group:1" resources, members of "mysql-clone-group". */ for (GList *iter = mysql_clone_group->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "mysql-group:0") == 0) { mysql_group_0 = rsc; } else if (strcmp(rsc->id, "mysql-group:1") == 0) { mysql_group_1 = rsc; } } assert_non_null(mysql_group_0); assert_non_null(mysql_group_1); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group:0", NULL, 0)); assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group:0", NULL, pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group:1", NULL, 0)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group:1", NULL, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(mysql_group_0, "mysql-group:0", cluster02, 0)); assert_null(native_find_rsc(mysql_group_1, "mysql-group:1", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group:0", cluster02, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_0, "mysql-group:0", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group:1", cluster01, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_1, "mysql-group:1", cluster02, pcmk_rsc_match_current_node)); /* Passes because NULL was passed for node and base name was given, with correct flags. */ assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group" , NULL, pcmk_rsc_match_clone_only)); // Passes because pcmk_rsc_match_basename matches any base name assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group" , NULL, pcmk_rsc_match_basename)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group" , NULL, pcmk_rsc_match_basename)); // Passes because pcmk_rsc_match_anon_basename matches assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group" , NULL, pcmk_rsc_match_anon_basename)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group" , NULL, pcmk_rsc_match_anon_basename)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_group_0, "mysql-group", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_0, "mysql-group", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_0, "mysql-group", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_group_1, "mysql-group", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_1, "mysql-group", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_1, "mysql-group", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); /* Fails because incorrect flags were given along with base name. */ assert_null(native_find_rsc(mysql_group_0, "mysql-group", NULL, pcmk_rsc_match_current_node)); assert_null(native_find_rsc(mysql_group_1, "mysql-group", NULL, pcmk_rsc_match_current_node)); /* And then we check failure possibilities again, except passing mysql_clone_group * instead of mysql_group_X as the first argument to native_find_rsc. */ // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(mysql_clone_group, "mysql-group:0", cluster02, 0)); assert_null(native_find_rsc(mysql_clone_group, "mysql-group:1", cluster01, 0)); /* Check that the resource is running on the node we expect. */ assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_clone_group, "mysql-group:0", cluster02, pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_clone_group, "mysql-group", cluster02, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_0, native_find_rsc(mysql_clone_group, "mysql-group", cluster02, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_clone_group, "mysql-group:1", cluster01, pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_clone_group, "mysql-group", cluster01, pcmk_rsc_match_basename |pcmk_rsc_match_current_node)); assert_ptr_equal(mysql_group_1, native_find_rsc(mysql_clone_group, "mysql-group", cluster01, pcmk_rsc_match_anon_basename |pcmk_rsc_match_current_node)); } static void clone_group_member_rsc(void **state) { pcmk_resource_t *mysql_proxy = NULL; /* Find the "mysql-proxy" resource, a member of "mysql-group". */ for (GList *iter = mysql_clone_group->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "mysql-group:0") == 0) { for (GList *iter2 = rsc->priv->children; iter2 != NULL; iter2 = iter2->next) { pcmk_resource_t *child = (pcmk_resource_t *) iter2->data; if (strcmp(child->id, "mysql-proxy:0") == 0) { mysql_proxy = child; break; } } break; } } assert_non_null(mysql_proxy); /* Passes because NULL was passed for node, regardless of flags. */ assert_ptr_equal(mysql_proxy, native_find_rsc(mysql_proxy, "mysql-proxy:0", NULL, 0)); assert_ptr_equal(mysql_proxy, native_find_rsc(mysql_proxy, "mysql-proxy:0", NULL, pcmk_rsc_match_current_node)); /* Passes because resource's parent is a clone. */ assert_ptr_equal(mysql_proxy, native_find_rsc(mysql_proxy, "mysql-proxy:0", NULL, pcmk_rsc_match_clone_only)); assert_ptr_equal(mysql_proxy, native_find_rsc(mysql_proxy, "mysql-proxy:0", cluster02, pcmk_rsc_match_clone_only |pcmk_rsc_match_current_node)); /* Fails because mysql-proxy:0 is not running on cluster01, even with the right flags. */ assert_null(native_find_rsc(mysql_proxy, "mysql-proxy:0", cluster01, pcmk_rsc_match_current_node)); // Fails because pcmk_rsc_match_current_node is required if a node is given assert_null(native_find_rsc(mysql_proxy, "mysql-proxy:0", cluster02, 0)); /* Passes because mysql-proxy:0 is running on cluster02. */ assert_ptr_equal(mysql_proxy, native_find_rsc(mysql_proxy, "mysql-proxy:0", cluster02, pcmk_rsc_match_current_node)); } /* TODO: Add tests for finding on assigned node (passing a node without * pcmk_rsc_match_current_node, after scheduling, for a resource that is * starting/stopping/moving. */ PCMK__UNIT_TEST(setup, teardown, cmocka_unit_test(bad_args), cmocka_unit_test(primitive_rsc), cmocka_unit_test(group_rsc), cmocka_unit_test(inactive_group_rsc), cmocka_unit_test(group_member_rsc), cmocka_unit_test(inactive_group_member_rsc), cmocka_unit_test(clone_rsc), cmocka_unit_test(inactive_clone_rsc), cmocka_unit_test(clone_instance_rsc), cmocka_unit_test(renamed_rsc), cmocka_unit_test(bundle_rsc), cmocka_unit_test(bundle_replica_rsc), cmocka_unit_test(clone_group_rsc), cmocka_unit_test(clone_group_instance_rsc), cmocka_unit_test(clone_group_member_rsc)) diff --git a/lib/pengine/tests/native/pe_base_name_eq_test.c b/lib/pengine/tests/native/pe_base_name_eq_test.c index 81fb1ef82f..1a08480974 100644 --- a/lib/pengine/tests/native/pe_base_name_eq_test.c +++ b/lib/pengine/tests/native/pe_base_name_eq_test.c @@ -1,160 +1,159 @@ /* * Copyright 2022-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include xmlNode *input = NULL; pcmk_scheduler_t *scheduler = NULL; pcmk_resource_t *exim_group, *promotable_0, *promotable_1, *dummy; pcmk_resource_t *httpd_bundle, *mysql_group_0, *mysql_group_1; static int setup(void **state) { char *path = NULL; pcmk__xml_init(); path = crm_strdup_printf("%s/crm_mon.xml", getenv("PCMK_CTS_CLI_DIR")); input = pcmk__xml_read(path); free(path); if (input == NULL) { return 1; } scheduler = pe_new_working_set(); if (scheduler == NULL) { return 1; } - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); scheduler->input = input; cluster_status(scheduler); /* Get references to several resources we use frequently. */ for (GList *iter = scheduler->priv->resources; iter != NULL; iter = iter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data; if (strcmp(rsc->id, "dummy") == 0) { dummy = rsc; } else if (strcmp(rsc->id, "exim-group") == 0) { exim_group = rsc; } else if (strcmp(rsc->id, "httpd-bundle") == 0) { httpd_bundle = rsc; } else if (strcmp(rsc->id, "mysql-clone-group") == 0) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = (pcmk_resource_t *) iter->data; if (strcmp(child->id, "mysql-group:0") == 0) { mysql_group_0 = child; } else if (strcmp(child->id, "mysql-group:1") == 0) { mysql_group_1 = child; } } } else if (strcmp(rsc->id, "promotable-clone") == 0) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = (pcmk_resource_t *) iter->data; if (strcmp(child->id, "promotable-rsc:0") == 0) { promotable_0 = child; } else if (strcmp(child->id, "promotable-rsc:1") == 0) { promotable_1 = child; } } } } return 0; } static int teardown(void **state) { pe_free_working_set(scheduler); pcmk__xml_cleanup(); return 0; } static void bad_args(void **state) { char *id = dummy->id; assert_false(pe_base_name_eq(NULL, "dummy")); assert_false(pe_base_name_eq(dummy, NULL)); dummy->id = NULL; assert_false(pe_base_name_eq(dummy, "dummy")); dummy->id = id; } static void primitive_rsc(void **state) { assert_true(pe_base_name_eq(dummy, "dummy")); assert_false(pe_base_name_eq(dummy, "DUMMY")); assert_false(pe_base_name_eq(dummy, "dUmMy")); assert_false(pe_base_name_eq(dummy, "dummy0")); assert_false(pe_base_name_eq(dummy, "dummy:0")); } static void group_rsc(void **state) { assert_true(pe_base_name_eq(exim_group, "exim-group")); assert_false(pe_base_name_eq(exim_group, "EXIM-GROUP")); assert_false(pe_base_name_eq(exim_group, "exim-group0")); assert_false(pe_base_name_eq(exim_group, "exim-group:0")); assert_false(pe_base_name_eq(exim_group, "Public-IP")); } static void clone_rsc(void **state) { assert_true(pe_base_name_eq(promotable_0, "promotable-rsc")); assert_true(pe_base_name_eq(promotable_1, "promotable-rsc")); assert_false(pe_base_name_eq(promotable_0, "promotable-rsc:0")); assert_false(pe_base_name_eq(promotable_1, "promotable-rsc:1")); assert_false(pe_base_name_eq(promotable_0, "PROMOTABLE-RSC")); assert_false(pe_base_name_eq(promotable_1, "PROMOTABLE-RSC")); assert_false(pe_base_name_eq(promotable_0, "Promotable-rsc")); assert_false(pe_base_name_eq(promotable_1, "Promotable-rsc")); } static void bundle_rsc(void **state) { assert_true(pe_base_name_eq(httpd_bundle, "httpd-bundle")); assert_false(pe_base_name_eq(httpd_bundle, "HTTPD-BUNDLE")); assert_false(pe_base_name_eq(httpd_bundle, "httpd")); assert_false(pe_base_name_eq(httpd_bundle, "httpd-docker-0")); } PCMK__UNIT_TEST(setup, teardown, cmocka_unit_test(bad_args), cmocka_unit_test(primitive_rsc), cmocka_unit_test(group_rsc), cmocka_unit_test(clone_rsc), cmocka_unit_test(bundle_rsc)) diff --git a/lib/pengine/tests/status/set_working_set_defaults_test.c b/lib/pengine/tests/status/set_working_set_defaults_test.c index 733573911e..2770a16213 100644 --- a/lib/pengine/tests/status/set_working_set_defaults_test.c +++ b/lib/pengine/tests/status/set_working_set_defaults_test.c @@ -1,52 +1,52 @@ /* * Copyright 2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include "mock_private.h" static void check_defaults(void **state) { uint32_t flags; pcmk_scheduler_t *scheduler = pcmk__assert_alloc(1, sizeof(pcmk_scheduler_t)); scheduler->priv = pcmk__assert_alloc(1, sizeof(pcmk__scheduler_private_t)); set_working_set_defaults(scheduler); flags = pcmk__sched_symmetric_cluster |pcmk__sched_stop_removed_resources |pcmk__sched_cancel_removed_actions; if (!strcmp(PCMK__CONCURRENT_FENCING_DEFAULT, PCMK_VALUE_TRUE)) { flags |= pcmk__sched_concurrent_fencing; } assert_null(scheduler->priv->out); assert_int_equal(scheduler->priv->next_ordering_id, 1); - assert_int_equal(scheduler->action_id, 1); + assert_int_equal(scheduler->priv->next_action_id, 1); assert_int_equal(scheduler->no_quorum_policy, pcmk_no_quorum_stop); assert_int_equal(scheduler->flags, flags); /* Avoid calling pe_free_working_set here so we don't artificially * inflate the coverage numbers. */ free(scheduler->priv); free(scheduler); } PCMK__UNIT_TEST(NULL, NULL, cmocka_unit_test(check_defaults)) diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index c9d7fffa96..619c468968 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,5133 +1,5134 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { pcmk_resource_t *rsc; // Resource that history is for pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML const char *id; // XML ID of history entry const char *key; // Operation key of action const char *task; // Action name const char *exit_reason; // Exit reason given for result guint interval_ms; // Action interval int call_id; // Call ID of action int expected_exit_status; // Expected exit status of action int exit_status; // Actual exit status of action int execution_status; // Execution status of action }; /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pcmk__set_scheduler_flags()/pcmk__clear_scheduler_flags() so that the * flag is stringified more readably in log messages. */ #define set_config_flag(scheduler, option, flag) do { \ GHashTable *config_hash = (scheduler)->priv->options; \ const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } else { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *failed); static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler); static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler); /*! * \internal * \brief Check whether a node is a dangling guest node * * \param[in] node Node to check * * \return true if \p node had a Pacemaker Remote connection resource with a * launcher that was removed from the CIB, otherwise false. */ static bool is_dangling_guest_node(pcmk_node_t *node) { return pcmk__is_pacemaker_remote_node(node) && (node->priv->remote != NULL) && (node->priv->remote->priv->launcher == NULL) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_removed_launched); } /*! * \brief Schedule a fence action for a node * * \param[in,out] scheduler Scheduler data * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider * \c PCMK_OPT_PRIORITY_FENCING_DELAY */ void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); if (pcmk__is_guest_or_bundle_node(node)) { // Fence a guest or bundle node by marking its launcher as failed pcmk_resource_t *rsc = node->priv->remote->priv->launcher; if (!pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pcmk__node_name(node), reason, rsc->id); } else { pcmk__sched_warn(scheduler, "Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pcmk__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pcmk__node_name(node), reason); pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } else if (pcmk__is_remote_node(node)) { pcmk_resource_t *rsc = node->priv->remote; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pcmk__node_name(node), reason); } else if (!pcmk_is_set(node->priv->flags, pcmk__node_remote_reset)) { pcmk__set_node_flags(node, pcmk__node_remote_reset); pcmk__sched_warn(scheduler, "Remote node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { pcmk__sched_warn(scheduler, "Cluster node %s %s: %s", pcmk__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR PCMK_XE_NVPAIR \ "[(@" PCMK_XA_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" PCMK_XA_NAME "='" PCMK_META_REQUIRES "') " \ "and @" PCMK_XA_VALUE "='" PCMK_VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RESOURCES \ "//" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR \ "|/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION "/" PCMK_XE_RSC_DEFAULTS \ "/" PCMK_XE_META_ATTRIBUTES "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(scheduler->flags, flag)) { result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { pcmk__set_scheduler_flags(scheduler, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; guint interval_ms = 0U; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; scheduler->priv->options = config_hash; pe__unpack_dataset_nvpairs(config, PCMK_XE_CLUSTER_PROPERTY_SET, &rule_data, config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, scheduler); pcmk__validate_cluster_options(config_hash); set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES, pcmk__sched_probe_resources); if (!pcmk_is_set(scheduler->flags, pcmk__sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is nonzero"); pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pcmk__sched_enable_unfencing, XPATH_ENABLE_UNFENCING, scheduler); value = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_TIMEOUT); pcmk_parse_interval_spec(value, &interval_ms); if (interval_ms >= INT_MAX) { scheduler->priv->fence_timeout_ms = INT_MAX; } else { scheduler->priv->fence_timeout_ms = (int) interval_ms; } crm_debug("STONITH timeout: %d", scheduler->priv->fence_timeout_ms); set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED, pcmk__sched_fencing_enabled); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { crm_debug("STONITH of failed nodes is enabled"); } else { crm_debug("STONITH of failed nodes is disabled"); } scheduler->priv->fence_action = pcmk__cluster_option(config_hash, PCMK_OPT_STONITH_ACTION); if (!strcmp(scheduler->priv->fence_action, PCMK__ACTION_POWEROFF)) { pcmk__warn_once(pcmk__wo_poweroff, "Support for " PCMK_OPT_STONITH_ACTION " of " "'" PCMK__ACTION_POWEROFF "' is deprecated and will be " "removed in a future release " "(use '" PCMK_ACTION_OFF "' instead)"); scheduler->priv->fence_action = PCMK_ACTION_OFF; } crm_trace("STONITH will %s nodes", scheduler->priv->fence_action); set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING, pcmk__sched_concurrent_fencing); if (pcmk_is_set(scheduler->flags, pcmk__sched_concurrent_fencing)) { crm_debug("Concurrent fencing is enabled"); } else { crm_debug("Concurrent fencing is disabled"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY); if (value) { pcmk_parse_interval_spec(value, &interval_ms); scheduler->priority_fencing_delay = (int) (interval_ms / 1000); crm_trace("Priority fencing delay is %ds", scheduler->priority_fencing_delay); } set_config_flag(scheduler, PCMK_OPT_STOP_ALL_RESOURCES, pcmk__sched_stop_all); crm_debug("Stop all active resources: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_stop_all)); set_config_flag(scheduler, PCMK_OPT_SYMMETRIC_CLUSTER, pcmk__sched_symmetric_cluster); if (pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, PCMK_VALUE_IGNORE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, PCMK_VALUE_FREEZE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, PCMK_VALUE_DEMOTE, pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { int do_panic = 0; crm_element_value_int(scheduler->input, PCMK_XA_NO_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(scheduler->flags, pcmk__sched_quorate)) { scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop': cluster has never had quorum"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop' because fencing is disabled"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { scheduler->no_quorum_policy = pcmk_no_quorum_stop; } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case pcmk_no_quorum_fence: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_RESOURCES, pcmk__sched_stop_removed_resources); if (pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { crm_trace("Orphan resources are stopped"); } else { crm_trace("Orphan resources are ignored"); } set_config_flag(scheduler, PCMK_OPT_STOP_ORPHAN_ACTIONS, pcmk__sched_cancel_removed_actions); if (pcmk_is_set(scheduler->flags, pcmk__sched_cancel_removed_actions)) { crm_trace("Orphan resource actions are stopped"); } else { crm_trace("Orphan resource actions are ignored"); } value = pcmk__cluster_option(config_hash, PCMK__OPT_REMOVE_AFTER_STOP); if (value != NULL) { if (crm_is_true(value)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); pcmk__warn_once(pcmk__wo_remove_after, "Support for the " PCMK__OPT_REMOVE_AFTER_STOP " cluster property is deprecated and will be " "removed in a future release"); } else { pcmk__clear_scheduler_flags(scheduler, pcmk__sched_remove_after_stop); } } set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE, pcmk__sched_in_maintenance); crm_trace("Maintenance mode: %s", pcmk__flag_text(scheduler->flags, pcmk__sched_in_maintenance)); set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL, pcmk__sched_start_failure_fatal); if (pcmk_is_set(scheduler->flags, pcmk__sched_start_failure_fatal)) { crm_trace("Start failures are always fatal"); } else { crm_trace("Start failures are handled by failcount"); } if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING, pcmk__sched_startup_fencing); } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pcmk__warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(scheduler); scheduler->priv->placement_strategy = pcmk__cluster_option(config_hash, PCMK_OPT_PLACEMENT_STRATEGY); crm_trace("Placement strategy: %s", scheduler->priv->placement_strategy); set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK, pcmk__sched_shutdown_lock); if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { value = pcmk__cluster_option(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); pcmk_parse_interval_spec(value, &(scheduler->shutdown_lock)); scheduler->shutdown_lock /= 1000; crm_trace("Resources will be locked to nodes that were cleanly " "shut down (locks expire after %s)", pcmk__readable_interval(scheduler->shutdown_lock)); } else { crm_trace("Resources will not be locked to nodes that were cleanly " "shut down"); } value = pcmk__cluster_option(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); pcmk_parse_interval_spec(value, &(scheduler->node_pending_timeout)); scheduler->node_pending_timeout /= 1000; if (scheduler->node_pending_timeout == 0) { crm_trace("Do not fence pending nodes"); } else { crm_trace("Fence pending nodes after %s", pcmk__readable_interval(scheduler->node_pending_timeout * 1000)); } return TRUE; } pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler) { pcmk_node_t *new_node = NULL; if (pcmk_find_node(scheduler, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; } new_node->assign = calloc(1, sizeof(struct pcmk__node_assignment)); new_node->details = calloc(1, sizeof(struct pcmk__node_details)); new_node->priv = calloc(1, sizeof(pcmk__node_private_t)); if ((new_node->assign == NULL) || (new_node->details == NULL) || (new_node->priv == NULL)) { free(new_node->assign); free(new_node->details); free(new_node->priv); free(new_node); pcmk__sched_err(scheduler, "Could not allocate memory for node %s", uname); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->assign->score = char2score(score); new_node->priv->id = id; new_node->priv->name = uname; new_node->priv->flags = pcmk__node_probes_allowed; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->running_rsc = NULL; new_node->priv->scheduler = scheduler; if (pcmk__str_eq(type, PCMK_VALUE_MEMBER, pcmk__str_null_matches|pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_cluster; } else if (pcmk__str_eq(type, PCMK_VALUE_REMOTE, pcmk__str_casei)) { new_node->priv->variant = pcmk__node_variant_remote; pcmk__set_scheduler_flags(scheduler, pcmk__sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, PCMK__VALUE_PING, pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming '" PCMK__VALUE_PING "'", pcmk__s(uname, "without name"), type); } pcmk__warn_once(pcmk__wo_ping_node, "Support for nodes of type '" PCMK__VALUE_PING "' " "(such as %s) is deprecated and will be removed in a " "future release", pcmk__s(uname, "unnamed node")); new_node->priv->variant = pcmk__node_variant_ping; } new_node->priv->attrs = pcmk__strkey_table(free, free); if (pcmk__is_pacemaker_remote_node(new_node)) { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "remote"); } else { pcmk__insert_dup(new_node->priv->attrs, CRM_ATTR_KIND, "cluster"); } new_node->priv->utilization = pcmk__strkey_table(free, free); new_node->priv->digest_cache = pcmk__strkey_table(free, pe__free_digests); scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = pcmk__xe_id(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__xe_is(attr_set, PCMK_XE_META_ATTRIBUTES)) { continue; } for (attr = pcmk__xe_first_child(attr_set, NULL, NULL, NULL); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, PCMK_XA_VALUE); const char *name = crm_element_value(attr, PCMK_XA_NAME); if (name == NULL) { // Sanity continue; } if (strcmp(name, PCMK_META_REMOTE_NODE) == 0) { remote_name = value; } else if (strcmp(name, PCMK_META_REMOTE_ADDR) == 0) { remote_server = value; } else if (strcmp(name, PCMK_META_REMOTE_PORT) == 0) { remote_port = value; } else if (strcmp(name, PCMK_META_REMOTE_CONNECT_TIMEOUT) == 0) { connect_timeout = value; } else if (strcmp(name, PCMK_META_REMOTE_ALLOW_MIGRATE) == 0) { remote_allow_migrate = value; } else if (strcmp(name, PCMK_META_IS_MANAGED) == 0) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->priv->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { if ((new_node->priv->variant == pcmk__node_variant_remote) && (new_node->priv->remote == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } } gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__xe_is(xml_obj, PCMK_XE_NODE)) { new_node = NULL; id = crm_element_value(xml_obj, PCMK_XA_ID); uname = crm_element_value(xml_obj, PCMK_XA_UNAME); type = crm_element_value(xml_obj, PCMK_XA_TYPE); score = crm_element_value(xml_obj, PCMK_XA_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" PCMK_XE_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } handle_startup_fencing(scheduler, new_node); add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, PCMK_XA_UNAME)); } } - if (scheduler->localhost - && (pcmk_find_node(scheduler, scheduler->localhost) == NULL)) { + if ((scheduler->priv->local_node_name != NULL) + && (pcmk_find_node(scheduler, + scheduler->priv->local_node_name) == NULL)) { crm_info("Creating a fake local node"); - pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0, - scheduler); + pe_create_node(scheduler->priv->local_node_name, + scheduler->priv->local_node_name, NULL, 0, scheduler); } return TRUE; } static void unpack_launcher(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *launcher_id = NULL; if (rsc->priv->children != NULL) { g_list_foreach(rsc->priv->children, (GFunc) unpack_launcher, scheduler); return; } launcher_id = g_hash_table_lookup(rsc->priv->meta, PCMK__META_CONTAINER); if ((launcher_id != NULL) && !pcmk__str_eq(launcher_id, rsc->id, pcmk__str_none)) { pcmk_resource_t *launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher != NULL) { rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); pcmk__rsc_trace(rsc, "Resource %s's launcher is %s", rsc->id, launcher_id); } else { pcmk__config_err("Resource %s: Unknown " PCMK__META_CONTAINER " %s", rsc->id, launcher_id); } } } gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = pcmk__xe_id(xml_obj); /* The pcmk_find_node() check ensures we don't iterate over an * expanded node that has already been added to the node list */ if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__xe_is(xml_obj, PCMK_XE_PRIMITIVE)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__xe_is(xml_obj, PCMK_XE_GROUP)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj, NULL, NULL, NULL); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, scheduler); if (new_node_id && (pcmk_find_node(scheduler, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, pcmk__xe_id(xml_obj2), pcmk__xe_id(xml_obj)); pe_create_node(new_node_id, new_node_id, PCMK_VALUE_REMOTE, NULL, scheduler); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { pcmk_node_t *remote_node = NULL; if (!pcmk_is_set(new_rsc->flags, pcmk__rsc_is_remote_connection)) { return; } if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pcmk_find_node(scheduler, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pcmk__node_name(remote_node)); remote_node->priv->remote = new_rsc; if (new_rsc->priv->launcher == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ pcmk__insert_dup(remote_node->priv->attrs, CRM_ATTR_KIND, "container"); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] scheduler Scheduler data * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; - scheduler->template_rsc_sets = pcmk__strkey_table(free, pcmk__free_idref); + scheduler->priv->templates = pcmk__strkey_table(free, pcmk__free_idref); for (xml_obj = pcmk__xe_first_child(xml_resources, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pcmk_resource_t *new_rsc = NULL; const char *id = pcmk__xe_id(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", xml_obj->name); continue; } if (pcmk__xe_is(xml_obj, PCMK_XE_TEMPLATE)) { - if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, + if (g_hash_table_lookup_extended(scheduler->priv->templates, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ - pcmk__insert_dup(scheduler->template_rsc_sets, id, NULL); + pcmk__insert_dup(scheduler->priv->templates, id, NULL); } continue; } crm_trace("Unpacking <%s " PCMK_XA_ID "='%s'>", xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, scheduler) == pcmk_rc_ok) { scheduler->priv->resources = g_list_append(scheduler->priv->resources, new_rsc); pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", xml_obj->name, id); } } for (gIter = scheduler->priv->resources; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; unpack_launcher(rsc, scheduler); link_rsc2remotenode(scheduler, rsc); } scheduler->priv->resources = g_list_sort(scheduler->priv->resources, pe__cmp_rsc_priority); if (pcmk_is_set(scheduler->flags, pcmk__sched_location_only)) { /* Ignore */ } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk__sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the " PCMK_OPT_STONITH_ENABLED " option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } /*! * \internal * \brief Parse configuration XML for fencing topology information * * \param[in] xml_fencing_topology Top of fencing topology configuration XML * \param[in,out] scheduler Scheduler data * * \return void */ void pcmk__unpack_fencing_topology(const xmlNode *xml_fencing_topology, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; int id = 0; for (xml_obj = pcmk__xe_first_child(xml_fencing_topology, PCMK_XE_FENCING_LEVEL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next_same(xml_obj)) { crm_element_value_int(xml_obj, PCMK_XA_INDEX, &id); // Ensure an ID was given if (pcmk__str_empty(pcmk__xe_id(xml_obj))) { pcmk__config_warn("Ignoring registration for topology level without ID"); continue; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { pcmk__config_warn("Ignoring topology registration with invalid level %d", id); continue; } } } gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; scheduler->tags = pcmk__strkey_table(free, pcmk__free_idref); for (xml_tag = pcmk__xe_first_child(xml_tags, NULL, NULL, NULL); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = pcmk__xe_id(xml_tag); if (!pcmk__xe_is(xml_tag, PCMK_XE_TAG)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " PCMK_XA_ID, (const char *) xml_tag->name); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag, NULL, NULL, NULL); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = pcmk__xe_id(xml_obj_ref); if (!pcmk__xe_is(xml_obj_ref, PCMK_XE_OBJ_REF)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " PCMK_XA_ID, xml_obj_ref->name, tag_id); continue; } pcmk__add_idref(scheduler->tags, tag_id, obj_ref); } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pcmk__ticket_t *ticket = NULL; ticket_id = pcmk__xe_id(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(scheduler->priv->ticket_constraints, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, PCMK_XA_ID, pcmk__str_none)) { continue; } pcmk__insert_dup(ticket->state, prop_name, prop_value); } granted = g_hash_table_lookup(ticket->state, PCMK__XA_GRANTED); if (granted && crm_is_true(granted)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We have ticket '%s'", ticket->id); } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_granted); crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, PCMK_XA_LAST_GRANTED); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, PCMK_XA_STANDBY); if (standby && crm_is_true(standby)) { pcmk__set_ticket_flags(ticket, pcmk__ticket_standby); if (pcmk_is_set(ticket->flags, pcmk__ticket_granted)) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { pcmk__clear_ticket_flags(ticket, pcmk__ticket_standby); } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets, NULL, NULL, NULL); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__xe_is(xml_obj, PCMK__XE_TICKET_STATE)) { continue; } unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = NULL; pcmk_resource_t *rsc = NULL; int maint = 0; if (!pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { return; } if ((this_node == NULL) || !pcmk__is_pacemaker_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pcmk__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_IN_MAINTENANCE), &maint, 0); if (maint) { pcmk__set_node_flags(this_node, pcmk__node_remote_maint); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_maint); } rsc = this_node->priv->remote; if (!pcmk_is_set(this_node->priv->flags, pcmk__node_remote_reset)) { this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); } attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pcmk__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(this_node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current)) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_managed))) { crm_info("%s is in maintenance mode", pcmk__node_name(this_node)); this_node->details->maintenance = TRUE; } discovery = pcmk__node_attr(this_node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__warn_once(pcmk__wo_rdisc_enabled, "Support for the " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " node attribute is deprecated and will be removed" " (and behave as 'true') in a future release."); if (pcmk__is_remote_node(this_node) && !pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute on Pacemaker Remote node %s" " because fencing is disabled", pcmk__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pcmk__node_name(this_node)); pcmk__clear_node_flags(this_node, pcmk__node_probes_allowed); } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] scheduler Scheduler data */ static void unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = pcmk__xe_first_child(state, PCMK__XE_TRANSIENT_ATTRIBUTES, NULL, NULL); add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_STANDBY, NULL, pcmk__rsc_node_current))) { crm_info("%s is in standby mode", pcmk__node_name(node)); pcmk__set_node_flags(node, pcmk__node_standby); } if (crm_is_true(pcmk__node_attr(node, PCMK_NODE_ATTR_MAINTENANCE, NULL, pcmk__rsc_node_current))) { crm_info("%s is in maintenance mode", pcmk__node_name(node)); node->details->maintenance = TRUE; } discovery = pcmk__node_attr(node, PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED, NULL, pcmk__rsc_node_current); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__config_warn("Ignoring " PCMK__NODE_ATTR_RESOURCE_DISCOVERY_ENABLED " attribute for %s because disabling resource" " discovery is not allowed for cluster nodes", pcmk__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * \C PCMK__XE_NODE_STATE element itself and node attributes inside it, but not * the resource history inside it. Multiple passes through the status are needed * to fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; pcmk_node_t *this_node = NULL; id = crm_element_value(state, PCMK_XA_ID); if (id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_NODE_STATE " entry without " PCMK_XA_ID); crm_log_xml_info(state, "missing-id"); return; } uname = crm_element_value(state, PCMK_XA_UNAME); if (uname == NULL) { /* If a joining peer makes the cluster acquire the quorum from corosync * meanwhile it has not joined CPG membership of pacemaker-controld yet, * it's possible that the created PCMK__XE_NODE_STATE entry doesn't have * a PCMK_XA_UNAME yet. We should recognize the node as `pending` and * wait for it to join CPG. */ crm_trace("Handling " PCMK__XE_NODE_STATE " entry with id=\"%s\" " "without " PCMK_XA_UNAME, id); } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { crm_notice("Ignoring recorded state for removed node with name %s and " PCMK_XA_ID " %s", pcmk__s(uname, "unknown"), id); return; } if (pcmk__is_pacemaker_remote_node(this_node)) { int remote_fenced = 0; /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, PCMK__XA_NODE_FENCED), &remote_fenced, 0); if (remote_fenced) { pcmk__set_node_flags(this_node, pcmk__node_remote_fenced); } else { pcmk__clear_node_flags(this_node, pcmk__node_remote_fenced); } return; } unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. */ this_node->details->unclean = FALSE; pcmk__set_node_flags(this_node, pcmk__node_seen); crm_trace("Determining online status of cluster node %s (id %s)", pcmk__node_name(this_node), id); determine_online_status(state, this_node, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk__sched_quorate) && this_node->details->online && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. * * \param[in] status CIB XML status section * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; // Loop through all PCMK__XE_NODE_STATE entries in CIB status for (const xmlNode *state = pcmk__xe_first_child(status, PCMK__XE_NODE_STATE, NULL, NULL); state != NULL; state = pcmk__xe_next_same(state)) { const char *id = pcmk__xe_id(state); const char *uname = crm_element_value(state, PCMK_XA_UNAME); pcmk_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history from malformed " PCMK__XE_NODE_STATE " without id and/or uname"); continue; } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " "no longer in configuration", id); continue; } if (pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) { crm_trace("Not unpacking resource history for node %s because " "already unpacked", id); continue; } if (fence) { // We're processing all remaining nodes } else if (pcmk__is_guest_or_bundle_node(this_node)) { /* We can unpack a guest node's history only after we've unpacked * other resource history to the point that we know that the node's * connection and containing resource are both up. */ const pcmk_resource_t *remote = this_node->priv->remote; const pcmk_resource_t *launcher = remote->priv->launcher; if ((remote->priv->orig_role != pcmk_role_started) || (launcher->priv->orig_role != pcmk_role_started)) { crm_trace("Not unpacking resource history for guest node %s " "because launcher and connection are not known to " "be up", id); continue; } } else if (pcmk__is_remote_node(this_node)) { /* We can unpack a remote node's history only after we've unpacked * other resource history to the point that we know that the node's * connection is up, with the exception of when shutdown locks are * in use. */ pcmk_resource_t *rsc = this_node->priv->remote; if ((rsc == NULL) || (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock) && (rsc->priv->orig_role != pcmk_role_started))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; } /* If fencing and shutdown locks are disabled and we're not processing * unseen nodes, then we don't want to unpack offline nodes until online * nodes have been unpacked. This allows us to number active clone * instances first. */ } else if (!pcmk_any_flags_set(scheduler->flags, pcmk__sched_fencing_enabled |pcmk__sched_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); continue; } if (pcmk__is_pacemaker_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); unpack_handle_remote_attrs(this_node, state, scheduler); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); pcmk__set_node_flags(this_node, pcmk__node_unpacked); unpack_node_lrm(this_node, state, scheduler); rc = EAGAIN; // Other node histories might depend on this one } return rc; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (scheduler->priv->ticket_constraints == NULL) { scheduler->priv->ticket_constraints = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status, NULL, NULL, NULL); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__xe_is(state, PCMK_XE_TICKETS)) { unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__xe_is(state, PCMK__XE_NODE_STATE)) { unpack_node_state(state, scheduler); } } while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled), scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (scheduler->stop_needed != NULL) { for (GList *item = scheduler->stop_needed; item; item = item->next) { pcmk_resource_t *container = item->data; pcmk_node_t *node = pcmk__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(scheduler->stop_needed); scheduler->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *this_node = gIter->data; if (!pcmk__is_pacemaker_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->priv->remote != NULL)) { pe__set_next_role(this_node->priv->remote, pcmk_role_stopped, "remote shutdown"); } if (!pcmk_is_set(this_node->priv->flags, pcmk__node_unpacked)) { determine_remote_online_status(scheduler, this_node); } } return TRUE; } /*! * \internal * \brief Unpack node's time when it became a member at the cluster layer * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * \param[in,out] scheduler Scheduler data * * \return Epoch time when node became a cluster member * (or scheduler effective time for legacy entries) if a member, * 0 if not a member, or -1 if no valid information available */ static long long unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) { const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); int member = 0; if (member_time == NULL) { return -1LL; } else if (crm_str_to_boolean(member_time, &member) == 1) { /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was * recorded as a boolean for a DC < 2.1.7, or the node is pending * shutdown and has left the CPG, in which case it was set to 1 to avoid * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT. * * We return the effective time for in_ccm=1 because what's important to * avoid fencing is that effective time minus this value is less than * the pending node timeout. */ return member? (long long) get_effective_time(scheduler) : 0LL; } else { long long when_member = 0LL; if ((pcmk__scan_ll(member_time, &when_member, 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in " PCMK__XE_NODE_STATE " entry", member_time); return -1LL; } return when_member; } } /*! * \internal * \brief Unpack node's time when it became online in process group * * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry * * \return Epoch time when node became online in process group (or 0 if not * online, or 1 for legacy online entries) */ static long long unpack_node_online(const xmlNode *node_state) { const char *peer_time = crm_element_value(node_state, PCMK_XA_CRMD); // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" if (pcmk__str_eq(peer_time, PCMK_VALUE_OFFLINE, pcmk__str_casei|pcmk__str_null_matches)) { return 0LL; } else if (pcmk__str_eq(peer_time, PCMK_VALUE_ONLINE, pcmk__str_casei)) { return 1LL; } else { long long when_online = 0LL; if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) || (when_online < 0)) { crm_warn("Unrecognized value '%s' for " PCMK_XA_CRMD " in " PCMK__XE_NODE_STATE " entry, assuming offline", peer_time); return 0LL; } return when_online; } } /*! * \internal * \brief Unpack node attribute for user-requested fencing * * \param[in] node Node to check * \param[in] node_state Node's \c PCMK__XE_NODE_STATE entry in CIB status * * \return \c true if fencing has been requested for \p node, otherwise \c false */ static bool unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) { long long value = 0LL; int value_i = 0; const char *value_s = pcmk__node_attr(node, PCMK_NODE_ATTR_TERMINATE, NULL, pcmk__rsc_node_current); // Value may be boolean or an epoch time if (crm_str_to_boolean(value_s, &value_i) == 1) { return (value_i != 0); } if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { return (value > 0); } crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE "node attribute for %s", value_s, pcmk__node_name(node)); return false; } static gboolean determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); if (when_member <= 0) { crm_trace("Node %s is %sdown", pcmk__node_name(this_node), ((when_member < 0)? "presumed " : "")); } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node %s is not ready to run resources: %s", pcmk__node_name(this_node), join); } } else if (!pcmk_is_set(this_node->priv->flags, pcmk__node_expected_up)) { crm_trace("Node %s controller is down: " "member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); crm_info("Node %s member@%lld online@%lld join=%s expected=%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } /*! * \internal * \brief Check whether a node has taken too long to join controller group * * \param[in,out] scheduler Scheduler data * \param[in] node Node to check * \param[in] when_member Epoch time when node became a cluster member * \param[in] when_online Epoch time when node joined controller group * * \return true if node has been pending (on the way up) longer than * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false * \note This will also update the cluster's recheck time if appropriate. */ static inline bool pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, long long when_member, long long when_online) { if ((scheduler->node_pending_timeout > 0) && (when_member > 0) && (when_online <= 0)) { // There is a timeout on pending nodes, and node is pending time_t timeout = when_member + scheduler->node_pending_timeout; if (get_effective_time(node->priv->scheduler) >= timeout) { return true; // Node has timed out } // Node is pending, but still has time pe__update_recheck_time(timeout, scheduler, "pending node timeout"); } return false; } static bool determine_online_status_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { bool termination_requested = unpack_node_terminate(this_node, node_state); const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); /* - PCMK__XA_JOIN ::= member|down|pending|banned - PCMK_XA_EXPECTED ::= member|down @COMPAT with entries recorded for DCs < 2.1.7 - PCMK__XA_IN_CCM ::= true|false - PCMK_XA_CRMD ::= online|offline Since crm_feature_set 3.18.0 (pacemaker-2.1.7): - PCMK__XA_IN_CCM ::= |0 Since when node has been a cluster member. A value 0 of means the node is not a cluster member. - PCMK_XA_CRMD ::= |0 Since when peer has been online in CPG. A value 0 means the peer is offline in CPG. */ crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s", pcmk__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, ""), (termination_requested? " (termination requested)" : "")); if (this_node->details->shutdown) { crm_debug("%s is shutting down", pcmk__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ return (when_online > 0); } if (when_member < 0) { pe_fence_node(scheduler, this_node, "peer has not been seen by the cluster", FALSE); return false; } if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) { pe_fence_node(scheduler, this_node, "peer failed Pacemaker membership criteria", FALSE); } else if (termination_requested) { if ((when_member <= 0) && (when_online <= 0) && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) { crm_info("%s was fenced as requested", pcmk__node_name(this_node)); return false; } pe_fence_node(scheduler, this_node, "fencing was requested", false); } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_null_matches)) { if (pending_too_long(scheduler, this_node, when_member, when_online)) { pe_fence_node(scheduler, this_node, "peer pending timed out on joining the process group", FALSE); } else if ((when_member > 0) || (when_online > 0)) { crm_info("- %s is not ready to run resources", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", pcmk__node_name(this_node)); } } else if (when_member <= 0) { // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes pe_fence_node(scheduler, this_node, "peer is no longer part of the cluster", TRUE); } else if (when_online <= 0) { pe_fence_node(scheduler, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) { crm_info("%s is active", pcmk__node_name(this_node)); } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pcmk__node_name(this_node)); pcmk__set_node_flags(this_node, pcmk__node_standby); this_node->details->pending = TRUE; } else { pe_fence_node(scheduler, this_node, "peer was in an unknown state", FALSE); } return (when_member > 0); } static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node) { pcmk_resource_t *rsc = this_node->priv->remote; pcmk_resource_t *launcher = NULL; pcmk_node_t *host = NULL; const char *node_type = "Remote"; if (rsc == NULL) { /* This is a leftover node state entry for a former Pacemaker Remote * node whose connection resource was removed. Consider it offline. */ crm_trace("Pacemaker Remote node %s is considered OFFLINE because " "its connection resource has been removed from the CIB", this_node->priv->id); this_node->details->online = FALSE; return; } launcher = rsc->priv->launcher; if (launcher != NULL) { node_type = "Guest"; if (pcmk__list_of_1(rsc->priv->active_nodes)) { host = rsc->priv->active_nodes->data; } } /* If the resource is currently started, mark it online. */ if (rsc->priv->orig_role == pcmk_role_started) { this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if ((rsc->priv->orig_role == pcmk_role_started) && (rsc->priv->next_role == pcmk_role_stopped)) { crm_trace("%s node %s shutting down because connection resource is stopping", node_type, this_node->priv->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if ((launcher != NULL) && pcmk_is_set(launcher->flags, pcmk__rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->priv->id); this_node->details->online = FALSE; pcmk__set_node_flags(this_node, pcmk__node_remote_reset); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", node_type, this_node->priv->id); this_node->details->online = FALSE; } else if ((rsc->priv->orig_role == pcmk_role_stopped) || ((launcher != NULL) && (launcher->priv->orig_role == pcmk_role_stopped))) { crm_trace("%s node %s OFFLINE because its resource is stopped", node_type, this_node->priv->id); this_node->details->online = FALSE; pcmk__clear_node_flags(this_node, pcmk__node_remote_reset); } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->priv->id); this_node->details->online = FALSE; pcmk__set_node_flags(this_node, pcmk__node_remote_reset); } else { crm_trace("%s node %s is %s", node_type, this_node->priv->id, this_node->details->online? "ONLINE" : "OFFLINE"); } } static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, PCMK_XA_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { pcmk__set_node_flags(this_node, pcmk__node_expected_up); } if (this_node->priv->variant == pcmk__node_variant_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (!pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { online = determine_online_status_no_fencing(scheduler, node_state, this_node); } else { online = determine_online_status_fencing(scheduler, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->assign->score = -PCMK_SCORE_INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->assign->score = -PCMK_SCORE_INFINITY; } if (this_node->priv->variant == pcmk__node_variant_ping) { crm_info("%s is not a Pacemaker node", pcmk__node_name(this_node)); } else if (this_node->details->unclean) { pcmk__sched_warn(scheduler, "%s is unclean", pcmk__node_name(this_node)); } else if (!this_node->details->online) { crm_trace("%s is offline", pcmk__node_name(this_node)); } else if (this_node->details->shutdown) { crm_info("%s is shutting down", pcmk__node_name(this_node)); } else if (this_node->details->pending) { crm_info("%s is pending", pcmk__node_name(this_node)); } else if (pcmk_is_set(this_node->priv->flags, pcmk__node_standby)) { crm_info("%s is in standby", pcmk__node_name(this_node)); } else if (this_node->details->maintenance) { crm_info("%s is in maintenance", pcmk__node_name(this_node)); } else { crm_info("%s is online", pcmk__node_name(this_node)); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = pcmk__assert_alloc(base_name_len + 3, sizeof(char)); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = pcmk__xe_create(NULL, PCMK_XE_PRIMITIVE); pcmk__xe_copy_attrs(xml_rsc, rsc_entry, pcmk__xaf_none); crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pcmk_find_node(scheduler, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, PCMK_VALUE_REMOTE, NULL, scheduler); } link_rsc2remotenode(scheduler, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, PCMK__META_CONTAINER)) { // This removed resource needs to be mapped to a launcher crm_trace("Launched resource %s was removed from the configuration", rsc_id); pcmk__set_rsc_flags(rsc, pcmk__rsc_removed_launched); } pcmk__set_rsc_flags(rsc, pcmk__rsc_removed); scheduler->priv->resources = g_list_append(scheduler->priv->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ static pcmk_resource_t * create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); pcmk_resource_t *orphan = NULL; // find_rsc() because we might be a cloned group orphan = top->priv->fns->find_rsc(top, rsc_id, NULL, pcmk_rsc_match_clone_only); pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pcmk__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of \c PCMK_META_CLONE_MAX * instances); (3) a newly created orphan (that is, \c PCMK_META_CLONE_MAX * instances are already active). * * \param[in,out] scheduler Scheduler data * \param[in] node Node on which to check for instance * \param[in,out] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (no instance) */ static pcmk_resource_t * find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, pcmk_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; pcmk_resource_t *rsc = NULL; pcmk_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(pcmk__is_anonymous_clone(parent)); // Check for active (or partially active, for cloned groups) instance pcmk__rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pcmk__node_name(node), parent->id); for (rIter = parent->priv->children; (rIter != NULL) && (rsc == NULL); rIter = rIter->next) { GList *locations = NULL; pcmk_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if * PCMK_META_GLOBALLY_UNIQUE was flipped from true to false); and * (3) when we re-run calculations on the same scheduler data as part of * a simulation. */ child->priv->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (pcmk__same_node((pcmk_node_t *) locations->data, node)) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->priv->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if PCMK_META_GLOBALLY_UNIQUE is switched from true * to false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->priv->active_nodes != NULL) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, pcmk__node_name(node)); skip_inactive = TRUE; rsc = NULL; } else { pcmk__rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && !pcmk_is_set(child->flags, pcmk__rsc_blocked)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->priv->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance != NULL) { const pcmk_node_t *pending_node = NULL; pending_node = inactive_instance->priv->pending_node; if ((pending_node != NULL) && !pcmk__same_node(pending_node, node)) { inactive_instance = NULL; } } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pcmk__rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has PCMK_META_REQUIRES set to PCMK_VALUE_QUORUM or * PCMK_VALUE_NOTHING, and we don't have a clone instance for every node, we * don't want to consume a valid instance number for unclean nodes. Such * instances may appear to be active according to the history, but should be * considered inactive, so we can start an instance elsewhere. Treat such * instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk__rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pcmk__is_guest_or_bundle_node(node) && !pe__is_universal_clone(parent, scheduler)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler); pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pcmk_resource_t * unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, const char *rsc_id) { pcmk_resource_t *rsc = NULL; pcmk_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when PCMK_META_CLONE_MAX=0, * we create a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pcmk_resource_t *clone0 = pe_find_resource(scheduler->priv->resources, clone0_id); if (clone0 && !pcmk_is_set(clone0->flags, pcmk__rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->priv->variant > pcmk__rsc_variant_primitive) { crm_trace("Resource history for %s is orphaned " "because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pcmk__is_anonymous_clone(parent)) { if (pcmk__is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->priv->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(scheduler, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_none) && !pcmk__str_eq(rsc_id, rsc->priv->history_id, pcmk__str_none)) { pcmk__str_update(&(rsc->priv->history_id), rsc_id); pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pcmk__node_name(node), rsc->id, pcmk_is_set(rsc->flags, pcmk__rsc_removed)? " (ORPHAN)" : ""); } return rsc; } static pcmk_resource_t * process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pcmk__node_name(node)); rsc = create_fake_resource(rsc_id, rsc_entry, scheduler); if (rsc == NULL) { return NULL; } if (!pcmk_is_set(scheduler->flags, pcmk__sched_stop_removed_resources)) { pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -PCMK_SCORE_INFINITY, "__orphan_do_not_run__", scheduler); } return rsc; } static void process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, enum pcmk__on_fail on_fail) { pcmk_node_t *tmpnode = NULL; char *reason = NULL; enum pcmk__on_fail save_on_fail = pcmk__on_fail_ignore; pcmk_scheduler_t *scheduler = NULL; bool known_active = false; CRM_ASSERT(rsc); scheduler = rsc->priv->scheduler; known_active = (rsc->priv->orig_role > pcmk_role_stopped); pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, pcmk_role_text(rsc->priv->orig_role), pcmk__node_name(node), pcmk__on_fail_text(on_fail)); /* process current state */ if (rsc->priv->orig_role != pcmk_role_unknown) { pcmk_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->priv->probed_nodes, node->priv->id) == NULL) { pcmk_node_t *n = pe__copy_node(node); pcmk__rsc_trace(rsc, "%s (%s in history) known on %s", rsc->id, pcmk__s(rsc->priv->history_id, "the same"), pcmk__node_name(n)); g_hash_table_insert(iter->priv->probed_nodes, (gpointer) n->priv->id, n); } if (pcmk_is_set(iter->flags, pcmk__rsc_unique)) { break; } iter = iter->priv->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if (known_active && !node->details->online && !node->details->maintenance && pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pcmk__is_guest_or_bundle_node(node)) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); should_fence = TRUE; } else if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { if (pcmk__is_remote_node(node) && (node->priv->remote != NULL) && !pcmk_is_set(node->priv->remote->flags, pcmk__rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ pcmk__clear_node_flags(node, pcmk__node_seen); reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(scheduler, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = pcmk__on_fail_ignore; } switch (on_fail) { case pcmk__on_fail_ignore: /* nothing to do */ break; case pcmk__on_fail_demote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); demote_action(rsc, node, FALSE); break; case pcmk__on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(scheduler, node, reason, FALSE); free(reason); break; case pcmk__on_fail_standby_node: pcmk__set_node_flags(node, pcmk__node_standby|pcmk__node_fail_standby); break; case pcmk__on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pcmk__clear_rsc_flags(rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(rsc, pcmk__rsc_blocked); break; case pcmk__on_fail_ban: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -PCMK_SCORE_INFINITY, "__action_migration_auto__", scheduler); break; case pcmk__on_fail_stop: pe__set_next_role(rsc, pcmk_role_stopped, PCMK_META_ON_FAIL "=" PCMK_VALUE_STOP); break; case pcmk__on_fail_restart: if (known_active) { pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_restart_container: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if ((rsc->priv->launcher != NULL) && pcmk__is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ scheduler->stop_needed = g_list_prepend(scheduler->stop_needed, rsc->priv->launcher); } else if (rsc->priv->launcher != NULL) { stop_action(rsc->priv->launcher, node, FALSE); } else if (known_active) { stop_action(rsc, node, FALSE); } break; case pcmk__on_fail_reset_remote: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { tmpnode = NULL; if (pcmk_is_set(rsc->flags, pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); } if (pcmk__is_remote_node(tmpnode) && !pcmk_is_set(tmpnode->priv->flags, pcmk__node_remote_fenced)) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(scheduler, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (known_active) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->priv->remote_reconnect_ms > 0U) { pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } /* Ensure a remote connection failure forces an unclean Pacemaker Remote * node to be fenced. By marking the node as seen, the failure will result * in a fencing operation regardless if we're going to attempt to reconnect * in this transition. */ if (pcmk_all_flags_set(rsc->flags, pcmk__rsc_failed|pcmk__rsc_is_remote_connection)) { tmpnode = pcmk_find_node(scheduler, rsc->id); if (tmpnode && tmpnode->details->unclean) { pcmk__set_node_flags(tmpnode, pcmk__node_seen); } } if (known_active) { if (pcmk_is_set(rsc->flags, pcmk__rsc_removed)) { if (pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { crm_notice("Removed resource %s is active on %s and will be " "stopped when possible", rsc->id, pcmk__node_name(node)); } else { crm_notice("Removed resource %s must be stopped manually on %s " "because " PCMK_OPT_STOP_ORPHAN_RESOURCES " is set to false", rsc->id, pcmk__node_name(node)); } } native_add_running(rsc, node, scheduler, (save_on_fail != pcmk__on_fail_ignore)); switch (on_fail) { case pcmk__on_fail_ignore: break; case pcmk__on_fail_demote: case pcmk__on_fail_block: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed); break; default: pcmk__set_rsc_flags(rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); break; } } else if ((rsc->priv->history_id != NULL) && (strchr(rsc->priv->history_id, ':') != NULL)) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pcmk__rsc_trace(rsc, "Clearing history ID %s for %s (stopped)", rsc->priv->history_id, rsc->id); free(rsc->priv->history_id); rsc->priv->history_id = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *stop = (pcmk_action_t *) gIter->data; pcmk__set_action_flags(stop, pcmk__action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. */ if ((rsc->priv->orig_role == pcmk_role_stopped) && (rsc->priv->active_nodes != NULL) && (rsc->priv->partial_migration_target != NULL) && pcmk__same_node(rsc->priv->partial_migration_source, node)) { rsc->priv->orig_role = pcmk_role_started; } } /* create active recurring operations as optional */ static void process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = pcmk__xe_id(rsc_op); counter++; if (node->details->online == FALSE) { pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pcmk__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pcmk__node_name(node)); continue; } else if (counter < start_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d", id, pcmk__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pcmk__node_name(node)); continue; } status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pcmk__rsc_trace(rsc, "Skipping %s on %s: status", id, pcmk__node_name(node)); continue; } task = crm_element_value(rsc_op, PCMK_XA_OPERATION); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pcmk__rsc_trace(rsc, "Creating %s on %s", key, pcmk__node_name(node)); custom_action(rsc, key, task, node, TRUE, scheduler); } } void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, PCMK_XA_OPERATION); status = crm_element_value(rsc_op, PCMK__XA_OP_STATUS); if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, PCMK__XA_RC_CODE); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((scheduler->shutdown_lock > 0) && (get_effective_time(scheduler) > (lock_time + scheduler->shutdown_lock))) { pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pcmk__node_name(node)); pe__clear_resource_history(rsc, node); } else { rsc->priv->lock_node = node; rsc->priv->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one \c PCMK__XE_LRM_RESOURCE entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry \c PCMK__XE_LRM_RESOURCE XML being unpacked * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ static pcmk_resource_t * unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = pcmk__xe_id(lrm_resource); pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum pcmk__on_fail on_fail = pcmk__on_fail_ignore; enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { pcmk__config_err("Ignoring invalid " PCMK__XE_LRM_RESOURCE " entry: No " PCMK_XA_ID); crm_log_xml_info(lrm_resource, "missing-id"); return NULL; } crm_trace("Unpacking " PCMK__XE_LRM_RESOURCE " for %s on %s", rsc_id, pcmk__node_name(node)); /* Build a list of individual PCMK__XE_LRM_RSC_OP entries, so we can sort * them */ for (rsc_op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next_same(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(scheduler->flags, pcmk__sched_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->priv->orig_role; rsc->priv->orig_role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if ((rsc->priv->next_role == pcmk_role_unknown) || (req_role < rsc->priv->next_role)) { pe__set_next_role(rsc, req_role, PCMK_META_TARGET_ROLE); } else if (req_role > rsc->priv->next_role) { pcmk__rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, pcmk_role_text(rsc->priv->next_role), pcmk_role_text(req_role)); } } if (saved_role > rsc->priv->orig_role) { rsc->priv->orig_role = saved_role; } return rsc; } static void handle_removed_launched_resources(const xmlNode *lrm_rsc_list, pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list, NULL, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pcmk_resource_t *rsc; pcmk_resource_t *launcher = NULL; const char *rsc_id; const char *launcher_id = NULL; if (!pcmk__xe_is(rsc_entry, PCMK__XE_LRM_RESOURCE)) { continue; } launcher_id = crm_element_value(rsc_entry, PCMK__META_CONTAINER); rsc_id = crm_element_value(rsc_entry, PCMK_XA_ID); if ((launcher_id == NULL) || (rsc_id == NULL)) { continue; } launcher = pe_find_resource(scheduler->priv->resources, launcher_id); if (launcher == NULL) { continue; } rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if ((rsc == NULL) || (rsc->priv->launcher != NULL) || !pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { continue; } pcmk__rsc_trace(rsc, "Mapped launcher of removed resource %s to %s", rsc->id, launcher_id); rsc->priv->launcher = launcher; launcher->priv->launched = g_list_append(launcher->priv->launched, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler) { bool found_removed_launched_resource = false; // Drill down to PCMK__XE_LRM_RESOURCES section xml = pcmk__xe_first_child(xml, PCMK__XE_LRM, NULL, NULL); if (xml == NULL) { return; } xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL); if (xml == NULL) { return; } // Unpack each PCMK__XE_LRM_RESOURCE entry for (const xmlNode *rsc_entry = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCE, NULL, NULL); rsc_entry != NULL; rsc_entry = pcmk__xe_next_same(rsc_entry)) { pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_removed_launched)) { found_removed_launched_resource = true; } } /* Now that all resource state has been unpacked for this node, map any * removed launched resources to their launchers. */ if (found_removed_launched_resource) { handle_removed_launched_resources(xml, scheduler); } } static void set_active(pcmk_resource_t *rsc) { const pcmk_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pcmk__rsc_promotable)) { rsc->priv->orig_role = pcmk_role_unpromoted; } else { rsc->priv->orig_role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pcmk_node_t *node = value; int *score = user_data; node->assign->score = *score; } #define XPATH_NODE_STATE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ "/" PCMK__XE_NODE_STATE #define SUB_XPATH_LRM_RESOURCE "/" PCMK__XE_LRM \ "/" PCMK__XE_LRM_RESOURCES \ "/" PCMK__XE_LRM_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" PCMK__XE_LRM_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" PCMK_XA_OPERATION "='", op, "'", NULL); /* Need to check against transition_magic too? */ if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" PCMK__META_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, PCMK__XA_RC_CODE, &rc); crm_element_value_int(xml, PCMK__XA_OP_STATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! * \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; char *xpath = NULL; xpath = crm_strdup_printf(XPATH_NODE_STATE "[@" PCMK_XA_UNAME "='%s']" SUB_XPATH_LRM_RESOURCE "[@" PCMK_XA_ID "='%s']" SUB_XPATH_LRM_RSC_OP "[@" PCMK__XA_RC_CODE "!='%d']", node_name, rsc->id, PCMK_OCF_UNKNOWN); search = xpath_search(rsc->priv->scheduler->input, xpath); result = (numXpathResults(search) == 0); freeXpathObject(search); free(xpath); return result; } /*! * \internal * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor != NULL) && (pe__is_newer_op(monitor, xml_op) > 0); } /*! * \internal * \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } for (xmlNode *op = pcmk__xe_first_child(lrm_resource, PCMK__XE_LRM_RSC_OP, NULL, NULL); op != NULL; op = pcmk__xe_next_same(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, PCMK_XA_OPERATION); if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL) && pe__is_newer_op(op, xml_op) > 0) { return true; } } return false; } /*! * \internal * \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = (migrate_from != NULL)? migrate_from : migrate_to; const char *source = crm_element_value(xml_op, PCMK__META_MIGRATE_SOURCE); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. */ if ((xml_op != migrate_to) && (migrate_to != NULL) && pcmk__str_eq(node_name, source, pcmk__str_casei)) { xml_op = migrate_to; } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, scheduler); } /*! * \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, PCMK__META_MIGRATE_SOURCE); *target_name = crm_element_value(entry, PCMK__META_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { pcmk__config_err("Ignoring resource history entry %s without " PCMK__META_MIGRATE_SOURCE " and " PCMK__META_MIGRATE_TARGET, pcmk__xe_id(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_SOURCE "='%s' does not match %s", pcmk__xe_id(entry), *source_name, pcmk__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->priv->name, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " PCMK__META_MIGRATE_TARGET "='%s' does not match %s", pcmk__xe_id(entry), *target_name, pcmk__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pcmk__node_name(node)); rsc->priv->orig_role = pcmk_role_stopped; rsc->priv->dangling_migration_sources = g_list_prepend(rsc->priv->dangling_migration_sources, (gpointer) node); } /*! * \internal * \brief Update resource role etc. after a successful migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. * * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. * * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, scheduler); // Check for a migrate_from action from this source on the target migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, -1, scheduler); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, PCMK__XA_RC_CODE, &from_rc); crm_element_value_int(migrate_from, PCMK__XA_OP_STATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. */ target_newer_state = newer_state_after_migrate(history->rsc->id, target, history->xml, migrate_from, scheduler); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ history->rsc->priv->orig_role = pcmk_role_started; target_node = pcmk_find_node(scheduler, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { native_add_running(history->rsc, target_node, scheduler, TRUE); } else { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. */ if ((target_node != NULL) && target_node->details->online && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { pcmk_node_t *source_node = pcmk_find_node(scheduler, source); native_add_running(history->rsc, target_node, scheduler, FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ history->rsc->priv->partial_migration_target = target_node; history->rsc->priv->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration pcmk__set_rsc_flags(history->rsc, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_migratable); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for migrate_from on the target target_migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ && !newer_state_after_migrate(history->rsc->id, target, history->xml, target_migrate_from, scheduler)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). */ pcmk_node_t *target_node = pcmk_find_node(scheduler, target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, scheduler, FALSE); } } else if (!non_monitor_after(history->rsc->id, source, history->xml, scheduler)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later history->rsc->priv->dangling_migration_sources = g_list_prepend(history->rsc->priv->dangling_migration_sources, (gpointer) history->node); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_from action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; // Get source and target node names from XML if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->priv->orig_role = pcmk_role_started; // Check for a migrate_to on the source source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, scheduler); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. */ && !newer_state_after_migrate(history->rsc->id, source, source_migrate_to, history->xml, scheduler)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ pcmk_node_t *source_node = pcmk_find_node(scheduler, source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, scheduler, TRUE); } } } /*! * \internal * \brief Add an action to cluster's list of failed actions * * \param[in,out] history Parsed action result history */ static void record_failed_op(struct action_history *history) { const pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (!(history->node->details->online)) { return; } for (const xmlNode *xIter = scheduler->priv->failed->children; xIter != NULL; xIter = xIter->next) { const char *key = pcmk__xe_history_key(xIter); const char *uname = crm_element_value(xIter, PCMK_XA_UNAME); if (pcmk__str_eq(history->key, key, pcmk__str_none) && pcmk__str_eq(uname, history->node->priv->name, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", history->key, pcmk__node_name(history->node)); return; } } crm_trace("Adding entry for %s on %s to failed action list", history->key, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); crm_xml_add(history->xml, PCMK__XA_RSC_ID, history->rsc->id); pcmk__xml_copy(scheduler->priv->failed, history->xml); } static char * last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, PCMK_XA_LAST_RC_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = pcmk__str_copy(p); } free(when_s); } if (result == NULL) { result = pcmk__str_copy("unknown_time"); } return result; } /*! * \internal * \brief Ban a resource (or its clone if an anonymous instance) from all nodes * * \param[in,out] rsc Resource to ban */ static void ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -PCMK_SCORE_INFINITY; const pcmk_scheduler_t *scheduler = rsc->priv->scheduler; if (rsc->priv->parent != NULL) { pcmk_resource_t *parent = uber_parent(rsc); if (pcmk__is_anonymous_clone(parent)) { /* For anonymous clones, if an operation with * PCMK_META_ON_FAIL=PCMK_VALUE_STOP fails for any instance, the * entire clone must stop. */ rsc = parent; } } // Ban the resource from all nodes crm_notice("%s will not be started under current conditions", rsc->id); if (rsc->priv->allowed_nodes != NULL) { g_hash_table_destroy(rsc->priv->allowed_nodes); } rsc->priv->allowed_nodes = pe__node_list2table(scheduler->nodes); g_hash_table_foreach(rsc->priv->allowed_nodes, set_node_score, &score); } /*! * \internal * \brief Get configured failure handling and role after failure for an action * * \param[in,out] history Unpacked action history entry * \param[out] on_fail Where to set configured failure handling * \param[out] fail_role Where to set to role after failure */ static void unpack_failure_handling(struct action_history *history, enum pcmk__on_fail *on_fail, enum rsc_role_e *fail_role) { xmlNode *config = pcmk__find_action_config(history->rsc, history->task, history->interval_ms, true); GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, history->task, history->interval_ms, config); const char *on_fail_str = g_hash_table_lookup(meta, PCMK_META_ON_FAIL); *on_fail = pcmk__parse_on_fail(history->rsc, history->task, history->interval_ms, on_fail_str); *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, meta); g_hash_table_destroy(meta); } /*! * \internal * \brief Update resource role, failure handling, etc., after a failed action * * \param[in,out] history Parsed action result history * \param[in] config_on_fail Action failure handling from configuration * \param[in] fail_role Resource's role after failure of this action * \param[out] last_failure This will be set to the history XML * \param[in,out] on_fail Actual handling of action result */ static void unpack_rsc_op_failure(struct action_history *history, enum pcmk__on_fail config_on_fail, enum rsc_role_e fail_role, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { bool is_probe = false; char *last_change_s = NULL; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; *last_failure = history->xml; is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); if (!pcmk_is_set(scheduler->flags, pcmk__sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); } else { pcmk__sched_warn(scheduler, "Unexpected result (%s%s%s) was recorded for %s of " "%s on %s at %s " QB_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pcmk__node_name(history->node), last_change_s, history->exit_status, history->id); if (is_probe && (history->exit_status != PCMK_OCF_OK) && (history->exit_status != PCMK_OCF_NOT_RUNNING) && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the " PCMK_XA_RESOURCE_DISCOVERY " option for location " "constraints", history->rsc->id, pcmk__node_name(history->node)); } record_failed_op(history); } free(last_change_s); if (*on_fail < config_on_fail) { pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s", pcmk__on_fail_text(*on_fail), pcmk__on_fail_text(config_on_fail), history->key); *on_fail = config_on_fail; } if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -PCMK_SCORE_INFINITY, "__stop_fail__", scheduler); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { if (config_on_fail == pcmk__on_fail_block) { history->rsc->priv->orig_role = pcmk_role_promoted; pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with " PCMK_META_ON_FAIL "=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { history->rsc->priv->orig_role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. */ history->rsc->priv->orig_role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); history->rsc->priv->orig_role = pcmk_role_stopped; } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } pcmk__rsc_trace(history->rsc, "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s", history->rsc->id, pcmk_role_text(history->rsc->priv->orig_role), pcmk__btoa(history->node->details->unclean), pcmk__on_fail_text(config_on_fail), pcmk_role_text(fail_role)); if ((fail_role != pcmk_role_started) && (history->rsc->priv->next_role < fail_role)) { pe__set_next_role(history->rsc, fail_role, "failure"); } if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } } /*! * \internal * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * * \param[in,out] history Parsed action history entry */ static void block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->priv->scheduler, history->node)) { return; // Failed stops are recoverable via fencing } last_change_s = last_change_str(history->xml); pcmk__sched_err(history->node->priv->scheduler, "No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " QB_XS " rc=%d id=%s", history->rsc->id, history->task, pcmk__node_name(history->node), services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), last_change_s, history->exit_status, history->id); free(last_change_s); pcmk__clear_rsc_flags(history->rsc, pcmk__rsc_managed); pcmk__set_rsc_flags(history->rsc, pcmk__rsc_blocked); } /*! * \internal * \brief Update action history's execution status and why * * \param[in,out] history Parsed action history entry * \param[out] why Where to store reason for update * \param[in] value New value * \param[in] reason Description of why value was changed */ static inline void remap_because(struct action_history *history, const char **why, int value, const char *reason) { if (history->execution_status != value) { history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. The status provided by the * executor is not directly usable since the executor does not know what was expected. * * \param[in,out] history Parsed action history entry * \param[in,out] on_fail What should be done about the result * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the scheduler data's list of failed * operations to highlight it for the user. * * \note This may update the resource's current and next role. */ static void remap_operation(struct action_history *history, enum pcmk__on_fail *on_fail, bool expired) { bool is_probe = false; int orig_exit_status = history->exit_status; int orig_exec_status = history->execution_status; const char *why = NULL; const char *task = history->task; // Remap degraded results to their successful counterparts history->exit_status = pcmk__effective_rc(history->exit_status); if (history->exit_status != orig_exit_status) { why = "degraded result"; if (!expired && (!history->node->details->shutdown || history->node->details->online)) { record_failed_op(history); } } if (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && ((history->execution_status != PCMK_EXEC_DONE) || (history->exit_status != PCMK_OCF_NOT_RUNNING))) { history->execution_status = PCMK_EXEC_DONE; history->exit_status = PCMK_OCF_NOT_RUNNING; why = "equivalent probe result"; } /* If the executor reported an execution status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ switch (history->execution_status) { case PCMK_EXEC_DONE: case PCMK_EXEC_ERROR: break; // These should be treated as node-fatal case PCMK_EXEC_NO_FENCE_DEVICE: case PCMK_EXEC_NO_SECRETS: remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "node-fatal error"); goto remap_done; default: goto remap_done; } is_probe = pcmk_xe_is_probe(history->xml); if (is_probe) { task = "probe"; } if (history->expected_exit_status < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * expected exit status in the transition key, which (along with the * similar case of a corrupted transition key in the CIB) will be * reported to this function as -1. Pacemaker 2.0+ does not support * rolling upgrades from those versions or processing of saved CIB files * from those versions, so we do not need to care much about this case. */ remap_because(history, &why, PCMK_EXEC_ERROR, "obsolete history format"); pcmk__config_warn("Expected result not found for %s on %s " "(corrupt or obsolete CIB?)", history->key, pcmk__node_name(history->node)); } else if (history->exit_status == history->expected_exit_status) { remap_because(history, &why, PCMK_EXEC_DONE, "expected result"); } else { remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result"); pcmk__rsc_debug(history->rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", history->key, pcmk__node_name(history->node), history->expected_exit_status, services_ocf_exitcode_str(history->expected_exit_status), history->exit_status, services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, "")); } switch (history->exit_status) { case PCMK_OCF_OK: if (is_probe && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || (history->expected_exit_status == history->exit_status) || !pcmk_is_set(history->rsc->flags, pcmk__rsc_managed)) { /* For probes, recurring monitors for the Stopped role, and * unmanaged resources, "not running" is not considered a * failure. */ remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); history->rsc->priv->orig_role = pcmk_role_stopped; *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "not running"); } break; case PCMK_OCF_RUNNING_PROMOTED: if (is_probe && (history->exit_status != history->expected_exit_status)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active and promoted on %s at %s", history->rsc->id, pcmk__node_name(history->node), last_change_s); free(last_change_s); } if (!expired || (history->exit_status == history->expected_exit_status)) { history->rsc->priv->orig_role = pcmk_role_promoted; } break; case PCMK_OCF_FAILED_PROMOTED: if (!expired) { history->rsc->priv->orig_role = pcmk_role_promoted; } remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; case PCMK_OCF_NOT_CONFIGURED: remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); break; case PCMK_OCF_UNIMPLEMENT_FEATURE: { guint interval_ms = 0; crm_element_value_ms(history->xml, PCMK_META_INTERVAL, &interval_ms); if (interval_ms == 0) { if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); } else { remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED, "exit status"); } } break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); break; default: if (history->execution_status == PCMK_EXEC_DONE) { char *last_change_s = last_change_str(history->xml); crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", history->exit_status, task, history->rsc->id, pcmk__node_name(history->node), last_change_s); remap_because(history, &why, PCMK_EXEC_ERROR, "unknown exit status"); free(last_change_s); } break; } remap_done: if (why != NULL) { pcmk__rsc_trace(history->rsc, "Remapped %s result from [%s: %s] to [%s: %s] " "because of %s", history->key, pcmk_exec_status_str(orig_exec_status), crm_exit_str(orig_exit_status), pcmk_exec_status_str(history->execution_status), crm_exit_str(history->exit_status), why); } } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, pcmk_resource_t *rsc, pcmk_node_t *node) { if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure, rsc->priv->scheduler); } else { pcmk__op_digest_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->priv->scheduler); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, pcmk__xe_history_key(xml_op), node->priv->id); break; case pcmk__digest_match: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn, pcmk_scheduler_t *scheduler) { pcmk_node_t *remote_node = pcmk_find_node(scheduler, remote_conn->id); if (remote_node) { pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, scheduler); order_actions(fence, action, pcmk__ar_first_implies_then); } } static bool should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if ((rsc->priv->remote_reconnect_ms > 0U) && pcmk_is_set(rsc->priv->scheduler->flags, pcmk__sched_fencing_enabled) && (interval_ms != 0) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { pcmk_node_t *remote_node = pcmk_find_node(rsc->priv->scheduler, rsc->id); if (remote_node && !pcmk_is_set(remote_node->priv->flags, pcmk__node_remote_fenced)) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in,out] history Parsed action result history * * \return true if operation history entry is expired, otherwise false */ static bool check_operation_expiry(struct action_history *history) { bool expired = false; bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; int unexpired_fail_count = 0; const char *clear_reason = NULL; const guint expiration_sec = history->rsc->priv->failure_expiration_ms / 1000; pcmk_scheduler_t *scheduler = history->rsc->priv->scheduler; if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) { pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not expired: " "Not Installed does not expire", history->id, pcmk__node_name(history->node)); return false; // "Not installed" must always be cleared manually } if ((expiration_sec > 0) && (crm_element_value_epoch(history->xml, PCMK_XA_LAST_RC_CHANGE, &last_run) == 0)) { /* Resource has a PCMK_META_FAILURE_TIMEOUT and history entry has a * timestamp */ time_t now = get_effective_time(scheduler); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + expiration_sec)) && !should_ignore_failure_timeout(history->rsc, history->task, history->interval_ms, is_last_failure)) { expired = true; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(history->node, history->rsc, &last_failure, pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d " "expiration=%s last-failure@%lld", history->id, (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, pcmk__readable_interval(expiration_sec * 1000), (long long) last_failure); last_failure += expiration_sec + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, scheduler, "fail count expiration"); } } if (expired) { if (pe_get_failcount(history->node, history->rsc, NULL, pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Unexpired fail count", history->id, pcmk__node_name(history->node)); expired = false; } } else if (is_last_failure && (history->rsc->priv->remote_reconnect_ms > 0U)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(history->xml, history->task, history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { pcmk_action_t *clear_op = NULL; // Schedule clearing of the fail count clear_op = pe__clear_failcount(history->rsc, history->node, clear_reason, scheduler); if (pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled) && (history->rsc->priv->remote_reconnect_ms > 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", history->task, history->rsc->id); order_after_remote_fencing(clear_op, history->rsc, scheduler); } } if (expired && (history->interval_ms == 0) && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Probe result", history->id, pcmk__node_name(history->node)); expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, PCMK__XA_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } /*! * \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry * \param[in] exit_status Exit status to base new state on * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void update_resource_state(struct action_history *history, int exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { bool clear_past_failure = false; if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { history->rsc->priv->orig_role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pcmk__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } if (history->rsc->priv->orig_role < pcmk_role_started) { set_active(history->rsc); } } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_stopped; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_promoted; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { if (*on_fail == pcmk__on_fail_demote) { /* Demote clears an error only if * PCMK_META_ON_FAIL=PCMK_VALUE_DEMOTE */ clear_past_failure = true; } history->rsc->priv->orig_role = pcmk_role_unpromoted; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { history->rsc->priv->orig_role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); } else if (history->rsc->priv->orig_role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pcmk__node_name(history->node)); set_active(history->rsc); } if (!clear_past_failure) { return; } switch (*on_fail) { case pcmk__on_fail_stop: case pcmk__on_fail_ban: case pcmk__on_fail_standby_node: case pcmk__on_fail_fence_node: pcmk__rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, pcmk__on_fail_text(*on_fail), history->task); break; case pcmk__on_fail_block: case pcmk__on_fail_ignore: case pcmk__on_fail_demote: case pcmk__on_fail_restart: case pcmk__on_fail_restart_container: *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; case pcmk__on_fail_reset_remote: if (history->rsc->priv->remote_reconnect_ms == 0U) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = pcmk__on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; } } /*! * \internal * \brief Check whether a given history entry matters for resource state * * \param[in] history Parsed action history entry * * \return true if action can affect resource state, otherwise false */ static inline bool can_affect_state(struct action_history *history) { #if 0 /* @COMPAT It might be better to parse only actions we know we're interested * in, rather than exclude a couple we don't. However that would be a * behavioral change that should be done at a major or minor series release. * Currently, unknown operations can affect whether a resource is considered * active and/or failed. */ return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); #endif } /*! * \internal * \brief Unpack execution/exit status and exit reason from a history entry * * \param[in,out] history Action history entry to unpack * * \return Standard Pacemaker return code */ static int unpack_action_result(struct action_history *history) { if ((crm_element_value_int(history->xml, PCMK__XA_OP_STATUS, &(history->execution_status)) < 0) || (history->execution_status < PCMK_EXEC_PENDING) || (history->execution_status > PCMK_EXEC_MAX) || (history->execution_status == PCMK_EXEC_CANCELLED)) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_OP_STATUS " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_OP_STATUS), "")); return pcmk_rc_unpack_error; } if ((crm_element_value_int(history->xml, PCMK__XA_RC_CODE, &(history->exit_status)) < 0) || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { #if 0 /* @COMPAT We should ignore malformed entries, but since that would * change behavior, it should be done at a major or minor series * release. */ pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " PCMK__XA_RC_CODE " '%s'", history->id, history->rsc->id, pcmk__node_name(history->node), pcmk__s(crm_element_value(history->xml, PCMK__XA_RC_CODE), "")); return pcmk_rc_unpack_error; #else history->exit_status = CRM_EX_ERROR; #endif } history->exit_reason = crm_element_value(history->xml, PCMK_XA_EXIT_REASON); return pcmk_rc_ok; } /*! * \internal * \brief Process an action history entry whose result expired * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the * entry needs no further processing) */ static int process_expired_result(struct action_history *history, int orig_exit_status) { if (!pcmk__is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { if (history->rsc->priv->orig_role <= pcmk_role_stopped) { history->rsc->priv->orig_role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", history->id, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->exit_status == history->expected_exit_status) { return pcmk_rc_undetermined; // Only failures expire } if (history->interval_ms == 0) { crm_notice("Ignoring resource history entry %s for %s of %s on %s: " "Expired failure", history->id, history->task, history->rsc->id, pcmk__node_name(history->node)); return pcmk_rc_ok; } if (history->node->details->online && !history->node->details->unclean) { /* Reschedule the recurring action. schedule_cancel() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so pcmk__check_action_config() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has a PCMK__XA_OP_DIGEST (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s-interval %s of %s on %s " "after failure expired", pcmk__readable_interval(history->interval_ms), history->task, history->rsc->id, pcmk__node_name(history->node)); crm_xml_add(history->xml, PCMK__XA_OP_RESTART_DIGEST, "calculated-failure-timeout"); return pcmk_rc_ok; } return pcmk_rc_undetermined; } /*! * \internal * \brief Process a masked probe failure * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum pcmk__on_fail *on_fail) { pcmk_resource_t *ban_rsc = history->rsc; if (!pcmk_is_set(history->rsc->flags, pcmk__rsc_unique)) { ban_rsc = uber_parent(history->rsc); } crm_notice("Treating probe result '%s' for %s on %s as 'not running'", services_ocf_exitcode_str(orig_exit_status), history->rsc->id, pcmk__node_name(history->node)); update_resource_state(history, history->expected_exit_status, last_failure, on_fail); crm_xml_add(history->xml, PCMK_XA_UNAME, history->node->priv->name); record_failed_op(history); resource_location(ban_rsc, history->node, -PCMK_SCORE_INFINITY, "masked-probe-failure", ban_rsc->priv->scheduler); } /*! * \internal Check whether a given failure is for a given pending action * * \param[in] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known * * \return true if \p last_failure is failure of pending action in \p history, * otherwise false * \note Both \p history and \p last_failure must come from the same * \c PCMK__XE_LRM_RESOURCE block, as node and resource are assumed to be * the same. */ static bool failure_is_newer(const struct action_history *history, const xmlNode *last_failure) { guint failure_interval_ms = 0U; long long failure_change = 0LL; long long this_change = 0LL; if (last_failure == NULL) { return false; // Resource has no last_failure entry } if (!pcmk__str_eq(history->task, crm_element_value(last_failure, PCMK_XA_OPERATION), pcmk__str_none)) { return false; // last_failure is for different action } if ((crm_element_value_ms(last_failure, PCMK_META_INTERVAL, &failure_interval_ms) != pcmk_ok) || (history->interval_ms != failure_interval_ms)) { return false; // last_failure is for action with different interval } if ((pcmk__scan_ll(crm_element_value(history->xml, PCMK_XA_LAST_RC_CHANGE), &this_change, 0LL) != pcmk_rc_ok) || (pcmk__scan_ll(crm_element_value(last_failure, PCMK_XA_LAST_RC_CHANGE), &failure_change, 0LL) != pcmk_rc_ok) || (failure_change < this_change)) { return false; // Failure is not known to be newer } return true; } /*! * \internal * \brief Update a resource's role etc. for a pending action * * \param[in,out] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known */ static void process_pending_action(struct action_history *history, const xmlNode *last_failure) { /* For recurring monitors, a failure is recorded only in RSC_last_failure_0, * and there might be a RSC_monitor_INTERVAL entry with the last successful * or pending result. * * If last_failure contains the failure of the pending recurring monitor * we're processing here, and is newer, the action is no longer pending. * (Pending results have call ID -1, which sorts last, so the last failure * if any should be known.) */ if (failure_is_newer(history, last_failure)) { return; } if (strcmp(history->task, PCMK_ACTION_START) == 0) { pcmk__set_rsc_flags(history->rsc, pcmk__rsc_start_pending); set_active(history->rsc); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->priv->orig_role = pcmk_role_promoted; } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. */ const char *migrate_target = NULL; pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, PCMK__META_MIGRATE_TARGET); target = pcmk_find_node(history->rsc->priv->scheduler, migrate_target); if (target != NULL) { stop_action(history->rsc, target, FALSE); } } if (history->rsc->priv->pending_action != NULL) { /* There should never be multiple pending actions, but as a failsafe, * just remember the first one processed for display purposes. */ return; } if (pcmk_is_probe(history->task, history->interval_ms)) { /* Pending probes are currently never displayed, even if pending * operations are requested. If we ever want to change that, * enable the below and the corresponding part of * native.c:native_pending_action(). */ #if 0 history->rsc->private->pending_action = strdup("probe"); history->rsc->private->pending_node = history->node; #endif } else { history->rsc->priv->pending_action = strdup(history->task); history->rsc->priv->pending_node = history->node; } } static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum pcmk__on_fail *on_fail) { int old_rc = 0; bool expired = false; pcmk_resource_t *parent = rsc; enum rsc_role_e fail_role = pcmk_role_unknown; enum pcmk__on_fail failure_strategy = pcmk__on_fail_restart; struct action_history history = { .rsc = rsc, .node = node, .xml = xml_op, .execution_status = PCMK_EXEC_UNKNOWN, }; CRM_CHECK(rsc && node && xml_op, return); history.id = pcmk__xe_id(xml_op); if (history.id == NULL) { pcmk__config_err("Ignoring resource history entry for %s on %s " "without ID", rsc->id, pcmk__node_name(node)); return; } // Task and interval history.task = crm_element_value(xml_op, PCMK_XA_OPERATION); if (history.task == NULL) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "without " PCMK_XA_OPERATION, history.id, rsc->id, pcmk__node_name(node)); return; } crm_element_value_ms(xml_op, PCMK_META_INTERVAL, &(history.interval_ms)); if (!can_affect_state(&history)) { pcmk__rsc_trace(rsc, "Ignoring resource history entry %s for %s on %s " "with irrelevant action '%s'", history.id, rsc->id, pcmk__node_name(node), history.task); return; } if (unpack_action_result(&history) != pcmk_rc_ok) { return; // Error already logged } history.expected_exit_status = pe__target_rc_from_xml(xml_op); history.key = pcmk__xe_history_key(xml_op); crm_element_value_int(xml_op, PCMK__XA_CALL_ID, &(history.call_id)); pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", history.id, history.task, history.call_id, pcmk__node_name(node), pcmk_exec_status_str(history.execution_status), crm_exit_str(history.exit_status)); if (node->details->unclean) { pcmk__rsc_trace(rsc, "%s is running on %s, which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pcmk__node_name(node)); } expired = check_operation_expiry(&history); old_rc = history.exit_status; remap_operation(&history, on_fail, expired); if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; } if (!pcmk__is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { parent = uber_parent(rsc); } switch (history.execution_status) { case PCMK_EXEC_PENDING: process_pending_action(&history, *last_failure); goto done; case PCMK_EXEC_DONE: update_resource_state(&history, history.exit_status, *last_failure, on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: unpack_failure_handling(&history, &failure_strategy, &fail_role); if (failure_strategy == pcmk__on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " QB_XS " status=%d rc=%d id=%s", history.task, rsc->id, pcmk__node_name(node), history.execution_status, history.exit_status, history.id); /* Also for printing it as "FAILED" by marking it as * pcmk__rsc_failed later */ *on_fail = pcmk__on_fail_ban; } resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pcmk__is_pacemaker_remote_node(node) && pcmk_is_set(node->priv->remote->flags, pcmk__rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ pcmk__set_rsc_flags(node->priv->remote, pcmk__rsc_failed|pcmk__rsc_stop_if_failed); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling default: // No other value should be possible at this point break; } unpack_failure_handling(&history, &failure_strategy, &fail_role); if ((failure_strategy == pcmk__on_fail_ignore) || ((failure_strategy == pcmk__on_fail_restart_container) && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " QB_XS " %s", history.task, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), rsc->id, pcmk__node_name(node), last_change_s, history.id); free(last_change_s); update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, PCMK_XA_UNAME, node->priv->name); pcmk__set_rsc_flags(rsc, pcmk__rsc_ignore_failure); record_failed_op(&history); if ((failure_strategy == pcmk__on_fail_restart_container) && (*on_fail <= pcmk__on_fail_restart)) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { log_level = LOG_NOTICE; } do_crm_log(log_level, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s) " QB_XS " %s", parent->id, pcmk__node_name(node), services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, node, -PCMK_SCORE_INFINITY, "hard-error", rsc->priv->scheduler); } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { pcmk__sched_err(rsc->priv->scheduler, "Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " QB_XS " %s", parent->id, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, NULL, -PCMK_SCORE_INFINITY, "fatal-error", rsc->priv->scheduler); } } done: pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", rsc->id, pcmk__node_name(node), history.id, pcmk_role_text(rsc->priv->orig_role), pcmk_role_text(rsc->priv->next_role)); } static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; const char *dc_id = crm_element_value(scheduler->input, PCMK_XA_DC_UUID); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .now = scheduler->priv->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_UNAME, node->priv->name); pcmk__insert_dup(node->priv->attrs, CRM_ATTR_ID, node->priv->id); if ((scheduler->dc_node == NULL) && pcmk__str_eq(node->priv->id, dc_id, pcmk__str_casei)) { scheduler->dc_node = node; pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_TRUE); } else if (!pcmk__same_node(node, scheduler->dc_node)) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_IS_DC, PCMK_VALUE_FALSE); } cluster_name = g_hash_table_lookup(scheduler->priv->options, PCMK_OPT_CLUSTER_NAME); if (cluster_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_CLUSTER_NAME, cluster_name); } pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_INSTANCE_ATTRIBUTES, &rule_data, node->priv->attrs, NULL, overwrite, scheduler); pe__unpack_dataset_nvpairs(xml_obj, PCMK_XE_UTILIZATION, &rule_data, node->priv->utilization, NULL, FALSE, scheduler); if (pcmk__node_attr(node, CRM_ATTR_SITE_NAME, NULL, pcmk__rsc_node_current) == NULL) { const char *site_name = pcmk__node_attr(node, "site-name", NULL, pcmk__rsc_node_current); if (site_name) { pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, site_name); } else if (cluster_name) { /* Default to cluster-name if unset */ pcmk__insert_dup(node->priv->attrs, CRM_ATTR_SITE_NAME, cluster_name); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry, NULL, NULL, NULL); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__xe_is(rsc_op, PCMK__XE_LRM_RSC_OP)) { crm_xml_add(rsc_op, PCMK_XA_RESOURCE, rsc); crm_xml_add(rsc_op, PCMK_XA_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", pcmk__xe_id(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", pcmk__xe_id(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = pcmk__xe_first_child(scheduler->input, PCMK_XE_STATUS, NULL, NULL); pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; CRM_CHECK(status != NULL, return NULL); for (node_state = pcmk__xe_first_child(status, NULL, NULL, NULL); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__xe_is(node_state, PCMK__XE_NODE_STATE)) { const char *uname = crm_element_value(node_state, PCMK_XA_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pcmk_find_node(scheduler, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pcmk__is_pacemaker_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); } else { determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online || pcmk_is_set(scheduler->flags, pcmk__sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = pcmk__xe_first_child(node_state, PCMK__XE_LRM, NULL, NULL); tmp = pcmk__xe_first_child(tmp, PCMK__XE_LRM_RESOURCES, NULL, NULL); for (lrm_rsc = pcmk__xe_first_child(tmp, NULL, NULL, NULL); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__xe_is(lrm_rsc, PCMK__XE_LRM_RESOURCE)) { const char *rsc_id = crm_element_value(lrm_rsc, PCMK_XA_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index 75208466b9..06689db0c0 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2183 +1,2195 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__* #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static uint32_t show; static uint32_t show_opts = pcmk_show_pending; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint reconnect_timer = 0; static mainloop_timer_t *refresh_timer = NULL; static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; volatile crm_trigger_t *refresh_trigger = NULL; +static pcmk_scheduler_t *scheduler = NULL; static enum pcmk__fence_history fence_history = pcmk__fence_history_none; int interactive_fence_level = 0; static pcmk__supported_format_t formats[] = { #if CURSES_ENABLED CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_default(pcmk__output_t *out, va_list args) { return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_html(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (out->dest != stdout) { out->reset(out); } pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, "Not connected to CIB"); if (desc != NULL) { pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ": "); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, desc); } if (state != pcmk_pacemakerd_state_invalid) { const char *state_s = pcmk__pcmkd_state_enum2friendly(state); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, " ("); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, state_s); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ")"); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_text(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); int rc = pcmk_rc_ok; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { rc = out->info(out, "Not connected to CIB%s%s (%s)", (desc != NULL)? ": " : "", pcmk__s(desc, ""), pcmk__pcmkd_state_enum2friendly(state)); } else { rc = out->info(out, "Not connected to CIB%s%s", (desc != NULL)? ": " : "", pcmk__s(desc, "")); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return rc; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_xml(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); } pcmk__output_create_xml_node(out, PCMK_XE_CRM_MON_DISCONNECTED, PCMK_XA_DESCRIPTION, desc, PCMK_XA_PACEMAKERD_STATE, state_s, NULL); out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "crm-mon-disconnected", "default", crm_mon_disconnected_default }, { "crm-mon-disconnected", "html", crm_mon_disconnected_html }, { "crm-mon-disconnected", "text", crm_mon_disconnected_text }, { "crm-mon-disconnected", "xml", crm_mon_disconnected_xml }, { NULL, NULL, NULL }, }; #define RECONNECT_MSECS 5000 struct { guint reconnect_ms; enum mon_exec_mode exec_mode; gboolean fence_connect; gboolean print_pending; gboolean show_bans; gboolean watch_fencing; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_ms = RECONNECT_MSECS, .exec_mode = mon_exec_unset, .fence_connect = TRUE, }; static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static void clean_up_on_connection_failure(int rc); static int mon_refresh_display(gpointer user_data); static int setup_cib_connection(void); static int setup_fencer_connection(void); static int setup_api_connections(void); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void refresh_after_event(gboolean data_updated, gboolean enforce); static uint32_t all_includes(mon_output_format_t fmt) { if ((fmt == mon_output_plain) || (fmt == mon_output_console)) { return ~pcmk_section_options; } else { return pcmk_section_all; } } static uint32_t default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_plain: case mon_output_console: case mon_output_html: return pcmk_section_summary |pcmk_section_nodes |pcmk_section_resources |pcmk_section_failures; case mon_output_xml: return all_includes(fmt); default: return 0; } } struct { const char *name; uint32_t bit; } sections[] = { { "attributes", pcmk_section_attributes }, { "bans", pcmk_section_bans }, { "counts", pcmk_section_counts }, { "dc", pcmk_section_dc }, { "failcounts", pcmk_section_failcounts }, { "failures", pcmk_section_failures }, { PCMK_VALUE_FENCING, pcmk_section_fencing_all }, { "fencing-failed", pcmk_section_fence_failed }, { "fencing-pending", pcmk_section_fence_pending }, { "fencing-succeeded", pcmk_section_fence_worked }, { "maint-mode", pcmk_section_maint_mode }, { "nodes", pcmk_section_nodes }, { "operations", pcmk_section_operations }, { "options", pcmk_section_options }, { "resources", pcmk_section_resources }, { "stack", pcmk_section_stack }, { "summary", pcmk_section_summary }, { "tickets", pcmk_section_tickets }, { "times", pcmk_section_times }, { NULL } }; static uint32_t find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, " PCMK_VALUE_NONE ", operations, options, resources, " "stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= pcmk_section_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (pcmk__str_any_of(*s, PCMK_VALUE_DEFAULT, "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " PCMK_VALUE_DEFAULT ", failcounts, failures, fencing, " "fencing-failed, fencing-pending, fencing-succeeded, " "maint-mode, nodes, " PCMK_VALUE_NONE ", operations, " "options, resources, stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include_exclude(GSList *lst, GError **error) { gboolean rc = TRUE; GSList *node = lst; while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "xml"); output_format = mon_output_legacy_xml; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg == NULL) { interactive_fence_level = 2; } else { pcmk__scan_min_int(optarg, &interactive_fence_level, 0); } switch (interactive_fence_level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK_VALUE_FENCING, data, err); case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK_VALUE_FENCING, data, err); case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; return include_exclude_cb("--exclude", PCMK_VALUE_FENCING, data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_rscs_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_inactive_rscs; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_brief; return TRUE; } static gboolean print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_details; return TRUE; } static gboolean print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_description; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_timing; return user_include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { pcmk_parse_interval_spec(optarg, &options.reconnect_ms); if (options.exec_mode != mon_exec_daemonized) { // Reconnect interval applies to daemonized too, so don't override options.exec_mode = mon_exec_update; } } return TRUE; } /*! * \internal * \brief Enable one-shot mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_one_shot; return TRUE; } /*! * \internal * \brief Enable daemonized mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_daemonized; return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = user_include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return user_include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.exec_mode = mon_exec_one_shot; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds)", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once and exit", NULL }, { "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, daemonize_cb, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb, "Show resource descriptions", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { /* @COMPAT At least the following resource agents use --as-xml at time of * writing: * * galera * * pgsql * * rabbitmq-cluster * * redis * * Removing this option must wait until those are updated and we no longer * support building on any platforms that ship the older agents in stock * packages. */ { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { NULL } }; /* *INDENT-ON* */ /* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds * like it would be more broadly useful, but only ever happens after a disconnect via * mon_cib_connection_destroy. */ static gboolean reconnect_after_timeout(gpointer data) { #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif out->transient(out, "Reconnecting..."); if (setup_api_connections() == pcmk_rc_ok) { // Trigger redrawing the screen (needs reconnect_timer == 0) reconnect_timer = 0; refresh_after_event(FALSE, TRUE); return G_SOURCE_REMOVE; } out->message(out, "crm-mon-disconnected", "Latest connection attempt failed", pcmkd_state); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); return G_SOURCE_REMOVE; } /* Called from various places when we are disconnected from the CIB or from the * fencing agent. If the CIB connection is still valid, this function will also * attempt to sign off and reconnect. */ static void mon_cib_connection_destroy(gpointer user_data) { const char *msg = "Connection to the cluster lost"; pcmkd_state = pcmk_pacemakerd_state_invalid; /* No crm-mon-disconnected message for console; a working implementation * is not currently worth the effort */ out->transient(out, "%s", msg); out->message(out, "crm-mon-disconnected", msg, pcmkd_state); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(reconnect_timer); reconnect_timer = 0; } /* the client API won't properly reconnect notifications if they are still * in the table - so remove them */ if (st != NULL) { if (st->state != stonith_disconnected) { st->cmds->disconnect(st); } st->cmds->remove_notification(st, NULL); } if (cib) { cib->cmds->signoff(cib); reconnect_timer = g_timeout_add(options.reconnect_ms, reconnect_after_timeout, NULL); } } /* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if CURSES_ENABLED static volatile sighandler_t ncurses_winch_handler; /* Signal handler installed the regular way (not into the main loop) for when * the screen is resized. Commonly, this happens when running in an xterm and * the user changes its size. */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); /* Alert the mainloop code we'd like the refresh_trigger to run next * time the mainloop gets around to checking. */ mainloop_set_trigger((crm_trigger_t *) refresh_trigger); } not_done--; } #endif static int setup_fencer_connection(void) { int rc = pcmk_ok; if (options.fence_connect && st == NULL) { st = stonith_api_new(); } if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) { return rc; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (options.watch_fencing) { st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY, mon_st_callback_display); } } else { stonith_api_delete(st); st = NULL; } return rc; } static int setup_cib_connection(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { // Already connected with notifications registered for CIB updates return rc; } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc == pcmk_rc_ok) { rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy)); if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support connection loss " "notifications; crm_mon will be unable to reconnect after " "connection loss"); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { cib->cmds->del_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY, crm_diff_update); rc = pcmk_legacy2rc(rc); } if (rc != pcmk_rc_ok) { if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support CIB diff " "notifications"); } else { out->err(out, "CIB diff notification setup failed"); } out->err(out, "Cannot monitor CIB changes; exiting"); cib__clean_up_connection(&cib); stonith_api_delete(st); st = NULL; } } return rc; } /* This is used to set up the fencing options after the interactive UI has been stared. * fence_history_cb can't be used because it builds up a list of includes/excludes that * then have to be processed with apply_include_exclude and that could affect other * things. */ static void set_fencing_options(int level) { switch (level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fence_failed | pcmk_section_fence_pending; break; default: interactive_fence_level = 0; options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; show &= ~pcmk_section_fencing_all; break; } } static int setup_api_connections(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { return rc; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_remote: case pcmk_pacemakerd_state_shutting_down: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable return ENOTCONN; } setup_fencer_connection(); } rc = setup_cib_connection(); return rc; } #if CURSES_ENABLED static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(out, option, condition) \ curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option)); /* This function is called from the main loop when there is something to be read * on stdin, like an interactive user's keystroke. All it does is read the keystroke, * set flags (or show the page showing which keystrokes are valid), and redraw the * screen. It does not do anything with connections to the CIB or fencing agent * agent what would happen in mon_refresh_display. */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; gboolean rc = G_SOURCE_CONTINUE; /* If the attached pty device (pseudo-terminal) has been closed/deleted, * the condition (G_IO_IN | G_IO_ERR | G_IO_HUP) occurs. * Exit with an error, otherwise the process would persist in the * background and significantly raise the CPU usage. */ if ((condition & G_IO_ERR) && (condition & G_IO_HUP)) { rc = G_SOURCE_REMOVE; clean_up(CRM_EX_IOERR); } /* The connection/fd has been closed. Refresh the screen and remove this * event source hence ignore stdin. */ if (condition & (G_IO_HUP | G_IO_NVAL)) { rc = G_SOURCE_REMOVE; } if ((condition & G_IO_IN) == 0) { return rc; } while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': interactive_fence_level++; if (interactive_fence_level > 3) { interactive_fence_level = 0; } set_fencing_options(interactive_fence_level); break; case 'c': show ^= pcmk_section_tickets; break; case 'f': show ^= pcmk_section_failcounts; break; case 'n': show_opts ^= pcmk_show_rscs_by_node; break; case 'o': show ^= pcmk_section_operations; if (!pcmk_is_set(show, pcmk_section_operations)) { show_opts &= ~pcmk_show_timing; } break; case 'r': show_opts ^= pcmk_show_inactive_rscs; break; case 'R': show_opts ^= pcmk_show_details; break; case 't': show_opts ^= pcmk_show_timing; if (pcmk_is_set(show_opts, pcmk_show_timing)) { show |= pcmk_section_operations; } break; case 'A': show ^= pcmk_section_attributes; break; case 'L': show ^= pcmk_section_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, pcmk_section_summary)) { show &= ~pcmk_section_summary; } else { show |= pcmk_section_summary; } /* Regardless, we don't show options in console mode. */ show &= ~pcmk_section_options; break; case 'b': show_opts ^= pcmk_show_brief; break; case 'j': show_opts ^= pcmk_show_pending; break; case '?': config_mode = TRUE; break; default: /* All other keys just redraw the screen. */ goto refresh; } if (!config_mode) goto refresh; clear(); refresh(); curses_formatted_printf(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets)); print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts)); print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node)); print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations)); print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs)); print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing)); print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes)); print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans)); print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary)); print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details)); print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief)); print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending)); curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m')); curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n"); } refresh: refresh_after_event(FALSE, TRUE); return rc; } #endif // CURSES_ENABLED // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; #if CURSES_ENABLED const char *fmts = "console (default), html, text, xml, none"; #else const char *fmts = "text (default), html, xml, none"; #endif // CURSES_ENABLED const char *desc = NULL; desc = "Notes:\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many\n" "different formats. It can be an integer number of seconds, a\n" "number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n" "8601 period specification.\n\n" "Output Control:\n\n" "By default, a particular set of sections are written to the\n" "output destination. The default varies based on the output\n" "format: XML includes all sections by default, while other output\n" "formats include less. This set can be modified with the --include\n" "and --exclude command line options. Each option may be passed\n" "multiple times, and each can specify a comma-separated list of\n" "sections. The options are applied to the default set, in order\n" "from left to right as they are passed on the command line. For a\n" "list of valid sections, pass --include=list or --exclude=list.\n\n" "Interactive Use:\n\n" #if CURSES_ENABLED "When run interactively, crm_mon can be told to hide and show\n" "various sections of output. To see a help screen explaining the\n" "options, press '?'. Any key stroke aside from those listed will\n" "cause the screen to refresh.\n\n" #else "The local installation of Pacemaker was built without support for\n" "interactive (console) mode. A curses library must be available at\n" "build time to support interactive mode.\n\n" #endif // CURSES_ENABLED "Examples:\n\n" #if CURSES_ENABLED "Display the cluster status on the console with updates as they\n" "occur:\n\n" "\tcrm_mon\n\n" #endif // CURSES_ENABLED "Display the cluster status once and exit:\n\n" "\tcrm_mon -1\n\n" "Display the cluster status, group resources by node, and include\n" "inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the\n" "cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html " "--output-to /path/to/docroot/filename.html\n\n" "Display the cluster status as XML:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, fmts, group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, desc); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /*! * \internal * \brief Set output format based on \c --output-as arguments and mode arguments * * When the deprecated \c --as-xml argument is parsed, a callback function sets * \c output_format. Otherwise, this function does the same based on the current * \c --output-as arguments and the \c --one-shot and \c --daemonize arguments. * * \param[in,out] args Command line arguments */ static void reconcile_output_format(pcmk__common_args_t *args) { if (output_format != mon_output_unset) { /* The deprecated --as-xml argument was used, and we're finished. Note * that this means the deprecated argument takes precedence. */ return; } if (pcmk__str_eq(args->output_ty, PCMK_VALUE_NONE, pcmk__str_none)) { output_format = mon_output_none; } else if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) { output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); // World-readable HTML } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) { output_format = mon_output_xml; #if CURSES_ENABLED } else if (pcmk__str_eq(args->output_ty, "console", pcmk__str_null_matches)) { /* Console is the default format if no conflicting options are given. * * Use text output instead if one of the following conditions is met: * * We've requested daemonized or one-shot mode (console output is * incompatible with modes other than mon_exec_update) * * We requested the version, which is effectively one-shot * * We specified a non-stdout output destination (console mode is * compatible only with stdout) */ if ((options.exec_mode == mon_exec_daemonized) || (options.exec_mode == mon_exec_one_shot) || args->version || !pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } else { pcmk__str_update(&args->output_ty, "console"); output_format = mon_output_console; crm_enable_stderr(FALSE); } #endif // CURSES_ENABLED } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) { /* Text output was explicitly requested, or it's the default because * curses is not enabled */ pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } // Otherwise, invalid format. Let pcmk__output_new() throw an error. } /*! * \internal * \brief Set execution mode to the output format's default if appropriate * * \param[in,out] args Command line arguments */ static void set_default_exec_mode(const pcmk__common_args_t *args) { if (output_format == mon_output_console) { /* Update is the only valid mode for console, but set here instead of * reconcile_output_format() for isolation and consistency */ options.exec_mode = mon_exec_update; } else if (options.exec_mode == mon_exec_unset) { // Default to one-shot mode for all other formats options.exec_mode = mon_exec_one_shot; } else if ((options.exec_mode == mon_exec_update) && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { // If not using console format, update mode cannot be used with stdout options.exec_mode = mon_exec_one_shot; } } static void clean_up_on_connection_failure(int rc) { if (rc == ENOTCONN) { if (pcmkd_state == pcmk_pacemakerd_state_remote) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); } clean_up(pcmk_rc2exitc(rc)); } static void one_shot(void) { int rc = pcmk__status(out, cib, fence_history, show, show_opts, options.only_node, options.only_rsc, options.neg_location_prefix, 0); if (rc == pcmk_rc_ok) { clean_up(pcmk_rc2exitc(rc)); } else { clean_up_on_connection_failure(rc); } } static void exit_on_invalid_cib(void) { if (cib != NULL) { return; } // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); clean_up(CRM_EX_ERROR); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); pcmk__cli_init_logging("crm_mon", 0); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); processed_args = pcmk__cmdline_preproc(argv, "eimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. * Doing this here means the user can give their own title on the command * line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (options.watch_fencing) { fence_history_cb("--fence-history", "0", NULL, NULL); options.fence_connect = TRUE; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); exit_on_invalid_cib(); switch (cib->variant) { case cib_native: // Everything (fencer, CIB, pcmkd status) should be available break; case cib_file: // Live fence history is not meaningful fence_history_cb("--fence-history", "0", NULL, NULL); /* Notifications are unsupported; nothing to monitor * @COMPAT: Let setup_cib_connection() handle this by exiting? */ options.exec_mode = mon_exec_one_shot; break; case cib_remote: // We won't receive any fencing updates fence_history_cb("--fence-history", "0", NULL, NULL); break; default: /* something is odd */ exit_on_invalid_cib(); break; } if ((options.exec_mode == mon_exec_daemonized) && !options.external_agent && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to " "(with value not set to '-') and --external-agent"); return clean_up(CRM_EX_USAGE); } } reconcile_output_format(args); set_default_exec_mode(args); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; pcmk__output_set_legacy_xml(out); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ /* If we had a valid format for pcmk__output_new(), output_format should be * set by now. */ CRM_ASSERT(output_format != mon_output_unset); if (output_format == mon_output_plain) { pcmk__output_text_set_fancy(out, true); } if (options.exec_mode == mon_exec_daemonized) { if (!options.external_agent && (output_format == mon_output_none)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires --external-agent if used with " "--output-as=none"); return clean_up(CRM_EX_USAGE); } crm_enable_stderr(FALSE); cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); exit_on_invalid_cib(); } show = default_includes(output_format); /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, &error)) { return clean_up(CRM_EX_USAGE); } /* Sync up the initial value of interactive_fence_level with whatever was set with * --include/--exclude= options. */ if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) { interactive_fence_level = 3; } else if (pcmk_is_set(show, pcmk_section_fence_worked)) { interactive_fence_level = 2; } else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) { interactive_fence_level = 1; } else { interactive_fence_level = 0; } pcmk__register_lib_messages(out); crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); // Messages internal to this file, nothing curses-specific pcmk__register_messages(out, fmt_functions); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } if (output_format == mon_output_xml) { show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing; } if ((output_format == mon_output_html) && (out->dest != stdout)) { char *content = pcmk__itoa(options.reconnect_ms / 1000); pcmk__html_add_header(PCMK__XE_META, PCMK__XA_HTTP_EQUIV, PCMK__VALUE_REFRESH, PCMK__XA_CONTENT, content, NULL); free(content); } crm_info("Starting %s", crm_system_name); cib__set_output(cib, out); if (options.exec_mode == mon_exec_one_shot) { one_shot(); } + scheduler = pe_new_working_set(); + pcmk__mem_assert(scheduler); + scheduler->priv->out = out; + if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) { + // Currently used only in the times section + pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0); + } + out->message(out, "crm-mon-disconnected", "Waiting for initial connection", pcmkd_state); do { out->transient(out, "Connecting to cluster..."); rc = setup_api_connections(); if (rc != pcmk_rc_ok) { if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) { out->transient(out, "Connection failed. Retrying in %ums...", options.reconnect_ms); } // Give some time to view all output even if we won't retry pcmk__sleep_ms(options.reconnect_ms); #if CURSES_ENABLED if (output_format == mon_output_console) { clear(); refresh(); } #endif } } while ((rc == ENOTCONN) || (rc == ECONNREFUSED)); if (rc != pcmk_rc_ok) { clean_up_on_connection_failure(rc); } set_fencing_options(interactive_fence_level); mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if CURSES_ENABLED if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, (G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL), detect_user_input, NULL); } #endif /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In * this file, that is anywhere mainloop_set_trigger is called. */ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = pcmk__itoa(rc); char *status_s = pcmk__itoa(status); char *target_rc_s = pcmk__itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { out->err(out, "notification fork() failed: %s", strerror(errno)); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); crm_exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int handle_rsc_op(xmlNode *xml, void *userdata) { const char *node_id = (const char *) userdata; int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, PCMK__XE_LRM_RSC_OP) != 0) { pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id); return pcmk_rc_ok; } id = pcmk__xe_history_key(rsc_op); magic = crm_element_value(rsc_op, PCMK__XA_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return pcmk_rc_ok; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return pcmk_rc_ok; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, PCMK__META_ON_NODE); while ((n != NULL) && !pcmk__xe_is(n, PCMK__XE_NODE_STATE)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, PCMK_XA_UNAME); } if (node == NULL && n) { node = pcmk__xe_id(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_rc_str(pcmk_rc_ok); if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_EXEC_DONE) { desc = services_ocf_exitcode_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = pcmk_exec_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); return pcmk_rc_ok; } /* This function is just a wrapper around mainloop_set_trigger so that it can be * called from a mainloop directly. It's simply another way of ensuring the screen * gets redrawn. */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); return FALSE; } static int handle_op_for_node(xmlNode *xml, void *userdata) { const char *node = crm_element_value(xml, PCMK_XA_UNAME); if (node == NULL) { node = pcmk__xe_id(xml); } handle_rsc_op(xml, (void *) node); return pcmk_rc_ok; } static int crm_diff_update_element(xmlNode *change, void *userdata) { const char *name = NULL; const char *op = crm_element_value(change, PCMK_XA_OPERATION); const char *xpath = crm_element_value(change, PCMK_XA_PATH); xmlNode *match = NULL; const char *node = NULL; if (op == NULL) { return pcmk_rc_ok; } else if (strcmp(op, PCMK_VALUE_CREATE) == 0) { match = change->children; } else if (pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE, NULL)) { return pcmk_rc_ok; } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); CRM_ASSERT(pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE, NULL)); } else if (strcmp(name, PCMK_XE_CIB) == 0) { pcmk__xe_foreach_child(pcmk__xe_first_child(match, PCMK_XE_STATUS, NULL, NULL), NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK_XE_STATUS) == 0) { pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) { node = crm_element_value(match, PCMK_XA_UNAME); if (node == NULL) { node = pcmk__xe_id(match); } handle_rsc_op(match, (void *) node); } else if (strcmp(name, PCMK__XE_LRM) == 0) { node = pcmk__xe_id(match); handle_rsc_op(match, (void *) node); } else if (strcmp(name, PCMK__XE_LRM_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if (strcmp(name, PCMK__XE_LRM_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if (strcmp(name, PCMK__XE_LRM_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } return pcmk_rc_ok; } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); out->progress(out, false); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, PCMK_XA_FORMAT, &format); if (format == 2) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); pcmk__xe_foreach_child(diff, NULL, crm_diff_update_element, NULL); } else { crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { out->info(out, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; refresh_after_event(cib_updated, FALSE); } static int mon_refresh_display(gpointer user_data) { int rc = pcmk_rc_ok; last_refresh = time(NULL); if (output_format == mon_output_none) { return G_SOURCE_REMOVE; } if (fence_history == pcmk__fence_history_full && !pcmk_all_flags_set(show, pcmk_section_fencing_all) && output_format != mon_output_xml) { fence_history = pcmk__fence_history_reduced; } // Get an up-to-date pacemakerd status for the cluster summary if (cib->variant == cib_native) { pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); } if (out->dest != stdout) { out->reset(out); } - rc = pcmk__output_cluster_status(out, st, cib, current_cib, pcmkd_state, - fence_history, show, show_opts, + rc = pcmk__output_cluster_status(scheduler, st, cib, current_cib, + pcmkd_state, fence_history, show, + show_opts, options.only_node,options.only_rsc, options.neg_location_prefix); if (rc == pcmk_rc_schema_validation) { clean_up(CRM_EX_CONFIG); return G_SOURCE_REMOVE; } if (out->dest != stdout) { out->finish(out, CRM_EX_OK, true, NULL); } return G_SOURCE_CONTINUE; } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is used on the command line */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else if (options.external_agent) { char *desc = stonith__event_description(e); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } /* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: * * - If the last update occurred more than reconnect_ms ago (defaults to 5s, but * can be changed via the -i command line option), or * - After every 10 CIB updates, or * - If it's been 2s since the last update * * This function sounds like it would be more broadly useful, but it is only called when a * fencing event is received or a CIB diff occurrs. */ static void refresh_after_event(gboolean data_updated, gboolean enforce) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (reconnect_timer > 0) { /* we will receive a refresh request after successful reconnect */ mainloop_timer_stop(refresh_timer); return; } /* as we're not handling initial failure of fencer-connection as * fatal give it a retry here * not getting here if cib-reconnection is already on the way */ setup_fencer_connection(); if (enforce || ((now - last_refresh) > (options.reconnect_ms / 1000)) || updates >= 10) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is NOT used on the command line */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else { out->progress(out, false); refresh_after_event(TRUE, FALSE); } } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } cib__clean_up_connection(&cib); stonith_api_delete(st); free(options.neg_location_prefix); free(options.only_node); free(options.only_rsc); free(options.pid_file); g_slist_free_full(options.includes_excludes, free); g_strfreev(processed_args); + pe_free_working_set(scheduler); + /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if (((error != NULL) || (exit_code == CRM_EX_USAGE)) && (output_format == mon_output_console) && (out != NULL)) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (options.exec_mode != mon_exec_daemonized) { out->finish(out, exit_code, true, NULL); } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 0cf41f9350..c57d124312 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -1,2181 +1,2179 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // uint32_t #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_resource - perform tasks related to Pacemaker cluster resources" enum rsc_command { cmd_none = 0, // No command option given (yet) cmd_ban, cmd_cleanup, cmd_clear, cmd_colocations, cmd_cts, cmd_delete, cmd_delete_param, cmd_digests, cmd_execute_agent, cmd_fail, cmd_get_param, cmd_get_property, cmd_list_active_ops, cmd_list_agents, cmd_list_all_ops, cmd_list_alternatives, cmd_list_instances, cmd_list_options, cmd_list_providers, cmd_list_resources, cmd_list_standards, cmd_locate, cmd_metadata, cmd_move, cmd_query_xml, cmd_query_xml_raw, cmd_refresh, cmd_restart, cmd_set_param, cmd_set_property, cmd_wait, cmd_why, }; struct { enum rsc_command rsc_cmd; // crm_resource command to perform // Command-line option values gchar *rsc_id; // Value of --resource gchar *rsc_type; // Value of --resource-type gboolean all; // --all was given gboolean force; // --force was given gboolean clear_expired; // --expired was given gboolean recursive; // --recursive was given gboolean promoted_role_only; // --promoted was given gchar *host_uname; // Value of --node gchar *interval_spec; // Value of --interval gchar *move_lifetime; // Value of --lifetime gchar *operation; // Value of --operation enum pcmk__opt_flags opt_list; // Parsed from --list-options const char *attr_set_type; // Instance, meta, utilization, or element attribute gchar *prop_id; // --nvpair (attribute XML ID) char *prop_name; // Attribute name gchar *prop_set; // --set-name (attribute block XML ID) gchar *prop_value; // --parameter-value (attribute value) guint timeout_ms; // Parsed from --timeout value char *agent_spec; // Standard and/or provider and/or agent gchar *xml_file; // Value of (deprecated) --xml-file int check_level; // Optional value of --validate or --force-check // Resource configuration specified via command-line arguments bool cmdline_config; // Resource configuration was via arguments char *v_agent; // Value of --agent char *v_class; // Value of --class char *v_provider; // Value of --provider GHashTable *cmdline_params; // Resource parameters specified // Positional command-line arguments gchar **remainder; // Positional arguments as given GHashTable *override_params; // Resource parameter values that override config } options = { .attr_set_type = PCMK_XE_INSTANCE_ATTRIBUTES, .check_level = -1, .rsc_cmd = cmd_list_resources, // List all resources if no command given }; gboolean attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean cmdline_config_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean option_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean timeout_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); static crm_exit_t exit_code = CRM_EX_OK; static pcmk__output_t *out = NULL; static pcmk__common_args_t *args = NULL; // Things that should be cleaned up on exit static GError *error = NULL; static GMainLoop *mainloop = NULL; static cib_t *cib_conn = NULL; static pcmk_ipc_api_t *controld_api = NULL; static pcmk_scheduler_t *scheduler = NULL; #define MESSAGE_TIMEOUT_S 60 #define INDENT " " static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; // Clean up and exit static crm_exit_t bye(crm_exit_t ec) { pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, ec, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); if (cib_conn != NULL) { cib_t *save_cib_conn = cib_conn; cib_conn = NULL; // Ensure we can't free this twice cib__clean_up_connection(&save_cib_conn); } if (controld_api != NULL) { pcmk_ipc_api_t *save_controld_api = controld_api; controld_api = NULL; // Ensure we can't free this twice pcmk_free_ipc_api(save_controld_api); } if (mainloop != NULL) { g_main_loop_unref(mainloop); mainloop = NULL; } pe_free_working_set(scheduler); scheduler = NULL; crm_exit(ec); return ec; } static void quit_main_loop(crm_exit_t ec) { exit_code = ec; if (mainloop != NULL) { GMainLoop *mloop = mainloop; mainloop = NULL; // Don't re-enter this block pcmk_quit_main_loop(mloop, 10); g_main_loop_unref(mloop); } } static gboolean resource_ipc_timeout(gpointer data) { // Start with newline because "Waiting for ..." message doesn't have one if (error != NULL) { g_clear_error(&error); } g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_TIMEOUT, _("Aborting because no messages received in %d seconds"), MESSAGE_TIMEOUT_S); quit_main_loop(CRM_EX_TIMEOUT); return FALSE; } static void controller_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data, void *user_data) { switch (event_type) { case pcmk_ipc_event_disconnect: if (exit_code == CRM_EX_DISCONNECT) { // Unexpected crm_info("Connection to controller was terminated"); } quit_main_loop(exit_code); break; case pcmk_ipc_event_reply: if (status != CRM_EX_OK) { out->err(out, "Error: bad reply from controller: %s", crm_exit_str(status)); pcmk_disconnect_ipc(api); quit_main_loop(status); } else { if ((pcmk_controld_api_replies_expected(api) == 0) && mainloop && g_main_loop_is_running(mainloop)) { out->info(out, "... got reply (done)"); crm_debug("Got all the replies we expected"); pcmk_disconnect_ipc(api); quit_main_loop(CRM_EX_OK); } else { out->info(out, "... got reply"); } } break; default: break; } } static void start_mainloop(pcmk_ipc_api_t *capi) { unsigned int count = pcmk_controld_api_replies_expected(capi); if (count > 0) { out->info(out, "Waiting for %u %s from the controller", count, pcmk__plural_alt(count, "reply", "replies")); exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects mainloop = g_main_loop_new(NULL, FALSE); g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); g_main_loop_run(mainloop); } } static int compare_id(gconstpointer a, gconstpointer b) { return strcmp((const char *)a, (const char *)b); } static GList * build_constraint_list(xmlNode *root) { GList *retval = NULL; xmlNode *cib_constraints = NULL; xmlXPathObjectPtr xpathObj = NULL; int ndx = 0; cib_constraints = pcmk_find_cib_element(root, PCMK_XE_CONSTRAINTS); xpathObj = xpath_search(cib_constraints, "//" PCMK_XE_RSC_LOCATION); for (ndx = 0; ndx < numXpathResults(xpathObj); ndx++) { xmlNode *match = getXpathResult(xpathObj, ndx); retval = g_list_insert_sorted(retval, (gpointer) pcmk__xe_id(match), compare_id); } freeXpathObject(xpathObj); return retval; } static gboolean validate_opt_list(const gchar *optarg) { if (pcmk__str_eq(optarg, PCMK_VALUE_FENCING, pcmk__str_none)) { options.opt_list = pcmk__opt_fencing; } else if (pcmk__str_eq(optarg, PCMK__VALUE_PRIMITIVE, pcmk__str_none)) { options.opt_list = pcmk__opt_primitive; } else { return FALSE; } return TRUE; } /*! * \internal * \brief Process options that set the command * * Nothing else should set \c options.rsc_cmd. * * \param[in] option_name Name of the option being parsed * \param[in] optarg Value to be parsed * \param[in] data Ignored * \param[out] error Where to store recoverable error, if any * * \return \c TRUE if the option was successfully parsed, or \c FALSE if an * error occurred, in which case \p *error is set */ static gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { // Sorted by enum rsc_command name if (pcmk__str_any_of(option_name, "-B", "--ban", NULL)) { options.rsc_cmd = cmd_ban; } else if (pcmk__str_any_of(option_name, "-C", "--cleanup", NULL)) { options.rsc_cmd = cmd_cleanup; } else if (pcmk__str_any_of(option_name, "-U", "--clear", NULL)) { options.rsc_cmd = cmd_clear; } else if (pcmk__str_any_of(option_name, "-a", "--constraints", NULL)) { options.rsc_cmd = cmd_colocations; } else if (pcmk__str_any_of(option_name, "-A", "--stack", NULL)) { options.rsc_cmd = cmd_colocations; options.recursive = TRUE; } else if (pcmk__str_any_of(option_name, "-c", "--list-cts", NULL)) { options.rsc_cmd = cmd_cts; } else if (pcmk__str_any_of(option_name, "-D", "--delete", NULL)) { options.rsc_cmd = cmd_delete; } else if (pcmk__str_any_of(option_name, "-d", "--delete-parameter", NULL)) { options.rsc_cmd = cmd_delete_param; pcmk__str_update(&options.prop_name, optarg); } else if (pcmk__str_eq(option_name, "--digests", pcmk__str_none)) { options.rsc_cmd = cmd_digests; if (options.override_params == NULL) { options.override_params = pcmk__strkey_table(free, free); } } else if (pcmk__str_any_of(option_name, "--force-demote", "--force-promote", "--force-start", "--force-stop", "--force-check", "--validate", NULL)) { options.rsc_cmd = cmd_execute_agent; g_free(options.operation); options.operation = g_strdup(option_name + 2); // skip "--" if (options.override_params == NULL) { options.override_params = pcmk__strkey_table(free, free); } if (optarg != NULL) { if (pcmk__scan_min_int(optarg, &options.check_level, 0) != pcmk_rc_ok) { g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, _("Invalid check level setting: %s"), optarg); return FALSE; } } } else if (pcmk__str_any_of(option_name, "-F", "--fail", NULL)) { options.rsc_cmd = cmd_fail; } else if (pcmk__str_any_of(option_name, "-g", "--get-parameter", NULL)) { options.rsc_cmd = cmd_get_param; pcmk__str_update(&options.prop_name, optarg); } else if (pcmk__str_any_of(option_name, "-G", "--get-property", NULL)) { options.rsc_cmd = cmd_get_property; pcmk__str_update(&options.prop_name, optarg); } else if (pcmk__str_any_of(option_name, "-O", "--list-operations", NULL)) { options.rsc_cmd = cmd_list_active_ops; } else if (pcmk__str_eq(option_name, "--list-agents", pcmk__str_none)) { options.rsc_cmd = cmd_list_agents; pcmk__str_update(&options.agent_spec, optarg); } else if (pcmk__str_any_of(option_name, "-o", "--list-all-operations", NULL)) { options.rsc_cmd = cmd_list_all_ops; } else if (pcmk__str_eq(option_name, "--list-ocf-alternatives", pcmk__str_none)) { options.rsc_cmd = cmd_list_alternatives; pcmk__str_update(&options.agent_spec, optarg); } else if (pcmk__str_eq(option_name, "--list-options", pcmk__str_none)) { options.rsc_cmd = cmd_list_options; return validate_opt_list(optarg); } else if (pcmk__str_any_of(option_name, "-l", "--list-raw", NULL)) { options.rsc_cmd = cmd_list_instances; } else if (pcmk__str_eq(option_name, "--list-ocf-providers", pcmk__str_none)) { options.rsc_cmd = cmd_list_providers; pcmk__str_update(&options.agent_spec, optarg); } else if (pcmk__str_any_of(option_name, "-L", "--list", NULL)) { options.rsc_cmd = cmd_list_resources; } else if (pcmk__str_eq(option_name, "--list-standards", pcmk__str_none)) { options.rsc_cmd = cmd_list_standards; } else if (pcmk__str_any_of(option_name, "-W", "--locate", NULL)) { options.rsc_cmd = cmd_locate; } else if (pcmk__str_eq(option_name, "--show-metadata", pcmk__str_none)) { options.rsc_cmd = cmd_metadata; pcmk__str_update(&options.agent_spec, optarg); } else if (pcmk__str_any_of(option_name, "-M", "--move", NULL)) { options.rsc_cmd = cmd_move; } else if (pcmk__str_any_of(option_name, "-q", "--query-xml", NULL)) { options.rsc_cmd = cmd_query_xml; } else if (pcmk__str_any_of(option_name, "-w", "--query-xml-raw", NULL)) { options.rsc_cmd = cmd_query_xml_raw; } else if (pcmk__str_any_of(option_name, "-R", "--refresh", NULL)) { options.rsc_cmd = cmd_refresh; } else if (pcmk__str_eq(option_name, "--restart", pcmk__str_none)) { options.rsc_cmd = cmd_restart; } else if (pcmk__str_any_of(option_name, "-p", "--set-parameter", NULL)) { options.rsc_cmd = cmd_set_param; pcmk__str_update(&options.prop_name, optarg); } else if (pcmk__str_any_of(option_name, "-S", "--set-property", NULL)) { options.rsc_cmd = cmd_set_property; pcmk__str_update(&options.prop_name, optarg); } else if (pcmk__str_eq(option_name, "--wait", pcmk__str_none)) { options.rsc_cmd = cmd_wait; } else if (pcmk__str_any_of(option_name, "-Y", "--why", NULL)) { options.rsc_cmd = cmd_why; } return TRUE; } /* short option letters still available: eEJkKXyYZ */ static GOptionEntry query_entries[] = { { "list", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List all cluster resources with status", NULL }, { "list-raw", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List IDs of all instantiated resources (individual members\n" INDENT "rather than groups etc.)", NULL }, { "list-cts", 'c', G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, NULL, NULL }, { "list-operations", 'O', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List active resource operations, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { "list-all-operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List all resource operations, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { "list-options", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "List all available options of the given type.\n" INDENT "Allowed values:\n" INDENT PCMK__VALUE_PRIMITIVE " (primitive resource meta-attributes),\n" INDENT PCMK_VALUE_FENCING " (parameters common to all fencing resources)", "TYPE" }, { "list-standards", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List supported standards", NULL }, { "list-ocf-providers", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "List all available OCF providers", NULL }, { "list-agents", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "List all agents available for the named standard and/or provider", "STD:PROV" }, { "list-ocf-alternatives", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "List all available providers for the named OCF agent", "AGENT" }, { "show-metadata", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "Show the metadata for the named class:provider:agent", "SPEC" }, { "query-xml", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Show XML configuration of resource (after any template expansion)", NULL }, { "query-xml-raw", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Show XML configuration of resource (before any template expansion)", NULL }, { "get-parameter", 'g', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "Display named parameter for resource (use instance attribute\n" INDENT "unless --element, --meta, or --utilization is specified)", "PARAM" }, { "get-property", 'G', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, command_cb, "Display named property of resource ('class', 'type', or 'provider') " "(requires --resource)", "PROPERTY" }, { "locate", 'W', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Show node(s) currently running resource", NULL }, { "constraints", 'a', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the location and colocation constraints that apply to a\n" INDENT "resource, and if --recursive is specified, to the resources\n" INDENT "directly or indirectly involved in those colocations.\n" INDENT "If the named resource is part of a group, or a clone or\n" INDENT "bundle instance, constraints for the collective resource\n" INDENT "will be shown unless --force is given.", NULL }, { "stack", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Equivalent to --constraints --recursive", NULL }, { "why", 'Y', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Show why resources are not running, optionally filtered by\n" INDENT "--resource and/or --node", NULL }, { NULL } }; static GOptionEntry command_entries[] = { { "validate", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Validate resource configuration by calling agent's validate-all\n" INDENT "action. The configuration may be specified either by giving an\n" INDENT "existing resource name with -r, or by specifying --class,\n" INDENT "--agent, and --provider arguments, along with any number of\n" INDENT "--option arguments. An optional LEVEL argument can be given\n" INDENT "to control the level of checking performed.", "LEVEL" }, { "cleanup", 'C', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "If resource has any past failures, clear its history and fail\n" INDENT "count. Optionally filtered by --resource, --node, --operation\n" INDENT "and --interval (otherwise all). --operation and --interval\n" INDENT "apply to fail counts, but entire history is always clear, to\n" INDENT "allow current state to be rechecked. If the named resource is\n" INDENT "part of a group, or one numbered instance of a clone or bundled\n" INDENT "resource, the clean-up applies to the whole collective resource\n" INDENT "unless --force is given.", NULL }, { "refresh", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Delete resource's history (including failures) so its current state\n" INDENT "is rechecked. Optionally filtered by --resource and --node\n" INDENT "(otherwise all). If the named resource is part of a group, or one\n" INDENT "numbered instance of a clone or bundled resource, the refresh\n" INDENT "applies to the whole collective resource unless --force is given.", NULL }, { "set-parameter", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "Set named parameter for resource (requires -v). Use instance\n" INDENT "attribute unless --element, --meta, or --utilization is " "specified.", "PARAM" }, { "delete-parameter", 'd', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, command_cb, "Delete named parameter for resource. Use instance attribute\n" INDENT "unless --element, --meta or, --utilization is specified.", "PARAM" }, { "set-property", 'S', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, command_cb, "Set named property of resource ('class', 'type', or 'provider') " "(requires -r, -t, -v)", "PROPERTY" }, { NULL } }; static GOptionEntry location_entries[] = { { "move", 'M', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Create a constraint to move resource. If --node is specified,\n" INDENT "the constraint will be to move to that node, otherwise it\n" INDENT "will be to ban the current node. Unless --force is specified\n" INDENT "this will return an error if the resource is already running\n" INDENT "on the specified node. If --force is specified, this will\n" INDENT "always ban the current node.\n" INDENT "Optional: --lifetime, --promoted. NOTE: This may prevent the\n" INDENT "resource from running on its previous location until the\n" INDENT "implicit constraint expires or is removed with --clear.", NULL }, { "ban", 'B', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Create a constraint to keep resource off a node.\n" INDENT "Optional: --node, --lifetime, --promoted.\n" INDENT "NOTE: This will prevent the resource from running on the\n" INDENT "affected node until the implicit constraint expires or is\n" INDENT "removed with --clear. If --node is not specified, it defaults\n" INDENT "to the node currently running the resource for primitives\n" INDENT "and groups, or the promoted instance of promotable clones with\n" INDENT PCMK_META_PROMOTED_MAX "=1 (all other situations result in an\n" INDENT "error as there is no sane default).", NULL }, { "clear", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Remove all constraints created by the --ban and/or --move\n" INDENT "commands. Requires: --resource. Optional: --node, --promoted,\n" INDENT "--expired. If --node is not specified, all constraints created\n" INDENT "by --ban and --move will be removed for the named resource. If\n" INDENT "--node and --force are specified, any constraint created by\n" INDENT "--move will be cleared, even if it is not for the specified\n" INDENT "node. If --expired is specified, only those constraints whose\n" INDENT "lifetimes have expired will be removed.", NULL }, { "expired", 'e', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.clear_expired, "Modifies the --clear argument to remove constraints with\n" INDENT "expired lifetimes.", NULL }, { "lifetime", 'u', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.move_lifetime, "Lifespan (as ISO 8601 duration) of created constraints (with\n" INDENT "-B, -M) see https://en.wikipedia.org/wiki/ISO_8601#Durations)", "TIMESPEC" }, { "promoted", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.promoted_role_only, "Limit scope of command to promoted role (with -B, -M, -U). For\n" INDENT "-B and -M, previously promoted instances may remain\n" INDENT "active in the unpromoted role.", NULL }, // Deprecated since 2.1.0 { "master", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.promoted_role_only, "Deprecated: Use --promoted instead", NULL }, { NULL } }; static GOptionEntry advanced_entries[] = { { "delete", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Delete a resource from the CIB. Required: -t", NULL }, { "fail", 'F', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Tell the cluster this resource has failed", NULL }, { "restart", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Tell the cluster to restart this resource and\n" INDENT "anything that depends on it", NULL }, { "wait", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Wait until the cluster settles into a stable state", NULL }, { "digests", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Show parameter hashes that Pacemaker uses to detect\n" INDENT "configuration changes (only accurate if there is resource\n" INDENT "history on the specified node). Required: --resource, --node.\n" INDENT "Optional: any NAME=VALUE parameters will be used to override\n" INDENT "the configuration (to see what the hash would be with those\n" INDENT "changes).", NULL }, { "force-demote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Bypass the cluster and demote a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-stop", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Bypass the cluster and stop a resource on the local node", NULL }, { "force-start", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Bypass the cluster and start a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-promote", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Bypass the cluster and promote a resource on the local\n" INDENT "node. Unless --force is specified, this will refuse to do so if\n" INDENT "the cluster believes the resource is a clone instance already\n" INDENT "running on the local node.", NULL }, { "force-check", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, command_cb, "(Advanced) Bypass the cluster and check the state of a resource on\n" INDENT "the local node. An optional LEVEL argument can be given\n" INDENT "to control the level of checking performed.", "LEVEL" }, { NULL } }; static GOptionEntry addl_entries[] = { { "node", 'N', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.host_uname, "Node name", "NAME" }, { "recursive", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.recursive, "Follow colocation chains when using --set-parameter or --constraints", NULL }, { "resource-type", 't', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_type, "Resource XML element (primitive, group, etc.) (with -D)", "ELEMENT" }, { "parameter-value", 'v', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_value, "Value to use with -p", "PARAM" }, { "meta", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "Use resource meta-attribute instead of instance attribute\n" INDENT "(with -p, -g, -d)", NULL }, { "utilization", 'z', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "Use resource utilization attribute instead of instance attribute\n" INDENT "(with -p, -g, -d)", NULL }, { "element", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attr_set_type_cb, "Use resource element attribute instead of instance attribute\n" INDENT "(with -p, -g, -d)", NULL }, { "operation", 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.operation, "Operation to clear instead of all (with -C -r)", "OPERATION" }, { "interval", 'I', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.interval_spec, "Interval of operation to clear (default 0) (with -C -r -n)", "N" }, { "class", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, cmdline_config_cb, "The standard the resource agent conforms to (for example, ocf).\n" INDENT "Use with --agent, --provider, --option, and --validate.", "CLASS" }, { "agent", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, cmdline_config_cb, "The agent to use (for example, IPaddr). Use with --class,\n" INDENT "--provider, --option, and --validate.", "AGENT" }, { "provider", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, cmdline_config_cb, "The vendor that supplies the resource agent (for example,\n" INDENT "heartbeat). Use with --class, --agent, --option, and --validate.", "PROVIDER" }, { "option", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, option_cb, "Specify a device configuration parameter as NAME=VALUE (may be\n" INDENT "specified multiple times). Use with --validate and without the\n" INDENT "-r option.", "PARAM" }, { "set-name", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_set, "(Advanced) XML ID of attributes element to use (with -p, -d)", "ID" }, { "nvpair", 'i', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.prop_id, "(Advanced) XML ID of nvpair element to use (with -p, -d)", "ID" }, { "timeout", 'T', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, timeout_cb, "(Advanced) Abort if command does not finish in this time (with\n" INDENT "--restart, --wait, --force-*)", "N" }, { "all", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.all, "List all options, including advanced and deprecated (with\n" INDENT "--list-options)", NULL }, { "force", 'f', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.force, "Force the action to be performed. See help for individual commands for\n" INDENT "additional behavior.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_FILENAME, &options.xml_file, NULL, "FILE" }, { "host-uname", 'H', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.host_uname, NULL, "HOST" }, { NULL } }; gboolean attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "-m", "--meta", NULL)) { options.attr_set_type = PCMK_XE_META_ATTRIBUTES; } else if (pcmk__str_any_of(option_name, "-z", "--utilization", NULL)) { options.attr_set_type = PCMK_XE_UTILIZATION; } else if (pcmk__str_eq(option_name, "--element", pcmk__str_none)) { options.attr_set_type = ATTR_SET_ELEMENT; } return TRUE; } gboolean cmdline_config_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.cmdline_config = true; if (pcmk__str_eq(option_name, "--class", pcmk__str_none)) { pcmk__str_update(&options.v_class, optarg); } else if (pcmk__str_eq(option_name, "--provider", pcmk__str_none)) { pcmk__str_update(&options.v_provider, optarg); } else { // --agent pcmk__str_update(&options.v_agent, optarg); } return TRUE; } gboolean option_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { char *name = NULL; char *value = NULL; if (pcmk__scan_nvpair(optarg, &name, &value) != 2) { return FALSE; } if (options.cmdline_params == NULL) { options.cmdline_params = pcmk__strkey_table(free, free); } g_hash_table_replace(options.cmdline_params, name, value); return TRUE; } gboolean timeout_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { long long timeout_ms = crm_get_msec(optarg); if (timeout_ms < 0) { return FALSE; } options.timeout_ms = (guint) QB_MIN(timeout_ms, UINT_MAX); return TRUE; } static int ban_or_move(pcmk__output_t *out, pcmk_resource_t *rsc, const char *move_lifetime) { int rc = pcmk_rc_ok; pcmk_node_t *current = NULL; unsigned int nactive = 0; CRM_CHECK(rsc != NULL, return EINVAL); current = pe__find_active_requires(rsc, &nactive); if (nactive == 1) { rc = cli_resource_ban(out, options.rsc_id, current->priv->name, move_lifetime, cib_conn, cib_sync_call, options.promoted_role_only, PCMK_ROLE_PROMOTED); } else if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { int count = 0; GList *iter = NULL; current = NULL; for (iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = (pcmk_resource_t *)iter->data; enum rsc_role_e child_role = child->priv->fns->state(child, TRUE); if (child_role == pcmk_role_promoted) { count++; current = pcmk__current_node(child); } } if(count == 1 && current) { rc = cli_resource_ban(out, options.rsc_id, current->priv->name, move_lifetime, cib_conn, cib_sync_call, options.promoted_role_only, PCMK_ROLE_PROMOTED); } else { rc = EINVAL; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("Resource '%s' not moved: active in %d locations (promoted in %d).\n" "To prevent '%s' from running on a specific location, " "specify a node." "To prevent '%s' from being promoted at a specific " "location, specify a node and the --promoted option."), options.rsc_id, nactive, count, options.rsc_id, options.rsc_id); } } else { rc = EINVAL; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("Resource '%s' not moved: active in %d locations.\n" "To prevent '%s' from running on a specific location, " "specify a node."), options.rsc_id, nactive, options.rsc_id); } return rc; } static void cleanup(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node) { int rc = pcmk_rc_ok; if (options.force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Erasing failures of %s (%s requested) on %s", rsc->id, options.rsc_id, (options.host_uname? options.host_uname: "all nodes")); rc = cli_resource_delete(controld_api, options.host_uname, rsc, options.operation, options.interval_spec, TRUE, scheduler, options.force); if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { // Show any reasons why resource might stay stopped cli_resource_check(out, rsc, node); } if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } static int clear_constraints(pcmk__output_t *out, xmlNodePtr *cib_xml_copy) { GList *before = NULL; GList *after = NULL; GList *remaining = NULL; GList *ele = NULL; pcmk_node_t *dest = NULL; int rc = pcmk_rc_ok; if (!out->is_quiet(out)) { before = build_constraint_list(scheduler->input); } if (options.clear_expired) { rc = cli_resource_clear_all_expired(scheduler->input, cib_conn, cib_sync_call, options.rsc_id, options.host_uname, options.promoted_role_only); } else if (options.host_uname) { dest = pcmk_find_node(scheduler, options.host_uname); if (dest == NULL) { rc = pcmk_rc_node_unknown; if (!out->is_quiet(out)) { g_list_free(before); } return rc; } rc = cli_resource_clear(options.rsc_id, dest->priv->name, NULL, cib_conn, cib_sync_call, true, options.force); } else { rc = cli_resource_clear(options.rsc_id, NULL, scheduler->nodes, cib_conn, cib_sync_call, true, options.force); } if (!out->is_quiet(out)) { rc = cib_conn->cmds->query(cib_conn, NULL, cib_xml_copy, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, _("Could not get modified CIB: %s\n"), pcmk_rc_str(rc)); g_list_free(before); pcmk__xml_free(*cib_xml_copy); *cib_xml_copy = NULL; return rc; } scheduler->input = *cib_xml_copy; cluster_status(scheduler); after = build_constraint_list(scheduler->input); remaining = pcmk__subtract_lists(before, after, (GCompareFunc) strcmp); for (ele = remaining; ele != NULL; ele = ele->next) { out->info(out, "Removing constraint: %s", (char *) ele->data); } g_list_free(before); g_list_free(after); g_list_free(remaining); } return rc; } static int initialize_scheduler_data(xmlNodePtr *cib_xml_copy) { int rc = pcmk_rc_ok; if (options.xml_file != NULL) { *cib_xml_copy = pcmk__xml_read(options.xml_file); if (*cib_xml_copy == NULL) { rc = pcmk_rc_cib_corrupt; } } else { rc = cib_conn->cmds->query(cib_conn, NULL, cib_xml_copy, cib_sync_call); rc = pcmk_legacy2rc(rc); } if (rc == pcmk_rc_ok) { scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = ENOMEM; } else { - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts - |pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); scheduler->priv->out = out; rc = update_scheduler_input(scheduler, cib_xml_copy); } } if (rc != pcmk_rc_ok) { pcmk__xml_free(*cib_xml_copy); *cib_xml_copy = NULL; return rc; } cluster_status(scheduler); return pcmk_rc_ok; } static void list_options(void) { switch (options.opt_list) { case pcmk__opt_fencing: exit_code = pcmk_rc2exitc(pcmk__list_fencing_params(out, options.all)); break; case pcmk__opt_primitive: exit_code = pcmk_rc2exitc(pcmk__list_primitive_meta(out, options.all)); break; default: exit_code = CRM_EX_SOFTWARE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "BUG: Invalid option list type"); break; } } static int refresh(pcmk__output_t *out) { int rc = pcmk_rc_ok; const char *router_node = options.host_uname; int attr_options = pcmk__node_attr_none; if (options.host_uname) { pcmk_node_t *node = pcmk_find_node(scheduler, options.host_uname); if (pcmk__is_pacemaker_remote_node(node)) { node = pcmk__current_node(node->priv->remote); if (node == NULL) { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, _("No cluster connection to Pacemaker Remote node %s detected"), options.host_uname); return rc; } router_node = node->priv->name; attr_options |= pcmk__node_attr_remote; } } if (controld_api == NULL) { out->info(out, "Dry run: skipping clean-up of %s due to CIB_file", options.host_uname? options.host_uname : "all nodes"); rc = pcmk_rc_ok; return rc; } crm_debug("Re-checking the state of all resources on %s", options.host_uname?options.host_uname:"all nodes"); rc = pcmk__attrd_api_clear_failures(NULL, options.host_uname, NULL, NULL, NULL, NULL, attr_options); if (pcmk_controld_api_reprobe(controld_api, options.host_uname, router_node) == pcmk_rc_ok) { start_mainloop(controld_api); } return rc; } static void refresh_resource(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node) { int rc = pcmk_rc_ok; if (options.force == FALSE) { rsc = uber_parent(rsc); } crm_debug("Re-checking the state of %s (%s requested) on %s", rsc->id, options.rsc_id, (options.host_uname? options.host_uname: "all nodes")); rc = cli_resource_delete(controld_api, options.host_uname, rsc, NULL, 0, FALSE, scheduler, options.force); if ((rc == pcmk_rc_ok) && !out->is_quiet(out)) { // Show any reasons why resource might stay stopped cli_resource_check(out, rsc, node); } if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } static int set_property(void) { int rc = pcmk_rc_ok; xmlNode *msg_data = NULL; if (pcmk__str_empty(options.rsc_type)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("Must specify -t with resource type")); rc = ENXIO; return rc; } else if (pcmk__str_empty(options.prop_value)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("Must supply -v with new value")); rc = ENXIO; return rc; } CRM_LOG_ASSERT(options.prop_name != NULL); msg_data = pcmk__xe_create(NULL, options.rsc_type); crm_xml_add(msg_data, PCMK_XA_ID, options.rsc_id); crm_xml_add(msg_data, options.prop_name, options.prop_value); rc = cib_conn->cmds->modify(cib_conn, PCMK_XE_RESOURCES, msg_data, cib_sync_call); rc = pcmk_legacy2rc(rc); pcmk__xml_free(msg_data); return rc; } static int show_metadata(pcmk__output_t *out, const char *agent_spec) { int rc = pcmk_rc_ok; char *standard = NULL; char *provider = NULL; char *type = NULL; char *metadata = NULL; lrmd_t *lrmd_conn = NULL; rc = lrmd__new(&lrmd_conn, NULL, NULL, 0); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, _("Could not create executor connection")); lrmd_api_delete(lrmd_conn); return rc; } rc = crm_parse_agent_spec(agent_spec, &standard, &provider, &type); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { rc = lrmd_conn->cmds->get_metadata(lrmd_conn, standard, provider, type, &metadata, 0); rc = pcmk_legacy2rc(rc); if (metadata) { out->output_xml(out, PCMK_XE_METADATA, metadata); free(metadata); } else { /* We were given a validly formatted spec, but it doesn't necessarily * match up with anything that exists. Use ENXIO as the return code * here because that maps to an exit code of CRM_EX_NOSUCH, which * probably is the most common reason to get here. */ rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, _("Metadata query for %s failed: %s"), agent_spec, pcmk_rc_str(rc)); } } else { rc = ENXIO; g_set_error(&error, PCMK__RC_ERROR, rc, _("'%s' is not a valid agent specification"), agent_spec); } lrmd_api_delete(lrmd_conn); return rc; } static void validate_cmdline_config(void) { // Cannot use both --resource and command-line resource configuration if (options.rsc_id != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("--resource cannot be used with --class, --agent, and --provider")); // Not all commands support command-line resource configuration } else if (options.rsc_cmd != cmd_execute_agent) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("--class, --agent, and --provider can only be used with " "--validate and --force-*")); // Not all of --class, --agent, and --provider need to be given. Not all // classes support the concept of a provider. Check that what we were given // is valid. } else if (pcmk__str_eq(options.v_class, "stonith", pcmk__str_none)) { if (options.v_provider != NULL) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("stonith does not support providers")); } else if (stonith_agent_exists(options.v_agent, 0) == FALSE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("%s is not a known stonith agent"), options.v_agent ? options.v_agent : ""); } } else if (resources_agent_exists(options.v_class, options.v_provider, options.v_agent) == FALSE) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("%s:%s:%s is not a known resource"), options.v_class ? options.v_class : "", options.v_provider ? options.v_provider : "", options.v_agent ? options.v_agent : ""); } if ((error == NULL) && (options.cmdline_params == NULL)) { options.cmdline_params = pcmk__strkey_table(free, free); } } /*! * \internal * \brief Get the enum pe_find flags for a given command * * \return enum pe_find flag group appropriate for \c options.rsc_cmd. */ static uint32_t get_find_flags(void) { switch (options.rsc_cmd) { case cmd_ban: case cmd_cleanup: case cmd_clear: case cmd_colocations: case cmd_digests: case cmd_execute_agent: case cmd_locate: case cmd_move: case cmd_refresh: case cmd_restart: case cmd_why: return pcmk_rsc_match_history|pcmk_rsc_match_anon_basename; case cmd_delete_param: case cmd_get_param: case cmd_get_property: case cmd_query_xml_raw: case cmd_query_xml: case cmd_set_param: case cmd_set_property: return pcmk_rsc_match_history|pcmk_rsc_match_basename; default: return 0; } } /*! * \internal * \brief Check whether a node argument is required * * \return \c true if a \c --node argument is required, or \c false otherwise */ static bool is_node_required(void) { switch (options.rsc_cmd) { case cmd_digests: case cmd_fail: return true; default: return false; } } /*! * \internal * \brief Check whether a resource argument is required * * \return \c true if a \c --resource argument is required, or \c false * otherwise */ static bool is_resource_required(void) { if (options.cmdline_config) { return false; } switch (options.rsc_cmd) { case cmd_clear: return !options.clear_expired; case cmd_cleanup: case cmd_cts: case cmd_list_active_ops: case cmd_list_agents: case cmd_list_all_ops: case cmd_list_alternatives: case cmd_list_instances: case cmd_list_options: case cmd_list_providers: case cmd_list_resources: case cmd_list_standards: case cmd_metadata: case cmd_refresh: case cmd_wait: case cmd_why: return false; default: return true; } } /*! * \internal * \brief Check whether a CIB connection is required * * \return \c true if a CIB connection is required, or \c false otherwise */ static bool is_cib_required(void) { if (options.cmdline_config) { return false; } switch (options.rsc_cmd) { case cmd_list_agents: case cmd_list_alternatives: case cmd_list_options: case cmd_list_providers: case cmd_list_standards: case cmd_metadata: return false; default: return true; } } /*! * \internal * \brief Check whether a controller IPC connection is required * * \return \c true if a controller connection is required, or \c false otherwise */ static bool is_controller_required(void) { switch (options.rsc_cmd) { case cmd_cleanup: case cmd_refresh: return getenv("CIB_file") == NULL; case cmd_fail: return true; default: return false; } } /*! * \internal * \brief Check whether a scheduler IPC connection is required * * \return \c true if a scheduler connection is required, or \c false otherwise */ static bool is_scheduler_required(void) { if (options.cmdline_config) { return false; } switch (options.rsc_cmd) { case cmd_delete: case cmd_list_agents: case cmd_list_alternatives: case cmd_list_options: case cmd_list_providers: case cmd_list_standards: case cmd_metadata: case cmd_wait: return false; default: return true; } } /*! * \internal * \brief Check whether the chosen command accepts clone instances * * \return \c true if \p options.rsc_cmd accepts or ignores clone instances, or * \c false otherwise */ static bool accept_clone_instance(void) { switch (options.rsc_cmd) { case cmd_ban: case cmd_clear: case cmd_delete: case cmd_move: case cmd_restart: return false; default: return true; } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { "resource", 'r', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.rsc_id, "Resource ID", "ID" }, { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder, NULL, NULL }, { NULL } }; const char *description = "Examples:\n\n" "List the available OCF agents:\n\n" "\t# crm_resource --list-agents ocf\n\n" "List the available OCF agents from the linux-ha project:\n\n" "\t# crm_resource --list-agents ocf:heartbeat\n\n" "Move 'myResource' to a specific node:\n\n" "\t# crm_resource --resource myResource --move --node altNode\n\n" "Allow (but not force) 'myResource' to move back to its original " "location:\n\n" "\t# crm_resource --resource myResource --clear\n\n" "Stop 'myResource' (and anything that depends on it):\n\n" "\t# crm_resource --resource myResource --set-parameter " PCMK_META_TARGET_ROLE "--meta --parameter-value Stopped\n\n" "Tell the cluster not to manage 'myResource' (the cluster will not " "attempt to start or stop the\n" "resource under any circumstances; useful when performing maintenance " "tasks on a resource):\n\n" "\t# crm_resource --resource myResource --set-parameter " PCMK_META_IS_MANAGED "--meta --parameter-value false\n\n" "Erase the operation history of 'myResource' on 'aNode' (the cluster " "will 'forget' the existing\n" "resource state, including any errors, and attempt to recover the" "resource; useful when a resource\n" "had failed permanently and has been repaired by an administrator):\n\n" "\t# crm_resource --resource myResource --cleanup --node aNode\n\n"; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); g_option_context_set_description(context, description); /* Add the -Q option, which cannot be part of the globally supported options * because some tools use that flag for something else. */ pcmk__add_main_args(context, extra_prog_entries); pcmk__add_arg_group(context, "queries", "Queries:", "Show query help", query_entries); pcmk__add_arg_group(context, "commands", "Commands:", "Show command help", command_entries); pcmk__add_arg_group(context, "locations", "Locations:", "Show location help", location_entries); pcmk__add_arg_group(context, "advanced", "Advanced:", "Show advanced option help", advanced_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); return context; } int main(int argc, char **argv) { xmlNode *cib_xml_copy = NULL; pcmk_resource_t *rsc = NULL; pcmk_node_t *node = NULL; uint32_t find_flags = 0; int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; /* * Parse command line arguments */ args = pcmk__new_common_args(SUMMARY); processed_args = pcmk__cmdline_preproc(argv, "GHINSTdginpstuvx"); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("crm_resource", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error creating output format %s: %s"), args->output_ty, pcmk_rc_str(rc)); goto done; } pe__register_messages(out); crm_resource_register_messages(out); lrmd__register_messages(out); pcmk__register_lib_messages(out); out->quiet = args->quiet; crm_log_args(argc, argv); /* * Validate option combinations */ // --expired without --clear/-U doesn't make sense if (options.clear_expired && (options.rsc_cmd != cmd_clear)) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("--expired requires --clear or -U")); goto done; } if ((options.remainder != NULL) && (options.override_params != NULL)) { // Commands that use positional arguments will create override_params for (gchar **s = options.remainder; *s; s++) { char *name = pcmk__assert_alloc(1, strlen(*s)); char *value = pcmk__assert_alloc(1, strlen(*s)); int rc = sscanf(*s, "%[^=]=%s", name, value); if (rc == 2) { g_hash_table_replace(options.override_params, name, value); } else { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error parsing '%s' as a name=value pair"), argv[optind]); free(value); free(name); goto done; } } } else if (options.remainder != NULL) { gchar **strv = NULL; gchar *msg = NULL; int i = 1; int len = 0; for (gchar **s = options.remainder; *s; s++) { len++; } CRM_ASSERT(len > 0); /* Add 1 for the strv[0] string below, and add another 1 for the NULL * at the end of the array so g_strjoinv knows when to stop. */ strv = pcmk__assert_alloc(len+2, sizeof(char *)); strv[0] = strdup("non-option ARGV-elements:\n"); for (gchar **s = options.remainder; *s; s++) { strv[i] = crm_strdup_printf("[%d of %d] %s\n", i, len, *s); i++; } strv[i] = NULL; exit_code = CRM_EX_USAGE; msg = g_strjoinv("", strv); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "%s", msg); g_free(msg); /* Don't try to free the last element, which is just NULL. */ for(i = 0; i < len+1; i++) { free(strv[i]); } free(strv); goto done; } if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) { switch (options.rsc_cmd) { /* These are the only commands that have historically used the * elements in their XML schema. For all others, use the simple list * argument. */ case cmd_get_param: case cmd_get_property: case cmd_list_instances: case cmd_list_standards: pcmk__output_enable_list_element(out); break; default: break; } } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) { switch (options.rsc_cmd) { case cmd_colocations: case cmd_list_resources: pcmk__output_text_set_fancy(out, true); break; default: break; } } if (args->version) { out->version(out, false); goto done; } if (options.cmdline_config) { /* A resource configuration was given on the command line. Sanity-check * the values and set error if they don't make sense. */ validate_cmdline_config(); if (error != NULL) { exit_code = CRM_EX_USAGE; goto done; } } else if (options.cmdline_params != NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("--option must be used with --validate and without -r")); g_hash_table_destroy(options.cmdline_params); options.cmdline_params = NULL; goto done; } if (is_resource_required() && (options.rsc_id == NULL)) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Must supply a resource id with -r")); goto done; } if (is_node_required() && (options.host_uname == NULL)) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Must supply a node name with -N")); goto done; } /* * Set up necessary connections */ // Establish a connection to the CIB if needed if (is_cib_required()) { cib_conn = cib_new(); if ((cib_conn == NULL) || (cib_conn->cmds == NULL)) { exit_code = CRM_EX_DISCONNECT; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Could not create CIB connection")); goto done; } rc = cib__signon_attempts(cib_conn, crm_system_name, cib_command, 5); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Could not connect to the CIB: %s"), pcmk_rc_str(rc)); goto done; } } // Populate scheduler data from XML file if specified or CIB query otherwise if (is_scheduler_required()) { rc = initialize_scheduler_data(&cib_xml_copy); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); goto done; } } find_flags = get_find_flags(); // If command requires that resource exist if specified, find it if ((find_flags != 0) && (options.rsc_id != NULL)) { rsc = pe_find_resource_with_flags(scheduler->priv->resources, options.rsc_id, find_flags); if (rsc == NULL) { exit_code = CRM_EX_NOSUCH; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Resource '%s' not found"), options.rsc_id); goto done; } /* The --ban, --clear, --move, and --restart commands do not work with * instances of clone resourcs. */ if (pcmk__is_clone(rsc->priv->parent) && (strchr(options.rsc_id, ':') != NULL) && !accept_clone_instance()) { exit_code = CRM_EX_INVALID_PARAM; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Cannot operate on clone resource instance '%s'"), options.rsc_id); goto done; } } // If user supplied a node name, check whether it exists if ((options.host_uname != NULL) && (scheduler != NULL)) { node = pcmk_find_node(scheduler, options.host_uname); if (node == NULL) { exit_code = CRM_EX_NOSUCH; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Node '%s' not found"), options.host_uname); goto done; } } // Establish a connection to the controller if needed if (is_controller_required()) { rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error connecting to the controller: %s"), pcmk_rc_str(rc)); goto done; } pcmk_register_ipc_callback(controld_api, controller_event_callback, NULL); rc = pcmk__connect_ipc(controld_api, pcmk_ipc_dispatch_main, 5); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error connecting to %s: %s"), pcmk_ipc_name(controld_api, true), pcmk_rc_str(rc)); goto done; } } /* * Handle requested command */ switch (options.rsc_cmd) { case cmd_list_resources: { GList *all = NULL; uint32_t show_opts = pcmk_show_inactive_rscs | pcmk_show_rsc_only | pcmk_show_pending; all = g_list_prepend(all, (gpointer) "*"); rc = out->message(out, "resource-list", scheduler, show_opts, true, all, all, false); g_list_free(all); if (rc == pcmk_rc_no_output) { rc = ENXIO; } break; } case cmd_list_instances: rc = out->message(out, "resource-names-list", scheduler->priv->resources); if (rc != pcmk_rc_ok) { rc = ENXIO; } break; case cmd_list_options: list_options(); break; case cmd_list_alternatives: rc = pcmk__list_alternatives(out, options.agent_spec); break; case cmd_list_agents: rc = pcmk__list_agents(out, options.agent_spec); break; case cmd_list_standards: rc = pcmk__list_standards(out); break; case cmd_list_providers: rc = pcmk__list_providers(out, options.agent_spec); break; case cmd_metadata: rc = show_metadata(out, options.agent_spec); break; case cmd_restart: /* We don't pass scheduler because rsc needs to stay valid for the * entire lifetime of cli_resource_restart(), but it will reset and * update the scheduler data multiple times, so it needs to use its * own copy. */ rc = cli_resource_restart(out, rsc, node, options.move_lifetime, options.timeout_ms, cib_conn, cib_sync_call, options.promoted_role_only, options.force); break; case cmd_wait: rc = wait_till_stable(out, options.timeout_ms, cib_conn); break; case cmd_execute_agent: if (options.cmdline_config) { exit_code = cli_resource_execute_from_params(out, NULL, options.v_class, options.v_provider, options.v_agent, options.operation, options.cmdline_params, options.override_params, options.timeout_ms, args->verbosity, options.force, options.check_level); } else { exit_code = cli_resource_execute(rsc, options.rsc_id, options.operation, options.override_params, options.timeout_ms, cib_conn, scheduler, args->verbosity, options.force, options.check_level); } goto done; case cmd_digests: node = pcmk_find_node(scheduler, options.host_uname); if (node == NULL) { rc = pcmk_rc_node_unknown; } else { rc = pcmk__resource_digests(out, rsc, node, options.override_params); } break; case cmd_colocations: rc = out->message(out, "locations-and-colocations", rsc, options.recursive, (bool) options.force); break; case cmd_cts: rc = pcmk_rc_ok; g_list_foreach(scheduler->priv->resources, (GFunc) cli_resource_print_cts, out); cli_resource_print_cts_constraints(scheduler); break; case cmd_fail: rc = cli_resource_fail(controld_api, options.host_uname, options.rsc_id, scheduler); if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } break; case cmd_list_active_ops: rc = cli_resource_print_operations(options.rsc_id, options.host_uname, TRUE, scheduler); break; case cmd_list_all_ops: rc = cli_resource_print_operations(options.rsc_id, options.host_uname, FALSE, scheduler); break; case cmd_locate: { GList *nodes = cli_resource_search(rsc, options.rsc_id, scheduler); rc = out->message(out, "resource-search-list", nodes, options.rsc_id); g_list_free_full(nodes, free); break; } case cmd_query_xml: rc = cli_resource_print(rsc, scheduler, true); break; case cmd_query_xml_raw: rc = cli_resource_print(rsc, scheduler, false); break; case cmd_why: if ((options.host_uname != NULL) && (node == NULL)) { rc = pcmk_rc_node_unknown; } else { rc = out->message(out, "resource-reasons-list", scheduler->priv->resources, rsc, node); } break; case cmd_clear: rc = clear_constraints(out, &cib_xml_copy); break; case cmd_move: if (options.host_uname == NULL) { rc = ban_or_move(out, rsc, options.move_lifetime); } else { rc = cli_resource_move(rsc, options.rsc_id, options.host_uname, options.move_lifetime, cib_conn, cib_sync_call, scheduler, options.promoted_role_only, options.force); } if (rc == EINVAL) { exit_code = CRM_EX_USAGE; goto done; } break; case cmd_ban: if (options.host_uname == NULL) { rc = ban_or_move(out, rsc, options.move_lifetime); } else if (node == NULL) { rc = pcmk_rc_node_unknown; } else { rc = cli_resource_ban(out, options.rsc_id, node->priv->name, options.move_lifetime, cib_conn, cib_sync_call, options.promoted_role_only, PCMK_ROLE_PROMOTED); } if (rc == EINVAL) { exit_code = CRM_EX_USAGE; goto done; } break; case cmd_get_property: rc = out->message(out, "property-list", rsc, options.prop_name); if (rc == pcmk_rc_no_output) { rc = ENXIO; } break; case cmd_set_property: rc = set_property(); break; case cmd_get_param: { unsigned int count = 0; GHashTable *params = NULL; pcmk_node_t *current = rsc->priv->fns->active_node(rsc, &count, NULL); bool free_params = true; const char* value = NULL; if (count > 1) { out->err(out, "%s is active on more than one node," " returning the default value for %s", rsc->id, pcmk__s(options.prop_name, "unspecified property")); current = NULL; } crm_debug("Looking up %s in %s", options.prop_name, rsc->id); if (pcmk__str_eq(options.attr_set_type, PCMK_XE_INSTANCE_ATTRIBUTES, pcmk__str_none)) { params = pe_rsc_params(rsc, current, scheduler); free_params = false; value = g_hash_table_lookup(params, options.prop_name); } else if (pcmk__str_eq(options.attr_set_type, PCMK_XE_META_ATTRIBUTES, pcmk__str_none)) { params = pcmk__strkey_table(free, free); get_meta_attributes(params, rsc, NULL, scheduler); value = g_hash_table_lookup(params, options.prop_name); } else if (pcmk__str_eq(options.attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) { value = crm_element_value(rsc->priv->xml, options.prop_name); free_params = false; } else { pe_rule_eval_data_t rule_data = { .now = scheduler->priv->now, }; params = pcmk__strkey_table(free, free); pe__unpack_dataset_nvpairs(rsc->priv->xml, PCMK_XE_UTILIZATION, &rule_data, params, NULL, FALSE, scheduler); value = g_hash_table_lookup(params, options.prop_name); } rc = out->message(out, "attribute-list", rsc, options.prop_name, value); if (free_params) { g_hash_table_destroy(params); } break; } case cmd_set_param: if (pcmk__str_empty(options.prop_value)) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("You need to supply a value with the -v option")); goto done; } /* coverity[var_deref_model] False positive */ rc = cli_resource_update_attribute(rsc, options.rsc_id, options.prop_set, options.attr_set_type, options.prop_id, options.prop_name, options.prop_value, options.recursive, cib_conn, options.force); break; case cmd_delete_param: /* coverity[var_deref_model] False positive */ rc = cli_resource_delete_attribute(rsc, options.rsc_id, options.prop_set, options.attr_set_type, options.prop_id, options.prop_name, cib_conn, cib_sync_call, options.force); break; case cmd_cleanup: if (rsc == NULL) { rc = cli_cleanup_all(controld_api, options.host_uname, options.operation, options.interval_spec, scheduler); if (rc == pcmk_rc_ok) { start_mainloop(controld_api); } } else { cleanup(out, rsc, node); } break; case cmd_refresh: if (rsc == NULL) { rc = refresh(out); } else { refresh_resource(out, rsc, node); } break; case cmd_delete: /* rsc_id was already checked for NULL much earlier when validating * command line arguments. */ if (options.rsc_type == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, _("You need to specify a resource type with -t")); } else { rc = pcmk__resource_delete(cib_conn, cib_sync_call, options.rsc_id, options.rsc_type); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__RC_ERROR, rc, _("Could not delete resource %s: %s"), options.rsc_id, pcmk_rc_str(rc)); } } break; default: exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Unimplemented command: %d"), (int) options.rsc_cmd); goto done; } /* Convert rc into an exit code. */ if (rc != pcmk_rc_ok && rc != pcmk_rc_no_output) { exit_code = pcmk_rc2exitc(rc); } /* * Clean up and exit */ done: /* When we get here, exit_code has been set one of two ways - either at one of * the spots where there's a "goto done" (which itself could have happened either * directly or by calling pcmk_rc2exitc), or just up above after any of the break * statements. * * Thus, we can use just exit_code here to decide what to do. */ if (exit_code != CRM_EX_OK && exit_code != CRM_EX_USAGE) { if (error != NULL) { char *msg = crm_strdup_printf("%s\nError performing operation: %s", error->message, crm_exit_str(exit_code)); g_clear_error(&error); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "%s", msg); free(msg); } else { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, _("Error performing operation: %s"), crm_exit_str(exit_code)); } } g_free(options.host_uname); g_free(options.interval_spec); g_free(options.move_lifetime); g_free(options.operation); g_free(options.prop_id); free(options.prop_name); g_free(options.prop_set); g_free(options.prop_value); g_free(options.rsc_id); g_free(options.rsc_type); free(options.agent_spec); free(options.v_agent); free(options.v_class); free(options.v_provider); g_free(options.xml_file); g_strfreev(options.remainder); if (options.override_params != NULL) { g_hash_table_destroy(options.override_params); } /* options.cmdline_params does not need to be destroyed here. See the * comments in cli_resource_execute_from_params. */ g_strfreev(processed_args); g_option_context_free(context); return bye(exit_code); } diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 68940b2137..a89278f72e 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,2437 +1,2434 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static GList * build_node_info_list(const pcmk_resource_t *rsc) { GList *retval = NULL; for (const GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data; for (const GList *iter2 = child->priv->active_nodes; iter2 != NULL; iter2 = iter2->next) { const pcmk_node_t *node = (const pcmk_node_t *) iter2->data; node_info_t *ni = pcmk__assert_alloc(1, sizeof(node_info_t)); ni->node_name = node->priv->name; if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable) && (child->priv->fns->state(child, TRUE) == pcmk_role_promoted)) { ni->promoted = true; } retval = g_list_prepend(retval, ni); } } return retval; } GList * cli_resource_search(pcmk_resource_t *rsc, const char *requested_name, pcmk_scheduler_t *scheduler) { GList *retval = NULL; const pcmk_resource_t *parent = pe__const_top_resource(rsc, false); if (pcmk__is_clone(rsc)) { retval = build_node_info_list(rsc); /* The anonymous clone children's common ID is supplied */ } else if (pcmk__is_clone(parent) && !pcmk_is_set(rsc->flags, pcmk__rsc_unique) && (rsc->priv->history_id != NULL) && pcmk__str_eq(requested_name, rsc->priv->history_id, pcmk__str_none) && !pcmk__str_eq(requested_name, rsc->id, pcmk__str_none)) { retval = build_node_info_list(parent); } else { for (GList *iter = rsc->priv->active_nodes; iter != NULL; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; node_info_t *ni = pcmk__assert_alloc(1, sizeof(node_info_t)); ni->node_name = node->priv->name; if (rsc->priv->fns->state(rsc, TRUE) == pcmk_role_promoted) { ni->promoted = true; } retval = g_list_prepend(retval, ni); } } return retval; } // \return Standard Pacemaker return code static int find_resource_attr(pcmk__output_t *out, cib_t * the_cib, const char *attr, const char *rsc, const char *attr_set_type, const char *set_name, const char *attr_id, const char *attr_name, xmlNode **result) { xmlNode *xml_search; int rc = pcmk_rc_ok; GString *xpath = NULL; const char *xpath_base = NULL; if (result) { *result = NULL; } if(the_cib == NULL) { return ENOTCONN; } xpath_base = pcmk_cib_xpath_for(PCMK_XE_RESOURCES); if (xpath_base == NULL) { crm_err(PCMK_XE_RESOURCES " CIB element not known (bug?)"); return ENOMSG; } xpath = g_string_sized_new(1024); pcmk__g_strcat(xpath, xpath_base, "//*[@" PCMK_XA_ID "=\"", rsc, "\"]", NULL); if (attr_set_type != NULL) { pcmk__g_strcat(xpath, "/", attr_set_type, NULL); if (set_name != NULL) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "=\"", set_name, "\"]", NULL); } } g_string_append(xpath, "//" PCMK_XE_NVPAIR); if (attr_id != NULL && attr_name!= NULL) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "='", attr_id, "' " "and @" PCMK_XA_NAME "='", attr_name, "']", NULL); } else if (attr_id != NULL) { pcmk__g_strcat(xpath, "[@" PCMK_XA_ID "='", attr_id, "']", NULL); } else if (attr_name != NULL) { pcmk__g_strcat(xpath, "[@" PCMK_XA_NAME "='", attr_name, "']", NULL); } rc = the_cib->cmds->query(the_cib, xpath->str, &xml_search, cib_sync_call|cib_xpath); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { crm_log_xml_debug(xml_search, "Match"); if (xml_search->children != NULL) { rc = ENOTUNIQ; pcmk__warn_multiple_name_matches(out, xml_search, attr_name); out->spacer(out); } } if (result) { *result = xml_search; } else { pcmk__xml_free(xml_search); } g_string_free(xpath, TRUE); return rc; } /* PRIVATE. Use the find_matching_attr_resources instead. */ static void find_matching_attr_resources_recursive(pcmk__output_t *out, GList /* */ **result, pcmk_resource_t *rsc, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, int depth) { int rc = pcmk_rc_ok; char *lookup_id = clone_strip(rsc->id); for (GList *gIter = rsc->priv->children; gIter != NULL; gIter = gIter->next) { find_matching_attr_resources_recursive(out, result, (pcmk_resource_t *) gIter->data, attr_set, attr_set_type, attr_id, attr_name, cib, depth+1); /* do it only once for clones */ if (pcmk__is_clone(rsc)) { break; } } rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, NULL); /* Post-order traversal. * The root is always on the list and it is the last item. */ if((0 == depth) || (pcmk_rc_ok == rc)) { /* push the head */ *result = g_list_append(*result, rsc); } free(lookup_id); } /* The result is a linearized pre-ordered tree of resources. */ static GList/**/ * find_matching_attr_resources(pcmk__output_t *out, pcmk_resource_t *rsc, const char * rsc_id, const char * attr_set, const char * attr_set_type, const char * attr_id, const char * attr_name, cib_t * cib, const char * cmd, gboolean force) { int rc = pcmk_rc_ok; char *lookup_id = NULL; GList * result = NULL; /* If --force is used, update only the requested resource (clone or primitive). * Otherwise, if the primitive has the attribute, use that. * Otherwise use the clone. */ if(force == TRUE) { return g_list_append(result, rsc); } if (pcmk__is_clone(rsc->priv->parent)) { int rc = find_resource_attr(out, cib, PCMK_XA_ID, rsc_id, attr_set_type, attr_set, attr_id, attr_name, NULL); if(rc != pcmk_rc_ok) { rsc = rsc->priv->parent; out->info(out, "Performing %s of '%s' on '%s', the parent of '%s'", cmd, attr_name, rsc->id, rsc_id); } return g_list_append(result, rsc); } else if ((rsc->priv->parent == NULL) && (rsc->priv->children != NULL) && pcmk__is_clone(rsc)) { pcmk_resource_t *child = rsc->priv->children->data; if (pcmk__is_primitive(child)) { lookup_id = clone_strip(child->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, NULL); if(rc == pcmk_rc_ok) { rsc = child; out->info(out, "A value for '%s' already exists in child '%s', performing %s on that instead of '%s'", attr_name, lookup_id, cmd, rsc_id); } free(lookup_id); } return g_list_append(result, rsc); } /* If the resource is a group ==> children inherit the attribute if defined. */ find_matching_attr_resources_recursive(out, &result, rsc, attr_set, attr_set_type, attr_id, attr_name, cib, 0); return result; } static int update_element_attribute(pcmk__output_t *out, pcmk_resource_t *rsc, cib_t *cib, const char *attr_name, const char *attr_value) { int rc = pcmk_rc_ok; if (cib == NULL) { return ENOTCONN; } crm_xml_add(rsc->priv->xml, attr_name, attr_value); rc = cib->cmds->replace(cib, PCMK_XE_RESOURCES, rsc->priv->xml, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Set attribute: " PCMK_XA_NAME "=%s value=%s", attr_name, attr_value); } return rc; } static int resources_with_attr(pcmk__output_t *out, cib_t *cib, pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, const char *top_id, gboolean force, GList **resources) { if (pcmk__str_eq(attr_set_type, PCMK_XE_INSTANCE_ATTRIBUTES, pcmk__str_casei)) { if (!force) { xmlNode *xml_search = NULL; int rc = pcmk_rc_ok; rc = find_resource_attr(out, cib, PCMK_XA_ID, top_id, PCMK_XE_META_ATTRIBUTES, attr_set, attr_id, attr_name, &xml_search); if (rc == pcmk_rc_ok || rc == ENOTUNIQ) { char *found_attr_id = NULL; found_attr_id = crm_element_value_copy(xml_search, PCMK_XA_ID); if (!out->is_quiet(out)) { out->err(out, "WARNING: There is already a meta attribute " "for '%s' called '%s' (id=%s)", top_id, attr_name, found_attr_id); out->err(out, " Delete '%s' first or use the force option " "to override", found_attr_id); } free(found_attr_id); pcmk__xml_free(xml_search); return ENOTUNIQ; } pcmk__xml_free(xml_search); } *resources = g_list_append(*resources, rsc); } else { *resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "update", force); } /* If the user specified attr_set or attr_id, the intent is to modify a * single resource, which will be the last item in the list. */ if ((attr_set != NULL) || (attr_id != NULL)) { GList *last = g_list_last(*resources); *resources = g_list_remove_link(*resources, last); g_list_free(*resources); *resources = last; } return pcmk_rc_ok; } static void free_attr_update_data(gpointer data) { attr_update_data_t *ud = data; if (ud == NULL) { return; } free(ud->attr_set_type); free(ud->attr_set_id); free(ud->attr_name); free(ud->attr_value); free(ud->given_rsc_id); free(ud->found_attr_id); free(ud); } static int update_attribute(pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, const char *attr_value, gboolean recursive, cib_t *cib, gboolean force, GList **results) { pcmk__output_t *out = rsc->priv->scheduler->priv->out; int rc = pcmk_rc_ok; GList/**/ *resources = NULL; const char *top_id = pe__const_top_resource(rsc, false)->id; if ((attr_id == NULL) && !force) { find_resource_attr(out, cib, PCMK_XA_ID, top_id, NULL, NULL, NULL, attr_name, NULL); } rc = resources_with_attr(out, cib, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, top_id, force, &resources); if (rc != pcmk_rc_ok) { return rc; } for (GList *iter = resources; iter != NULL; iter = iter->next) { char *lookup_id = NULL; char *local_attr_set = NULL; char *found_attr_id = NULL; const char *rsc_attr_id = attr_id; const char *rsc_attr_set = attr_set; xmlNode *xml_top = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_search = NULL; rsc = (pcmk_resource_t *) iter->data; lookup_id = clone_strip(rsc->id); /* Could be a cloned group! */ rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &xml_search); switch (rc) { case pcmk_rc_ok: found_attr_id = crm_element_value_copy(xml_search, PCMK_XA_ID); crm_debug("Found a match for " PCMK_XA_NAME "='%s': " PCMK_XA_ID "='%s'", attr_name, found_attr_id); rsc_attr_id = found_attr_id; break; case ENXIO: if (rsc_attr_set == NULL) { local_attr_set = crm_strdup_printf("%s-%s", lookup_id, attr_set_type); rsc_attr_set = local_attr_set; } if (rsc_attr_id == NULL) { found_attr_id = crm_strdup_printf("%s-%s", rsc_attr_set, attr_name); rsc_attr_id = found_attr_id; } xml_top = pcmk__xe_create(NULL, (const char *) rsc->priv->xml->name); crm_xml_add(xml_top, PCMK_XA_ID, lookup_id); xml_obj = pcmk__xe_create(xml_top, attr_set_type); crm_xml_add(xml_obj, PCMK_XA_ID, rsc_attr_set); break; default: free(lookup_id); free(found_attr_id); pcmk__xml_free(xml_search); g_list_free(resources); return rc; } xml_obj = crm_create_nvpair_xml(xml_obj, rsc_attr_id, attr_name, attr_value); if (xml_top == NULL) { xml_top = xml_obj; } crm_log_xml_debug(xml_top, "Update"); rc = cib->cmds->modify(cib, PCMK_XE_RESOURCES, xml_top, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { attr_update_data_t *ud = pcmk__assert_alloc(1, sizeof(attr_update_data_t)); if (attr_set_type == NULL) { attr_set_type = (const char *) xml_search->parent->name; } if (rsc_attr_set == NULL) { rsc_attr_set = crm_element_value(xml_search->parent, PCMK_XA_ID); } ud->attr_set_type = pcmk__str_copy(attr_set_type); ud->attr_set_id = pcmk__str_copy(rsc_attr_set); ud->attr_name = pcmk__str_copy(attr_name); ud->attr_value = pcmk__str_copy(attr_value); ud->given_rsc_id = pcmk__str_copy(lookup_id); ud->found_attr_id = pcmk__str_copy(found_attr_id); ud->rsc = rsc; *results = g_list_append(*results, ud); } pcmk__xml_free(xml_top); pcmk__xml_free(xml_search); free(lookup_id); free(found_attr_id); free(local_attr_set); if (recursive && pcmk__str_eq(attr_set_type, PCMK_XE_META_ATTRIBUTES, pcmk__str_casei)) { /* We want to set the attribute only on resources explicitly * colocated with this one, so we use * rsc->priv->with_this_colocations directly rather than the * with_this_colocations() method. */ pcmk__set_rsc_flags(rsc, pcmk__rsc_detect_loop); for (GList *lpc = rsc->priv->with_this_colocations; lpc != NULL; lpc = lpc->next) { pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data; crm_debug("Checking %s %d", cons->id, cons->score); if (pcmk_is_set(cons->dependent->flags, pcmk__rsc_detect_loop) || (cons->score <= 0)) { continue; } crm_debug("Setting %s=%s for dependent resource %s", attr_name, attr_value, cons->dependent->id); update_attribute(cons->dependent, cons->dependent->id, NULL, attr_set_type, NULL, attr_name, attr_value, recursive, cib, force, results); } } } g_list_free(resources); return rc; } // \return Standard Pacemaker return code int cli_resource_update_attribute(pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, const char *attr_value, gboolean recursive, cib_t *cib, gboolean force) { static bool need_init = true; int rc = pcmk_rc_ok; GList *results = NULL; pcmk__output_t *out = rsc->priv->scheduler->priv->out; /* If we were asked to update the attribute in a resource element (for * instance, ) there's really not much we need to do. */ if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) { return update_element_attribute(out, rsc, cib, attr_name, attr_value); } /* One time initialization - clear flags so we can detect loops */ if (need_init) { need_init = false; pcmk__unpack_constraints(rsc->priv->scheduler); pe__clear_resource_flags_on_all(rsc->priv->scheduler, pcmk__rsc_detect_loop); } rc = update_attribute(rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, attr_value, recursive, cib, force, &results); if (rc == pcmk_rc_ok) { if (results == NULL) { return rc; } out->message(out, "attribute-changed-list", results); g_list_free_full(results, free_attr_update_data); } return rc; } // \return Standard Pacemaker return code int cli_resource_delete_attribute(pcmk_resource_t *rsc, const char *requested_name, const char *attr_set, const char *attr_set_type, const char *attr_id, const char *attr_name, cib_t *cib, int cib_options, gboolean force) { pcmk__output_t *out = rsc->priv->scheduler->priv->out; int rc = pcmk_rc_ok; GList/**/ *resources = NULL; if ((attr_id == NULL) && !force) { find_resource_attr(out, cib, PCMK_XA_ID, pe__const_top_resource(rsc, false)->id, NULL, NULL, NULL, attr_name, NULL); } if (pcmk__str_eq(attr_set_type, PCMK_XE_META_ATTRIBUTES, pcmk__str_casei)) { resources = find_matching_attr_resources(out, rsc, requested_name, attr_set, attr_set_type, attr_id, attr_name, cib, "delete", force); } else if (pcmk__str_eq(attr_set_type, ATTR_SET_ELEMENT, pcmk__str_none)) { pcmk__xe_remove_attr(rsc->priv->xml, attr_name); CRM_ASSERT(cib != NULL); rc = cib->cmds->replace(cib, PCMK_XE_RESOURCES, rsc->priv->xml, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Deleted attribute: %s", attr_name); } return rc; } else { resources = g_list_append(resources, rsc); } for (GList *iter = resources; iter != NULL; iter = iter->next) { char *lookup_id = NULL; xmlNode *xml_obj = NULL; xmlNode *xml_search = NULL; char *found_attr_id = NULL; const char *rsc_attr_id = attr_id; rsc = (pcmk_resource_t *) iter->data; lookup_id = clone_strip(rsc->id); rc = find_resource_attr(out, cib, PCMK_XA_ID, lookup_id, attr_set_type, attr_set, attr_id, attr_name, &xml_search); switch (rc) { case pcmk_rc_ok: found_attr_id = crm_element_value_copy(xml_search, PCMK_XA_ID); pcmk__xml_free(xml_search); break; case ENXIO: free(lookup_id); pcmk__xml_free(xml_search); continue; default: free(lookup_id); pcmk__xml_free(xml_search); g_list_free(resources); return rc; } if (rsc_attr_id == NULL) { rsc_attr_id = found_attr_id; } xml_obj = crm_create_nvpair_xml(NULL, rsc_attr_id, attr_name, NULL); crm_log_xml_debug(xml_obj, "Delete"); CRM_ASSERT(cib); rc = cib->cmds->remove(cib, PCMK_XE_RESOURCES, xml_obj, cib_options); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { out->info(out, "Deleted '%s' option: " PCMK_XA_ID "=%s%s%s%s%s", lookup_id, found_attr_id, ((attr_set == NULL)? "" : " set="), pcmk__s(attr_set, ""), ((attr_name == NULL)? "" : " " PCMK_XA_NAME "="), pcmk__s(attr_name, "")); } free(lookup_id); pcmk__xml_free(xml_obj); free(found_attr_id); } g_list_free(resources); return rc; } // \return Standard Pacemaker return code static int send_lrm_rsc_op(pcmk_ipc_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; const char *router_node = host_uname; const char *rsc_api_id = NULL; const char *rsc_long_id = NULL; const char *rsc_class = NULL; const char *rsc_provider = NULL; const char *rsc_type = NULL; bool cib_only = false; pcmk_resource_t *rsc = pe_find_resource(scheduler->priv->resources, rsc_id); if (rsc == NULL) { out->err(out, "Resource %s not found", rsc_id); return ENXIO; } else if (!pcmk__is_primitive(rsc)) { out->err(out, "We can only process primitive resources, not %s", rsc_id); return EINVAL; } rsc_class = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); rsc_provider = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER); rsc_type = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE); if ((rsc_class == NULL) || (rsc_type == NULL)) { out->err(out, "Resource %s does not have a class and type", rsc_id); return EINVAL; } { pcmk_node_t *node = pcmk_find_node(scheduler, host_uname); if (node == NULL) { out->err(out, "Node %s not found", host_uname); return pcmk_rc_node_unknown; } if (!(node->details->online)) { if (do_fail_resource) { out->err(out, "Node %s is not online", host_uname); return ENOTCONN; } else { cib_only = true; } } if (!cib_only && pcmk__is_pacemaker_remote_node(node)) { node = pcmk__current_node(node->priv->remote); if (node == NULL) { out->err(out, "No cluster connection to Pacemaker Remote node %s detected", host_uname); return ENOTCONN; } router_node = node->priv->name; } } if (rsc->priv->history_id != NULL) { rsc_api_id = rsc->priv->history_id; rsc_long_id = rsc->id; } else { rsc_api_id = rsc->id; } if (do_fail_resource) { return pcmk_controld_api_fail(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type); } else { return pcmk_controld_api_refresh(controld_api, host_uname, router_node, rsc_api_id, rsc_long_id, rsc_class, rsc_provider, rsc_type, cib_only); } } /*! * \internal * \brief Get resource name as used in failure-related node attributes * * \param[in] rsc Resource to check * * \return Newly allocated string containing resource's fail name * \note The caller is responsible for freeing the result. */ static inline char * rsc_fail_name(const pcmk_resource_t *rsc) { const char *name = pcmk__s(rsc->priv->history_id, rsc->id); if (pcmk_is_set(rsc->flags, pcmk__rsc_unique)) { return strdup(name); } return clone_strip(name); } // \return Standard Pacemaker return code static int clear_rsc_history(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; /* Erase the resource's entire LRM history in the CIB, even if we're only * clearing a single operation's fail count. If we erased only entries for a * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, scheduler); if (rc != pcmk_rc_ok) { return rc; } crm_trace("Processing %d mainloop inputs", pcmk_controld_api_replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", pcmk_controld_api_replies_expected(controld_api)); } return rc; } // \return Standard Pacemaker return code static int clear_rsc_failures(pcmk__output_t *out, pcmk_ipc_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; const char *failed_value = NULL; const char *failed_id = NULL; char *interval_ms_s = NULL; GHashTable *rscs = NULL; GHashTableIter iter; /* Create a hash table to use as a set of resources to clean. This lets us * clean each resource only once (per node) regardless of how many failed * operations it has. */ rscs = pcmk__strkey_table(NULL, NULL); // Normalize interval to milliseconds for comparison to history entry if (operation) { guint interval_ms = 0U; pcmk_parse_interval_spec(interval_spec, &interval_ms); interval_ms_s = crm_strdup_printf("%u", interval_ms); } for (xmlNode *xml_op = pcmk__xe_first_child(scheduler->priv->failed, NULL, NULL, NULL); xml_op != NULL; xml_op = pcmk__xe_next(xml_op)) { failed_id = crm_element_value(xml_op, PCMK__XA_RSC_ID); if (failed_id == NULL) { // Malformed history entry, should never happen continue; } // No resource specified means all resources match if (rsc_id) { pcmk_resource_t *fail_rsc = NULL; fail_rsc = pe_find_resource_with_flags(scheduler->priv->resources, failed_id, pcmk_rsc_match_history |pcmk_rsc_match_anon_basename); if (!fail_rsc || !pcmk__str_eq(rsc_id, fail_rsc->id, pcmk__str_casei)) { continue; } } // Host name should always have been provided by this point failed_value = crm_element_value(xml_op, PCMK_XA_UNAME); if (!pcmk__str_eq(node_name, failed_value, pcmk__str_casei)) { continue; } // No operation specified means all operations match if (operation) { failed_value = crm_element_value(xml_op, PCMK_XA_OPERATION); if (!pcmk__str_eq(operation, failed_value, pcmk__str_casei)) { continue; } // Interval (if operation was specified) defaults to 0 (not all) failed_value = crm_element_value(xml_op, PCMK_META_INTERVAL); if (!pcmk__str_eq(interval_ms_s, failed_value, pcmk__str_casei)) { continue; } } g_hash_table_add(rscs, (gpointer) failed_id); } free(interval_ms_s); g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); rc = clear_rsc_history(controld_api, node_name, failed_id, scheduler); if (rc != pcmk_rc_ok) { return rc; } } g_hash_table_destroy(rscs); return rc; } // \return Standard Pacemaker return code static int clear_rsc_fail_attrs(const pcmk_resource_t *rsc, const char *operation, const char *interval_spec, const pcmk_node_t *node) { int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; char *rsc_name = rsc_fail_name(rsc); if (pcmk__is_pacemaker_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } rc = pcmk__attrd_api_clear_failures(NULL, node->priv->name, rsc_name, operation, interval_spec, NULL, attr_options); free(rsc_name); return rc; } // \return Standard Pacemaker return code int cli_resource_delete(pcmk_ipc_api_t *controld_api, const char *host_uname, const pcmk_resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pcmk_scheduler_t *scheduler, gboolean force) { pcmk__output_t *out = scheduler->priv->out; int rc = pcmk_rc_ok; pcmk_node_t *node = NULL; if (rsc == NULL) { return ENXIO; } else if (rsc->priv->children != NULL) { for (const GList *lpc = rsc->priv->children; lpc != NULL; lpc = lpc->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) lpc->data; rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, scheduler, force); if (rc != pcmk_rc_ok) { return rc; } } return pcmk_rc_ok; } else if (host_uname == NULL) { GList *lpc = NULL; GList *nodes = g_hash_table_get_values(rsc->priv->probed_nodes); if(nodes == NULL && force) { nodes = pcmk__copy_node_list(scheduler->nodes, false); } else if ((nodes == NULL) && pcmk_is_set(rsc->flags, pcmk__rsc_exclusive_probes)) { GHashTableIter iter; pcmk_node_t *node = NULL; g_hash_table_iter_init(&iter, rsc->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void**)&node)) { if (node->assign->score >= 0) { nodes = g_list_prepend(nodes, node); } } } else if(nodes == NULL) { nodes = g_hash_table_get_values(rsc->priv->allowed_nodes); } for (lpc = nodes; lpc != NULL; lpc = lpc->next) { node = (pcmk_node_t *) lpc->data; if (node->details->online) { rc = cli_resource_delete(controld_api, node->priv->name, rsc, operation, interval_spec, just_failures, scheduler, force); } if (rc != pcmk_rc_ok) { g_list_free(nodes); return rc; } } g_list_free(nodes); return pcmk_rc_ok; } node = pcmk_find_node(scheduler, host_uname); if (node == NULL) { out->err(out, "Unable to clean up %s because node %s not found", rsc->id, host_uname); return ENODEV; } if (!pcmk_is_set(node->priv->flags, pcmk__node_probes_allowed)) { out->err(out, "Unable to clean up %s because resource discovery disabled on %s", rsc->id, host_uname); return EOPNOTSUPP; } if (controld_api == NULL) { out->err(out, "Dry run: skipping clean-up of %s on %s due to CIB_file", rsc->id, host_uname); return pcmk_rc_ok; } rc = clear_rsc_fail_attrs(rsc, operation, interval_spec, node); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up %s failures on %s: %s", rsc->id, host_uname, pcmk_rc_str(rc)); return rc; } if (just_failures) { rc = clear_rsc_failures(out, controld_api, host_uname, rsc->id, operation, interval_spec, scheduler); } else { rc = clear_rsc_history(controld_api, host_uname, rsc->id, scheduler); } if (rc != pcmk_rc_ok) { out->err(out, "Cleaned %s failures on %s, but unable to clean history: %s", rsc->id, host_uname, pcmk_rc_str(rc)); } else { out->info(out, "Cleaned up %s on %s", rsc->id, host_uname); } return rc; } // \return Standard Pacemaker return code int cli_cleanup_all(pcmk_ipc_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pcmk_scheduler_t *scheduler) { pcmk__output_t *out = scheduler->priv->out; int rc = pcmk_rc_ok; int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; if (controld_api == NULL) { out->info(out, "Dry run: skipping clean-up of %s due to CIB_file", display_name); return rc; } if (node_name) { pcmk_node_t *node = pcmk_find_node(scheduler, node_name); if (node == NULL) { out->err(out, "Unknown node: %s", node_name); return ENXIO; } if (pcmk__is_pacemaker_remote_node(node)) { attr_options |= pcmk__node_attr_remote; } } rc = pcmk__attrd_api_clear_failures(NULL, node_name, NULL, operation, interval_spec, NULL, attr_options); if (rc != pcmk_rc_ok) { out->err(out, "Unable to clean up all failures on %s: %s", display_name, pcmk_rc_str(rc)); return rc; } if (node_name) { rc = clear_rsc_failures(out, controld_api, node_name, NULL, operation, interval_spec, scheduler); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on %s, but unable to clean history: %s", node_name, pcmk_rc_str(rc)); return rc; } } else { for (GList *iter = scheduler->nodes; iter; iter = iter->next) { pcmk_node_t *node = (pcmk_node_t *) iter->data; rc = clear_rsc_failures(out, controld_api, node->priv->name, NULL, operation, interval_spec, scheduler); if (rc != pcmk_rc_ok) { out->err(out, "Cleaned all resource failures on all nodes, but unable to clean history: %s", pcmk_rc_str(rc)); return rc; } } } out->info(out, "Cleaned up all resources on %s", display_name); return rc; } static void check_role(resource_checks_t *checks) { const char *role_s = g_hash_table_lookup(checks->rsc->priv->meta, PCMK_META_TARGET_ROLE); if (role_s == NULL) { return; } switch (pcmk_parse_role(role_s)) { case pcmk_role_stopped: checks->flags |= rsc_remain_stopped; break; case pcmk_role_unpromoted: if (pcmk_is_set(pe__const_top_resource(checks->rsc, false)->flags, pcmk__rsc_promotable)) { checks->flags |= rsc_unpromotable; } break; default: break; } } static void check_managed(resource_checks_t *checks) { const char *managed_s = g_hash_table_lookup(checks->rsc->priv->meta, PCMK_META_IS_MANAGED); if ((managed_s != NULL) && !crm_is_true(managed_s)) { checks->flags |= rsc_unmanaged; } } static void check_locked(resource_checks_t *checks) { const pcmk_node_t *lock_node = checks->rsc->priv->lock_node; if (lock_node != NULL) { checks->flags |= rsc_locked; checks->lock_node = lock_node->priv->name; } } static bool node_is_unhealthy(pcmk_node_t *node) { switch (pe__health_strategy(node->priv->scheduler)) { case pcmk__health_strategy_none: break; case pcmk__health_strategy_no_red: if (pe__node_health(node) < 0) { return true; } break; case pcmk__health_strategy_only_green: if (pe__node_health(node) <= 0) { return true; } break; case pcmk__health_strategy_progressive: case pcmk__health_strategy_custom: /* @TODO These are finite scores, possibly with rules, and possibly * combining with other scores, so attributing these as a cause is * nontrivial. */ break; } return false; } static void check_node_health(resource_checks_t *checks, pcmk_node_t *node) { if (node == NULL) { GHashTableIter iter; bool allowed = false; bool all_nodes_unhealthy = true; g_hash_table_iter_init(&iter, checks->rsc->priv->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) { allowed = true; if (!node_is_unhealthy(node)) { all_nodes_unhealthy = false; break; } } if (allowed && all_nodes_unhealthy) { checks->flags |= rsc_node_health; } } else if (node_is_unhealthy(node)) { checks->flags |= rsc_node_health; } } int cli_resource_check(pcmk__output_t *out, pcmk_resource_t *rsc, pcmk_node_t *node) { resource_checks_t checks = { .rsc = rsc }; check_role(&checks); check_managed(&checks); check_locked(&checks); check_node_health(&checks, node); return out->message(out, "resource-check-list", &checks); } // \return Standard Pacemaker return code int cli_resource_fail(pcmk_ipc_api_t *controld_api, const char *host_uname, const char *rsc_id, pcmk_scheduler_t *scheduler) { crm_notice("Failing %s on %s", rsc_id, host_uname); return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, scheduler); } static GHashTable * generate_resource_params(pcmk_resource_t *rsc, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { GHashTable *params = NULL; GHashTable *meta = NULL; GHashTable *combined = NULL; GHashTableIter iter; char *key = NULL; char *value = NULL; combined = pcmk__strkey_table(free, free); params = pe_rsc_params(rsc, node, scheduler); if (params != NULL) { g_hash_table_iter_init(&iter, params); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { pcmk__insert_dup(combined, key, value); } } meta = pcmk__strkey_table(free, free); get_meta_attributes(meta, rsc, NULL, scheduler); if (meta != NULL) { g_hash_table_iter_init(&iter, meta); while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) { char *crm_name = crm_meta_name(key); g_hash_table_insert(combined, crm_name, strdup(value)); } g_hash_table_destroy(meta); } return combined; } bool resource_is_running_on(pcmk_resource_t *rsc, const char *host) { bool found = true; GList *hIter = NULL; GList *hosts = NULL; if (rsc == NULL) { return false; } rsc->priv->fns->location(rsc, &hosts, TRUE); for (hIter = hosts; host != NULL && hIter != NULL; hIter = hIter->next) { pcmk_node_t *node = (pcmk_node_t *) hIter->data; if (pcmk__strcase_any_of(host, node->priv->name, node->priv->id, NULL)) { crm_trace("Resource %s is running on %s\n", rsc->id, host); goto done; } } if (host != NULL) { crm_trace("Resource %s is not running on: %s\n", rsc->id, host); found = false; } else if(host == NULL && hosts == NULL) { crm_trace("Resource %s is not running\n", rsc->id); found = false; } done: g_list_free(hosts); return found; } /*! * \internal * \brief Create a list of all resources active on host from a given list * * \param[in] host Name of host to check whether resources are active * \param[in] rsc_list List of resources to check * * \return New list of resources from list that are active on host */ static GList * get_active_resources(const char *host, GList *rsc_list) { GList *rIter = NULL; GList *active = NULL; for (rIter = rsc_list; rIter != NULL; rIter = rIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) rIter->data; /* Expand groups to their members, because if we're restarting a member * other than the first, we can't otherwise tell which resources are * stopping and starting. */ if (pcmk__is_group(rsc)) { GList *member_active = NULL; member_active = get_active_resources(host, rsc->priv->children); active = g_list_concat(active, member_active); } else if (resource_is_running_on(rsc, host)) { active = g_list_append(active, strdup(rsc->id)); } } return active; } static void dump_list(GList *items, const char *tag) { int lpc = 0; GList *item = NULL; for (item = items; item != NULL; item = item->next) { crm_trace("%s[%d]: %s", tag, lpc, (char*)item->data); lpc++; } } static void display_list(pcmk__output_t *out, GList *items, const char *tag) { GList *item = NULL; for (item = items; item != NULL; item = item->next) { out->info(out, "%s%s", tag, (const char *)item->data); } } /*! * \internal * \brief Upgrade XML to latest schema version and use it as scheduler input * * This also updates the scheduler timestamp to the current time. * * \param[in,out] scheduler Scheduler data to update * \param[in,out] xml XML to use as input * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * scheduler->priv->now. */ int update_scheduler_input(pcmk_scheduler_t *scheduler, xmlNode **xml) { int rc = pcmk_update_configured_schema(xml, false); if (rc == pcmk_rc_ok) { scheduler->input = *xml; scheduler->priv->now = crm_time_new(NULL); } return pcmk_rc_ok; } /*! * \internal * \brief Update scheduler XML input based on a CIB query * * \param[in] scheduler Scheduler data to initialize * \param[in] cib Connection to the CIB manager * * \return Standard Pacemaker return code * \note On success, caller is responsible for freeing memory allocated for * scheduler->input and scheduler->priv->now. */ static int update_scheduler_input_to_cib(pcmk__output_t *out, pcmk_scheduler_t *scheduler, cib_t *cib) { xmlNode *cib_xml_copy = NULL; int rc = pcmk_rc_ok; rc = cib->cmds->query(cib, NULL, &cib_xml_copy, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not obtain the current CIB: %s (%d)", pcmk_rc_str(rc), rc); return rc; } rc = update_scheduler_input(scheduler, &cib_xml_copy); if (rc != pcmk_rc_ok) { out->err(out, "Could not upgrade the current CIB XML"); pcmk__xml_free(cib_xml_copy); return rc; } return rc; } // \return Standard Pacemaker return code static int update_dataset(cib_t *cib, pcmk_scheduler_t *scheduler, bool simulate) { char *pid = NULL; char *shadow_file = NULL; cib_t *shadow_cib = NULL; int rc = pcmk_rc_ok; pcmk__output_t *out = scheduler->priv->out; pe_reset_working_set(scheduler); - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); rc = update_scheduler_input_to_cib(out, scheduler, cib); if (rc != pcmk_rc_ok) { return rc; } if(simulate) { bool prev_quiet = false; pid = pcmk__getpid_s(); shadow_cib = cib_shadow_new(pid); shadow_file = get_shadow_file(pid); if (shadow_cib == NULL) { out->err(out, "Could not create shadow cib: '%s'", pid); rc = ENXIO; goto done; } rc = pcmk__xml_write_file(scheduler->input, shadow_file, false); if (rc != pcmk_rc_ok) { out->err(out, "Could not populate shadow cib: %s", pcmk_rc_str(rc)); goto done; } rc = shadow_cib->cmds->signon(shadow_cib, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Could not connect to shadow cib: %s", pcmk_rc_str(rc)); goto done; } - pcmk__schedule_actions(scheduler->input, - pcmk__sched_no_counts|pcmk__sched_no_compat, + pcmk__schedule_actions(scheduler->input, pcmk__sched_no_counts, scheduler); prev_quiet = out->is_quiet(out); out->quiet = true; pcmk__simulate_transition(scheduler, shadow_cib, NULL); out->quiet = prev_quiet; rc = update_dataset(shadow_cib, scheduler, false); } else { cluster_status(scheduler); } done: // Do not free scheduler->input because rsc->priv->xml must remain valid cib_delete(shadow_cib); free(pid); if(shadow_file) { unlink(shadow_file); free(shadow_file); } return rc; } /*! * \internal * \brief Find the maximum stop timeout of a resource and its children (if any) * * \param[in,out] rsc Resource to get timeout for * * \return Maximum stop timeout for \p rsc (in milliseconds) */ static guint max_rsc_stop_timeout(pcmk_resource_t *rsc) { long long result_ll; guint max_delay = 0; xmlNode *config = NULL; GHashTable *meta = NULL; if (rsc == NULL) { return 0; } // If resource is collective, use maximum of its children's stop timeouts if (rsc->priv->children != NULL) { for (GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { pcmk_resource_t *child = iter->data; guint delay = max_rsc_stop_timeout(child); if (delay > max_delay) { pcmk__rsc_trace(rsc, "Maximum stop timeout for %s is now %s " "due to %s", rsc->id, pcmk__readable_interval(delay), child->id); max_delay = delay; } } return max_delay; } // Get resource's stop action configuration from CIB config = pcmk__find_action_config(rsc, PCMK_ACTION_STOP, 0, true); /* Get configured timeout for stop action (fully evaluated for rules, * defaults, etc.). * * @TODO This currently ignores node (which might matter for rules) */ meta = pcmk__unpack_action_meta(rsc, NULL, PCMK_ACTION_STOP, 0, config); if ((pcmk__scan_ll(g_hash_table_lookup(meta, PCMK_META_TIMEOUT), &result_ll, -1LL) == pcmk_rc_ok) && (result_ll >= 0)) { max_delay = (guint) QB_MIN(result_ll, UINT_MAX); } g_hash_table_destroy(meta); return max_delay; } /*! * \internal * \brief Find a reasonable waiting time for stopping any one resource in a list * * \param[in,out] scheduler Scheduler data * \param[in] resources List of names of resources that will be stopped * * \return Rough estimate of a reasonable time to wait (in seconds) to stop any * one resource in \p resources * \note This estimate is very rough, simply the maximum stop timeout of all * given resources and their children, plus a small fudge factor. It does * not account for children that must be stopped in sequence, action * throttling, or any demotions needed. It checks the stop timeout, even * if the resources in question are actually being started. */ static guint wait_time_estimate(pcmk_scheduler_t *scheduler, const GList *resources) { guint max_delay = 0U; // Find maximum stop timeout in milliseconds for (const GList *item = resources; item != NULL; item = item->next) { pcmk_resource_t *rsc = pe_find_resource(scheduler->priv->resources, (const char *) item->data); guint delay = max_rsc_stop_timeout(rsc); if (delay > max_delay) { pcmk__rsc_trace(rsc, "Wait time is now %s due to %s", pcmk__readable_interval(delay), rsc->id); max_delay = delay; } } return (max_delay / 1000U) + 5U; } #define waiting_for_starts(d, r, h) ((d != NULL) || \ (!resource_is_running_on((r), (h)))) /*! * \internal * \brief Restart a resource (on a particular host if requested). * * \param[in,out] out Output object * \param[in,out] rsc The resource to restart * \param[in] node Node to restart resource on (NULL for all) * \param[in] move_lifetime If not NULL, how long constraint should * remain in effect (as ISO 8601 string) * \param[in] timeout_ms Consider failed if actions do not complete * in this time (specified in milliseconds, * but a two-second granularity is actually * used; if 0, it will be calculated based on * the resource timeout) * \param[in,out] cib Connection to the CIB manager * \param[in] cib_options Group of enum cib_call_options flags to * use with CIB calls * \param[in] promoted_role_only If true, limit to promoted instances * \param[in] force If true, apply only to requested instance * if part of a collective resource * * \return Standard Pacemaker return code (exits on certain failures) */ int cli_resource_restart(pcmk__output_t *out, pcmk_resource_t *rsc, const pcmk_node_t *node, const char *move_lifetime, guint timeout_ms, cib_t *cib, int cib_options, gboolean promoted_role_only, gboolean force) { int rc = pcmk_rc_ok; int lpc = 0; int before = 0; guint step_timeout_s = 0; guint sleep_interval = 2U; guint timeout = timeout_ms / 1000U; bool stop_via_ban = false; char *rsc_id = NULL; char *lookup_id = NULL; char *orig_target_role = NULL; GList *list_delta = NULL; GList *target_active = NULL; GList *current_active = NULL; GList *restart_target_active = NULL; pcmk_scheduler_t *scheduler = NULL; pcmk_resource_t *parent = uber_parent(rsc); bool running = false; const char *id = pcmk__s(rsc->priv->history_id, rsc->id); const char *host = node ? node->priv->name : NULL; /* If the implicit resource or primitive resource of a bundle is given, operate on the * bundle itself instead. */ if (pcmk__is_bundled(rsc)) { rsc = parent->priv->parent; } running = resource_is_running_on(rsc, host); if (pcmk__is_clone(parent) && !running) { if (pcmk__is_unique_clone(parent)) { lookup_id = strdup(rsc->id); } else { lookup_id = clone_strip(rsc->id); } rsc = parent->priv->fns->find_rsc(parent, lookup_id, node, pcmk_rsc_match_basename |pcmk_rsc_match_current_node); free(lookup_id); running = resource_is_running_on(rsc, host); } if (!running) { if (host) { out->err(out, "%s is not running on %s and so cannot be restarted", id, host); } else { out->err(out, "%s is not running anywhere and so cannot be restarted", id); } return ENXIO; } if (!pcmk_is_set(rsc->flags, pcmk__rsc_managed)) { out->err(out, "Unmanaged resources cannot be restarted."); return EAGAIN; } rsc_id = strdup(rsc->id); if (pcmk__is_unique_clone(parent)) { lookup_id = strdup(rsc->id); } else { lookup_id = clone_strip(rsc->id); } if (host) { if (pcmk__is_clone(rsc) || pe_bundle_replicas(rsc)) { stop_via_ban = true; } else if (pcmk__is_clone(parent)) { stop_via_ban = true; free(lookup_id); lookup_id = strdup(parent->id); } } /* grab full cib determine originally active resources disable or ban poll cib and watch for affected resources to get stopped without --timeout, calculate the stop timeout for each step and wait for that if we hit --timeout or the service timeout, re-enable or un-ban, report failure and indicate which resources we couldn't take down if everything stopped, re-enable or un-ban poll cib and watch for affected resources to get started without --timeout, calculate the start timeout for each step and wait for that if we hit --timeout or the service timeout, report (different) failure and indicate which resources we couldn't bring back up report success Optimizations: - use constraints to determine ordered list of affected resources - Allow a --no-deps option (aka. --force-restart) */ scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = errno; out->err(out, "Could not allocate scheduler data: %s", pcmk_rc_str(rc)); goto done; } scheduler->priv->out = out; rc = update_dataset(cib, scheduler, false); if(rc != pcmk_rc_ok) { out->err(out, "Could not get new resource list: %s (%d)", pcmk_rc_str(rc), rc); goto done; } restart_target_active = get_active_resources(host, scheduler->priv->resources); current_active = get_active_resources(host, scheduler->priv->resources); dump_list(current_active, "Origin"); if (stop_via_ban) { /* Stop the clone or bundle instance by banning it from the host */ out->quiet = true; rc = cli_resource_ban(out, lookup_id, host, move_lifetime, cib, cib_options, promoted_role_only, PCMK_ROLE_PROMOTED); } else { xmlNode *xml_search = NULL; /* Stop the resource by setting PCMK_META_TARGET_ROLE to Stopped. * Remember any existing PCMK_META_TARGET_ROLE so we can restore it * later (though it only makes any difference if it's Unpromoted). */ rc = find_resource_attr(out, cib, PCMK_XA_VALUE, lookup_id, NULL, NULL, NULL, PCMK_META_TARGET_ROLE, &xml_search); if (rc == pcmk_rc_ok) { orig_target_role = crm_element_value_copy(xml_search, PCMK_XA_VALUE); } pcmk__xml_free(xml_search); rc = cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, PCMK_ACTION_STOPPED, FALSE, cib, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not set " PCMK_META_TARGET_ROLE " for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc); if (current_active != NULL) { g_list_free_full(current_active, free); current_active = NULL; } if (restart_target_active != NULL) { g_list_free_full(restart_target_active, free); restart_target_active = NULL; } goto done; } rc = update_dataset(cib, scheduler, true); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources would be stopped"); goto failure; } target_active = get_active_resources(host, scheduler->priv->resources); dump_list(target_active, "Target"); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to stop:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (list_delta != NULL) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = wait_time_estimate(scheduler, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for(lpc = 0; (lpc < step_timeout_s) && (list_delta != NULL); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%us remaining", timeout); } rc = update_dataset(cib, scheduler, FALSE); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were stopped"); goto failure; } if (current_active != NULL) { g_list_free_full(current_active, free); } current_active = get_active_resources(host, scheduler->priv->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(current_active, target_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } crm_trace("%d (was %d) resources remaining", g_list_length(list_delta), before); if(before == g_list_length(list_delta)) { /* aborted during stop phase, print the contents of list_delta */ out->err(out, "Could not complete shutdown of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } if (stop_via_ban) { rc = cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force); } else if (orig_target_role) { rc = cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, orig_target_role, FALSE, cib, force); free(orig_target_role); orig_target_role = NULL; } else { rc = cli_resource_delete_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, cib, cib_options, force); } if(rc != pcmk_rc_ok) { out->err(out, "Could not unset " PCMK_META_TARGET_ROLE " for %s: %s (%d)", rsc_id, pcmk_rc_str(rc), rc); goto done; } if (target_active != NULL) { g_list_free_full(target_active, free); } target_active = restart_target_active; list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); out->info(out, "Waiting for %d resources to start again:", g_list_length(list_delta)); display_list(out, list_delta, " * "); step_timeout_s = timeout / sleep_interval; while (waiting_for_starts(list_delta, rsc, host)) { before = g_list_length(list_delta); if(timeout_ms == 0) { step_timeout_s = wait_time_estimate(scheduler, list_delta) / sleep_interval; } /* We probably don't need the entire step timeout */ for (lpc = 0; (lpc < step_timeout_s) && waiting_for_starts(list_delta, rsc, host); lpc++) { sleep(sleep_interval); if(timeout) { timeout -= sleep_interval; crm_trace("%ds remaining", timeout); } rc = update_dataset(cib, scheduler, false); if(rc != pcmk_rc_ok) { out->err(out, "Could not determine which resources were started"); goto failure; } /* It's OK if dependent resources moved to a different node, * so we check active resources on all nodes. */ if (current_active != NULL) { g_list_free_full(current_active, free); } current_active = get_active_resources(NULL, scheduler->priv->resources); g_list_free(list_delta); list_delta = pcmk__subtract_lists(target_active, current_active, (GCompareFunc) strcmp); dump_list(current_active, "Current"); dump_list(list_delta, "Delta"); } if(before == g_list_length(list_delta)) { /* aborted during start phase, print the contents of list_delta */ out->err(out, "Could not complete restart of %s, %d resources remaining", rsc_id, g_list_length(list_delta)); display_list(out, list_delta, " * "); rc = ETIME; goto failure; } } rc = pcmk_rc_ok; goto done; failure: if (stop_via_ban) { cli_resource_clear(lookup_id, host, NULL, cib, cib_options, true, force); } else if (orig_target_role) { cli_resource_update_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, orig_target_role, FALSE, cib, force); free(orig_target_role); } else { cli_resource_delete_attribute(rsc, rsc_id, NULL, PCMK_XE_META_ATTRIBUTES, NULL, PCMK_META_TARGET_ROLE, cib, cib_options, force); } done: if (list_delta != NULL) { g_list_free(list_delta); } if (current_active != NULL) { g_list_free_full(current_active, free); } if (target_active != NULL && (target_active != restart_target_active)) { g_list_free_full(target_active, free); } if (restart_target_active != NULL) { g_list_free_full(restart_target_active, free); } free(rsc_id); free(lookup_id); pe_free_working_set(scheduler); return rc; } static inline bool action_is_pending(const pcmk_action_t *action) { if (pcmk_any_flags_set(action->flags, pcmk__action_optional|pcmk__action_pseudo) || !pcmk_is_set(action->flags, pcmk__action_runnable) || pcmk__str_eq(PCMK_ACTION_NOTIFY, action->task, pcmk__str_casei)) { return false; } return true; } /*! * \internal * \brief Check whether any actions in a list are pending * * \param[in] actions List of actions to check * * \return true if any actions in the list are pending, otherwise false */ static bool actions_are_pending(const GList *actions) { for (const GList *action = actions; action != NULL; action = action->next) { const pcmk_action_t *a = (const pcmk_action_t *) action->data; if (action_is_pending(a)) { crm_notice("Waiting for %s (flags=%#.8x)", a->uuid, a->flags); return true; } } return false; } static void print_pending_actions(pcmk__output_t *out, GList *actions) { GList *action; out->info(out, "Pending actions:"); for (action = actions; action != NULL; action = action->next) { pcmk_action_t *a = (pcmk_action_t *) action->data; if (!action_is_pending(a)) { continue; } if (a->node) { out->info(out, "\tAction %d: %s\ton %s", a->id, a->uuid, pcmk__node_name(a->node)); } else { out->info(out, "\tAction %d: %s", a->id, a->uuid); } } } /* For --wait, timeout (in seconds) to use if caller doesn't specify one */ #define WAIT_DEFAULT_TIMEOUT_S (60 * 60) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) /*! * \internal * \brief Wait until all pending cluster actions are complete * * This waits until either the CIB's transition graph is idle or a timeout is * reached. * * \param[in,out] out Output object * \param[in] timeout_ms Consider failed if actions do not complete in * this time (specified in milliseconds, but * one-second granularity is actually used; if 0, a * default will be used) * \param[in,out] cib Connection to the CIB manager * * \return Standard Pacemaker return code */ int wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) { pcmk_scheduler_t *scheduler = NULL; xmlXPathObjectPtr search; int rc = pcmk_rc_ok; bool pending_unknown_state_resources; time_t expire_time = time(NULL); time_t time_diff; bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet char *xpath = NULL; if (timeout_ms == 0) { expire_time += WAIT_DEFAULT_TIMEOUT_S; } else { expire_time += (timeout_ms + 999) / 1000; } scheduler = pe_new_working_set(); if (scheduler == NULL) { return ENOMEM; } xpath = crm_strdup_printf("/" PCMK_XE_CIB "/" PCMK_XE_STATUS "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM "/" PCMK__XE_LRM_RESOURCES "/" PCMK__XE_LRM_RESOURCE "/" PCMK__XE_LRM_RSC_OP "[@" PCMK__XA_RC_CODE "='%d']", PCMK_OCF_UNKNOWN); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); if (time_diff <= 0) { print_pending_actions(out, scheduler->priv->actions); rc = ETIME; break; } crm_info("Waiting up to %lld seconds for cluster actions to complete", (long long) time_diff); if (rc == pcmk_rc_ok) { /* this avoids sleep on first loop iteration */ sleep(WAIT_SLEEP_S); } /* Get latest transition graph */ pe_reset_working_set(scheduler); rc = update_scheduler_input_to_cib(out, scheduler, cib); if (rc != pcmk_rc_ok) { break; } - pcmk__schedule_actions(scheduler->input, - pcmk__sched_no_counts|pcmk__sched_no_compat, + pcmk__schedule_actions(scheduler->input, pcmk__sched_no_counts, scheduler); if (!printed_version_warning) { /* If the DC has a different version than the local node, the two * could come to different conclusions about what actions need to be * done. Warn the user in this case. * * @TODO A possible long-term solution would be to reimplement the * wait as a new controller operation that would be forwarded to the * DC. However, that would have potential problems of its own. */ const char *dc_version = NULL; dc_version = g_hash_table_lookup(scheduler->priv->options, PCMK_OPT_DC_VERSION); if (!pcmk__str_eq(dc_version, PACEMAKER_VERSION "-" BUILD_VERSION, pcmk__str_casei)) { out->info(out, "warning: wait option may not work properly in " "mixed-version cluster"); printed_version_warning = true; } } search = xpath_search(scheduler->input, xpath); pending_unknown_state_resources = (numXpathResults(search) > 0); freeXpathObject(search); } while (actions_are_pending(scheduler->priv->actions) || pending_unknown_state_resources); pe_free_working_set(scheduler); free(xpath); return rc; } static const char * get_action(const char *rsc_action) { const char *action = NULL; if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) { action = PCMK_ACTION_VALIDATE_ALL; } else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { action = PCMK_ACTION_MONITOR; } else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop", "force-demote", "force-promote", NULL)) { action = rsc_action+6; } else { action = rsc_action; } return action; } /*! * \brief Set up environment variables as expected by resource agents * * When the cluster executes resource agents, it adds certain environment * variables (directly or via resource meta-attributes) expected by some * resource agents. Add the essential ones that many resource agents expect, so * the behavior is the same for command-line execution. * * \param[in,out] params Resource parameters that will be passed to agent * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] check_level OCF check level * \param[in] verbosity Verbosity level */ static void set_agent_environment(GHashTable *params, guint timeout_ms, int check_level, int verbosity) { g_hash_table_insert(params, crm_meta_name(PCMK_META_TIMEOUT), crm_strdup_printf("%u", timeout_ms)); pcmk__insert_dup(params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET); if (check_level >= 0) { char *level = crm_strdup_printf("%d", check_level); setenv("OCF_CHECK_LEVEL", level, 1); free(level); } pcmk__set_env_option(PCMK__ENV_DEBUG, ((verbosity > 0)? "1" : "0"), true); if (verbosity > 1) { setenv("OCF_TRACE_RA", "1", 1); } /* A resource agent using the standard ocf-shellfuncs library will not print * messages to stderr if it doesn't have a controlling terminal (e.g. if * crm_resource is called via script or ssh). This forces it to do so. */ setenv("OCF_TRACE_FILE", "/dev/stderr", 0); } /*! * \internal * \brief Apply command-line overrides to resource parameters * * \param[in,out] params Parameters to be passed to agent * \param[in] overrides Parameters to override (or NULL if none) */ static void apply_overrides(GHashTable *params, GHashTable *overrides) { if (overrides != NULL) { GHashTableIter iter; char *name = NULL; char *value = NULL; g_hash_table_iter_init(&iter, overrides); while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { pcmk__insert_dup(params, name, value); } } } crm_exit_t cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, const char *rsc_class, const char *rsc_prov, const char *rsc_type, const char *rsc_action, GHashTable *params, GHashTable *override_hash, guint timeout_ms, int resource_verbose, gboolean force, int check_level) { const char *class = rsc_class; const char *action = get_action(rsc_action); crm_exit_t exit_code = CRM_EX_OK; svc_action_t *op = NULL; // If no timeout was provided, use the same default as the cluster if (timeout_ms == 0U) { timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } set_agent_environment(params, timeout_ms, check_level, resource_verbose); apply_overrides(params, override_hash); op = services__create_resource_action(rsc_name? rsc_name : "test", rsc_class, rsc_prov, rsc_type, action, 0, QB_MIN(timeout_ms, INT_MAX), params, 0); if (op == NULL) { out->err(out, "Could not execute %s using %s%s%s:%s: %s", action, rsc_class, (rsc_prov? ":" : ""), (rsc_prov? rsc_prov : ""), rsc_type, strerror(ENOMEM)); g_hash_table_destroy(params); return CRM_EX_OSERR; } if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei)) { class = resources_find_service_class(rsc_type); } if (!pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_OCF, PCMK_RESOURCE_CLASS_LSB, NULL)) { services__format_result(op, CRM_EX_UNIMPLEMENT_FEATURE, PCMK_EXEC_ERROR, "Manual execution of the %s standard is " "unsupported", pcmk__s(class, "unspecified")); } if (op->rc != PCMK_OCF_UNKNOWN) { exit_code = op->rc; goto done; } services_action_sync(op); // Map results to OCF codes for consistent reporting to user { enum ocf_exitcode ocf_code = services_result2ocf(class, action, op->rc); // Cast variable instead of function return to keep compilers happy exit_code = (crm_exit_t) ocf_code; } done: out->message(out, "resource-agent-action", resource_verbose, rsc_class, rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, exit_code, op->status, services__exit_reason(op), op->stdout_data, op->stderr_data); services_action_free(op); return exit_code; } /*! * \internal * \brief Get the timeout the cluster would use for an action * * \param[in] rsc Resource that action is for * \param[in] action Name of action */ static guint get_action_timeout(pcmk_resource_t *rsc, const char *action) { long long timeout_ms = -1LL; xmlNode *op = pcmk__find_action_config(rsc, action, 0, true); GHashTable *meta = pcmk__unpack_action_meta(rsc, NULL, action, 0, op); if ((pcmk__scan_ll(g_hash_table_lookup(meta, PCMK_META_TIMEOUT), &timeout_ms, -1LL) != pcmk_rc_ok) || (timeout_ms <= 0LL)) { timeout_ms = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } g_hash_table_destroy(meta); return (guint) QB_MIN(timeout_ms, UINT_MAX); } crm_exit_t cli_resource_execute(pcmk_resource_t *rsc, const char *requested_name, const char *rsc_action, GHashTable *override_hash, guint timeout_ms, cib_t *cib, pcmk_scheduler_t *scheduler, int resource_verbose, gboolean force, int check_level) { pcmk__output_t *out = scheduler->priv->out; crm_exit_t exit_code = CRM_EX_OK; const char *rid = requested_name; const char *rtype = NULL; const char *rprov = NULL; const char *rclass = NULL; GHashTable *params = NULL; if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote", "force-promote", NULL)) { if (pcmk__is_clone(rsc)) { GList *nodes = cli_resource_search(rsc, requested_name, scheduler); if(nodes != NULL && force == FALSE) { out->err(out, "It is not safe to %s %s here: the cluster claims it is already active", rsc_action, rsc->id); out->err(out, "Try setting " PCMK_META_TARGET_ROLE "=" PCMK_ROLE_STOPPED " first or specifying the force option"); return CRM_EX_UNSAFE; } g_list_free_full(nodes, free); } } if (pcmk__is_clone(rsc)) { /* Grab the first child resource in the hope it's not a group */ rsc = rsc->priv->children->data; } if (pcmk__is_group(rsc)) { out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } else if (pcmk__is_bundled(rsc)) { out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action); return CRM_EX_UNIMPLEMENT_FEATURE; } rclass = crm_element_value(rsc->priv->xml, PCMK_XA_CLASS); rprov = crm_element_value(rsc->priv->xml, PCMK_XA_PROVIDER); rtype = crm_element_value(rsc->priv->xml, PCMK_XA_TYPE); params = generate_resource_params(rsc, NULL /* @TODO use local node */, scheduler); if (timeout_ms == 0U) { timeout_ms = get_action_timeout(rsc, get_action(rsc_action)); } if (!pcmk__is_anonymous_clone(rsc->priv->parent)) { rid = rsc->id; } exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action, params, override_hash, timeout_ms, resource_verbose, force, check_level); return exit_code; } // \return Standard Pacemaker return code int cli_resource_move(const pcmk_resource_t *rsc, const char *rsc_id, const char *host_name, const char *move_lifetime, cib_t *cib, int cib_options, pcmk_scheduler_t *scheduler, gboolean promoted_role_only, gboolean force) { pcmk__output_t *out = scheduler->priv->out; int rc = pcmk_rc_ok; unsigned int count = 0; pcmk_node_t *current = NULL; pcmk_node_t *dest = pcmk_find_node(scheduler, host_name); bool cur_is_dest = false; if (dest == NULL) { return pcmk_rc_node_unknown; } if (promoted_role_only && !pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { const pcmk_resource_t *p = pe__const_top_resource(rsc, false); if (pcmk_is_set(p->flags, pcmk__rsc_promotable)) { out->info(out, "Using parent '%s' for move instead of '%s'.", rsc->id, rsc_id); rsc_id = p->id; rsc = p; } else { out->info(out, "Ignoring --promoted option: %s is not promotable", rsc_id); promoted_role_only = FALSE; } } current = pe__find_active_requires(rsc, &count); if (pcmk_is_set(rsc->flags, pcmk__rsc_promotable)) { unsigned int promoted_count = 0; pcmk_node_t *promoted_node = NULL; for (const GList *iter = rsc->priv->children; iter != NULL; iter = iter->next) { const pcmk_resource_t *child = (const pcmk_resource_t *) iter->data; enum rsc_role_e child_role = child->priv->fns->state(child, TRUE); if (child_role == pcmk_role_promoted) { rsc = child; promoted_node = pcmk__current_node(child); promoted_count++; } } if (promoted_role_only || (promoted_count != 0)) { count = promoted_count; current = promoted_node; } } if (count > 1) { if (pcmk__is_clone(rsc)) { current = NULL; } else { return pcmk_rc_multiple; } } if (pcmk__same_node(current, dest)) { cur_is_dest = true; if (force) { crm_info("%s is already %s on %s, reinforcing placement with location constraint.", rsc_id, promoted_role_only?"promoted":"active", pcmk__node_name(dest)); } else { return pcmk_rc_already; } } /* Clear any previous prefer constraints across all nodes. */ cli_resource_clear(rsc_id, NULL, scheduler->nodes, cib, cib_options, false, force); /* Clear any previous ban constraints on 'dest'. */ cli_resource_clear(rsc_id, dest->priv->name, scheduler->nodes, cib, cib_options, TRUE, force); /* Record an explicit preference for 'dest' */ rc = cli_resource_prefer(out, rsc_id, dest->priv->name, move_lifetime, cib, cib_options, promoted_role_only, PCMK_ROLE_PROMOTED); crm_trace("%s%s now prefers %s%s", rsc->id, (promoted_role_only? " (promoted)" : ""), pcmk__node_name(dest), force?"(forced)":""); /* only ban the previous location if current location != destination location. * it is possible to use -M to enforce a location without regard of where the * resource is currently located */ if (force && !cur_is_dest) { /* Ban the original location if possible */ if(current) { (void)cli_resource_ban(out, rsc_id, current->priv->name, move_lifetime, cib, cib_options, promoted_role_only, PCMK_ROLE_PROMOTED); } else if(count > 1) { out->info(out, "Resource '%s' is currently %s in %d locations. " "One may now move to %s", rsc_id, (promoted_role_only? "promoted" : "active"), count, pcmk__node_name(dest)); out->info(out, "To prevent '%s' from being %s at a specific location, " "specify a node.", rsc_id, (promoted_role_only? "promoted" : "active")); } else { crm_trace("Not banning %s from its current location: not active", rsc_id); } } return rc; } diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c index a54386ec82..418bc0cbd6 100644 --- a/tools/crm_simulate.c +++ b/tools/crm_simulate.c @@ -1,586 +1,585 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "crm_simulate - simulate a Pacemaker cluster's response to events" struct { char *dot_file; char *graph_file; gchar *input_file; pcmk_injections_t *injections; unsigned int flags; gchar *output_file; long long repeat; gboolean store; gchar *test_dir; char *use_date; char *xml_file; } options = { .flags = pcmk_sim_show_pending | pcmk_sim_sanitized, .repeat = 1 }; uint32_t section_opts = 0; char *temp_shadow = NULL; crm_exit_t exit_code = CRM_EX_OK; #define INDENT " " static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static gboolean all_actions_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_all_actions; return TRUE; } static gboolean attrs_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { section_opts |= pcmk_section_attributes; return TRUE; } static gboolean failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { section_opts |= pcmk_section_failcounts | pcmk_section_failures; return TRUE; } static gboolean in_place_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.store = TRUE; options.flags |= pcmk_sim_process | pcmk_sim_simulate; return TRUE; } static gboolean live_check_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (options.xml_file) { free(options.xml_file); } options.xml_file = NULL; options.flags &= ~pcmk_sim_sanitized; return TRUE; } static gboolean node_down_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->node_down = g_list_append(options.injections->node_down, g_strdup(optarg)); return TRUE; } static gboolean node_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->node_fail = g_list_append(options.injections->node_fail, g_strdup(optarg)); return TRUE; } static gboolean node_up_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { pcmk__simulate_node_config = true; options.injections->node_up = g_list_append(options.injections->node_up, g_strdup(optarg)); return TRUE; } static gboolean op_fail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process | pcmk_sim_simulate; options.injections->op_fail = g_list_append(options.injections->op_fail, g_strdup(optarg)); return TRUE; } static gboolean op_inject_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->op_inject = g_list_append(options.injections->op_inject, g_strdup(optarg)); return TRUE; } static gboolean pending_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_show_pending; return TRUE; } static gboolean process_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process; return TRUE; } static gboolean quorum_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { pcmk__str_update(&options.injections->quorum, optarg); return TRUE; } static gboolean save_dotfile_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process; pcmk__str_update(&options.dot_file, optarg); return TRUE; } static gboolean save_graph_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process; pcmk__str_update(&options.graph_file, optarg); return TRUE; } static gboolean show_scores_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process | pcmk_sim_show_scores; return TRUE; } static gboolean simulate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process | pcmk_sim_simulate; return TRUE; } static gboolean ticket_activate_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->ticket_activate = g_list_append(options.injections->ticket_activate, g_strdup(optarg)); return TRUE; } static gboolean ticket_grant_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->ticket_grant = g_list_append(options.injections->ticket_grant, g_strdup(optarg)); return TRUE; } static gboolean ticket_revoke_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->ticket_revoke = g_list_append(options.injections->ticket_revoke, g_strdup(optarg)); return TRUE; } static gboolean ticket_standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.injections->ticket_standby = g_list_append(options.injections->ticket_standby, g_strdup(optarg)); return TRUE; } static gboolean utilization_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.flags |= pcmk_sim_process | pcmk_sim_show_utilization; return TRUE; } static gboolean watchdog_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { pcmk__str_update(&options.injections->watchdog, optarg); return TRUE; } static gboolean xml_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { pcmk__str_update(&options.xml_file, optarg); options.flags |= pcmk_sim_sanitized; return TRUE; } static gboolean xml_pipe_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { pcmk__str_update(&options.xml_file, "-"); options.flags |= pcmk_sim_sanitized; return TRUE; } static GOptionEntry operation_entries[] = { { "run", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, process_cb, "Process the supplied input and show what actions the cluster will take in response", NULL }, { "simulate", 'S', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, simulate_cb, "Like --run, but also simulate taking those actions and show the resulting new status", NULL }, { "in-place", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, in_place_cb, "Like --simulate, but also store the results back to the input file", NULL }, { "show-attrs", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, attrs_cb, "Show node attributes", NULL }, { "show-failcounts", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, failcounts_cb, "Show resource fail counts", NULL }, { "show-scores", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_scores_cb, "Show allocation scores", NULL }, { "show-utilization", 'U', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, utilization_cb, "Show utilization information", NULL }, { "profile", 'P', 0, G_OPTION_ARG_FILENAME, &options.test_dir, "Process all the XML files in the named directory to create profiling data", "DIR" }, { "repeat", 'N', 0, G_OPTION_ARG_INT, &options.repeat, "With --profile, repeat each test N times and print timings", "N" }, /* Deprecated */ { "pending", 'j', G_OPTION_FLAG_NO_ARG|G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, pending_cb, "Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled", NULL }, { NULL } }; static GOptionEntry synthetic_entries[] = { { "node-up", 'u', 0, G_OPTION_ARG_CALLBACK, node_up_cb, "Simulate bringing a node online", "NODE" }, { "node-down", 'd', 0, G_OPTION_ARG_CALLBACK, node_down_cb, "Simulate taking a node offline", "NODE" }, { "node-fail", 'f', 0, G_OPTION_ARG_CALLBACK, node_fail_cb, "Simulate a node failing", "NODE" }, { "op-inject", 'i', 0, G_OPTION_ARG_CALLBACK, op_inject_cb, "Generate a failure for the cluster to react to in the simulation.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "op-fail", 'F', 0, G_OPTION_ARG_CALLBACK, op_fail_cb, "If the specified task occurs during the simulation, have it fail with return code ${rc}.\n" INDENT "The transition will normally stop at the failed action.\n" INDENT "Save the result with --save-output and re-run with --xml-file.\n" INDENT "See `Operation Specification` help for more information.", "OPSPEC" }, { "set-datetime", 't', 0, G_OPTION_ARG_STRING, &options.use_date, "Set date/time (ISO 8601 format, see https://en.wikipedia.org/wiki/ISO_8601)", "DATETIME" }, { "quorum", 'q', 0, G_OPTION_ARG_CALLBACK, quorum_cb, "Set to '1' (or 'true') to indicate cluster has quorum", "QUORUM" }, { "watchdog", 'w', 0, G_OPTION_ARG_CALLBACK, watchdog_cb, "Set to '1' (or 'true') to indicate cluster has an active watchdog device", "DEVICE" }, { "ticket-grant", 'g', 0, G_OPTION_ARG_CALLBACK, ticket_grant_cb, "Simulate granting a ticket", "TICKET" }, { "ticket-revoke", 'r', 0, G_OPTION_ARG_CALLBACK, ticket_revoke_cb, "Simulate revoking a ticket", "TICKET" }, { "ticket-standby", 'b', 0, G_OPTION_ARG_CALLBACK, ticket_standby_cb, "Simulate making a ticket standby", "TICKET" }, { "ticket-activate", 'e', 0, G_OPTION_ARG_CALLBACK, ticket_activate_cb, "Simulate activating a ticket", "TICKET" }, { NULL } }; static GOptionEntry artifact_entries[] = { { "save-input", 'I', 0, G_OPTION_ARG_FILENAME, &options.input_file, "Save the input configuration to the named file", "FILE" }, { "save-output", 'O', 0, G_OPTION_ARG_FILENAME, &options.output_file, "Save the output configuration to the named file", "FILE" }, { "save-graph", 'G', 0, G_OPTION_ARG_CALLBACK, save_graph_cb, "Save the transition graph (XML format) to the named file", "FILE" }, { "save-dotfile", 'D', 0, G_OPTION_ARG_CALLBACK, save_dotfile_cb, "Save the transition graph (DOT format) to the named file", "FILE" }, { "all-actions", 'a', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, all_actions_cb, "Display all possible actions in DOT graph (even if not part of transition)", NULL }, { NULL } }; static GOptionEntry source_entries[] = { { "live-check", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, live_check_cb, "Connect to CIB manager and use the current CIB contents as input", NULL }, { "xml-file", 'x', 0, G_OPTION_ARG_CALLBACK, xml_file_cb, "Retrieve XML from the named file", "FILE" }, { "xml-pipe", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, xml_pipe_cb, "Retrieve XML from stdin", NULL }, { NULL } }; static int setup_input(pcmk__output_t *out, const char *input, const char *output, GError **error) { int rc = pcmk_rc_ok; xmlNode *cib_object = NULL; char *local_output = NULL; if (input == NULL) { /* Use live CIB */ rc = cib__signon_query(out, NULL, &cib_object); if (rc != pcmk_rc_ok) { // cib__signon_query() outputs any relevant error return rc; } } else if (pcmk__str_eq(input, "-", pcmk__str_casei)) { cib_object = pcmk__xml_read(NULL); } else { cib_object = pcmk__xml_read(input); } if (cib_object == NULL) { rc = pcmk_rc_bad_input; g_set_error(error, PCMK__EXITC_ERROR, pcmk_rc2exitc(rc), "Could not read input XML: %s", pcmk_rc_str(rc)); return rc; } if (pcmk_find_cib_element(cib_object, PCMK_XE_STATUS) == NULL) { pcmk__xe_create(cib_object, PCMK_XE_STATUS); } rc = pcmk_update_configured_schema(&cib_object, false); if (rc != pcmk_rc_ok) { pcmk__xml_free(cib_object); return rc; } if (!pcmk__validate_xml(cib_object, NULL, NULL, NULL)) { pcmk__xml_free(cib_object); return pcmk_rc_schema_validation; } if (output == NULL) { char *pid = pcmk__getpid_s(); local_output = get_shadow_file(pid); temp_shadow = strdup(local_output); output = local_output; free(pid); } rc = pcmk__xml_write_file(cib_object, output, false); if (rc != pcmk_rc_ok) { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_CANTCREAT, "Could not create '%s': %s", output, pcmk_rc_str(rc)); } else { setenv("CIB_file", output, 1); } pcmk__xml_free(cib_object); free(local_output); return rc; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Display only essential output", NULL }, { NULL } }; const char *description = "Operation Specification:\n\n" "The OPSPEC in any command line option is of the form\n" "${resource}_${task}_${interval_in_ms}@${node}=${rc}\n" "(memcached_monitor_20000@bart.example.com=7, for example).\n" "${rc} is an OCF return code. For more information on these\n" "return codes, refer to https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#ocf-return-codes\n\n" "Examples:\n\n" "Pretend a recurring monitor action found memcached stopped on node\n" "fred.example.com and, during recovery, that the memcached stop\n" "action failed:\n\n" "\tcrm_simulate -LS --op-inject memcached:0_monitor_20000@bart.example.com=7 " "--op-fail memcached:0_stop_0@fred.example.com=1 --save-output /tmp/memcached-test.xml\n\n" "Now see what the reaction to the stop failed would be:\n\n" "\tcrm_simulate -S --xml-file /tmp/memcached-test.xml\n\n"; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "operations", "Operations:", "Show operations options", operation_entries); pcmk__add_arg_group(context, "synthetic", "Synthetic Cluster Events:", "Show synthetic cluster event options", synthetic_entries); pcmk__add_arg_group(context, "artifact", "Artifact Options:", "Show artifact options", artifact_entries); pcmk__add_arg_group(context, "source", "Data Source:", "Show data source options", source_entries); return context; } int main(int argc, char **argv) { int rc = pcmk_rc_ok; pcmk_scheduler_t *scheduler = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "bdefgiqrtuwxDFGINOP"); GOptionContext *context = build_arg_context(args, &output_group); options.injections = calloc(1, sizeof(pcmk_injections_t)); if (options.injections == NULL) { rc = ENOMEM; goto done; } /* This must come before g_option_context_parse_strv. */ options.xml_file = strdup("-"); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("crm_simulate", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { fprintf(stderr, "Error creating output format %s: %s\n", args->output_ty, pcmk_rc_str(rc)); exit_code = CRM_EX_ERROR; goto done; } if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches) && !(pcmk_is_set(options.flags, pcmk_sim_show_scores) && args->quiet)) { pcmk__output_text_set_fancy(out, true); } pe__register_messages(out); pcmk__register_lib_messages(out); out->quiet = args->quiet; if (args->version) { out->version(out, false); goto done; } if (args->verbosity > 0) { options.flags |= pcmk_sim_verbose; } scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = ENOMEM; g_set_error(&error, PCMK__RC_ERROR, rc, "Could not allocate scheduler data"); goto done; } if (pcmk_is_set(options.flags, pcmk_sim_show_scores)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_output_scores); } if (pcmk_is_set(options.flags, pcmk_sim_show_utilization)) { pcmk__set_scheduler_flags(scheduler, pcmk__sched_show_utilization); } - pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_compat); if (options.test_dir != NULL) { scheduler->priv->out = out; pcmk__profile_dir(options.test_dir, options.repeat, scheduler, options.use_date); rc = pcmk_rc_ok; goto done; } rc = setup_input(out, options.xml_file, options.store? options.xml_file : options.output_file, &error); if (rc != pcmk_rc_ok) { goto done; } rc = pcmk__simulate(scheduler, out, options.injections, options.flags, section_opts, options.use_date, options.input_file, options.graph_file, options.dot_file); done: pcmk__output_and_clear_error(&error, NULL); /* There sure is a lot to free in options. */ free(options.dot_file); free(options.graph_file); g_free(options.input_file); g_free(options.output_file); g_free(options.test_dir); free(options.use_date); free(options.xml_file); pcmk_free_injections(options.injections); pcmk__free_arg_context(context); g_strfreev(processed_args); if (scheduler != NULL) { pe_free_working_set(scheduler); } fflush(stderr); if (temp_shadow) { unlink(temp_shadow); free(temp_shadow); } if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); } if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/tools/crm_ticket.c b/tools/crm_ticket.c index e2a9b0991e..68c9ff39f8 100644 --- a/tools/crm_ticket.c +++ b/tools/crm_ticket.c @@ -1,666 +1,665 @@ /* * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GError *error = NULL; #define SUMMARY "Perform tasks related to cluster tickets\n\n" \ "Allows ticket attributes to be queried, modified and deleted." struct { gchar *attr_default; gchar *attr_id; char *attr_name; char *attr_value; gboolean force; char *get_attr_name; gboolean quiet; gchar *set_name; char ticket_cmd; gchar *ticket_id; gchar *xml_file; } options = { .ticket_cmd = 'S' }; GList *attr_delete; GHashTable *attr_set; bool modified = false; int cib_options = cib_sync_call; static pcmk__output_t *out = NULL; #define INDENT " " static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static gboolean attr_value_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&options.attr_value, optarg); if (!options.attr_name || !options.attr_value) { return TRUE; } pcmk__insert_dup(attr_set, options.attr_name, options.attr_value); pcmk__str_update(&options.attr_name, NULL); pcmk__str_update(&options.attr_value, NULL); modified = true; return TRUE; } static gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (pcmk__str_any_of(option_name, "--info", "-l", NULL)) { options.ticket_cmd = 'l'; } else if (pcmk__str_any_of(option_name, "--details", "-L", NULL)) { options.ticket_cmd = 'L'; } else if (pcmk__str_any_of(option_name, "--raw", "-w", NULL)) { options.ticket_cmd = 'w'; } else if (pcmk__str_any_of(option_name, "--query-xml", "-q", NULL)) { options.ticket_cmd = 'q'; } else if (pcmk__str_any_of(option_name, "--constraints", "-c", NULL)) { options.ticket_cmd = 'c'; } else if (pcmk__str_any_of(option_name, "--cleanup", "-C", NULL)) { options.ticket_cmd = 'C'; } return TRUE; } static gboolean delete_attr_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { attr_delete = g_list_append(attr_delete, strdup(optarg)); modified = true; return TRUE; } static gboolean get_attr_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&options.get_attr_name, optarg); options.ticket_cmd = 'G'; return TRUE; } static gboolean grant_standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (pcmk__str_any_of(option_name, "--grant", "-g", NULL)) { pcmk__insert_dup(attr_set, PCMK__XA_GRANTED, PCMK_VALUE_TRUE); modified = true; } else if (pcmk__str_any_of(option_name, "--revoke", "-r", NULL)) { pcmk__insert_dup(attr_set, PCMK__XA_GRANTED, PCMK_VALUE_FALSE); modified = true; } else if (pcmk__str_any_of(option_name, "--standby", "-s", NULL)) { pcmk__insert_dup(attr_set, PCMK_XA_STANDBY, PCMK_VALUE_TRUE); modified = true; } else if (pcmk__str_any_of(option_name, "--activate", "-a", NULL)) { pcmk__insert_dup(attr_set, PCMK_XA_STANDBY, PCMK_VALUE_FALSE); modified = true; } return TRUE; } static gboolean set_attr_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&options.attr_name, optarg); if (!options.attr_name || !options.attr_value) { return TRUE; } pcmk__insert_dup(attr_set, options.attr_name, options.attr_value); pcmk__str_update(&options.attr_name, NULL); pcmk__str_update(&options.attr_value, NULL); modified = true; return TRUE; } static GOptionEntry query_entries[] = { { "info", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the information of ticket(s)", NULL }, { "details", 'L', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the details of ticket(s)", NULL }, { "raw", 'w', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the IDs of ticket(s)", NULL }, { "query-xml", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Query the XML of ticket(s)", NULL }, { "constraints", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Display the " PCMK_XE_RSC_TICKET " constraints that apply to ticket(s)", NULL }, { NULL } }; static GOptionEntry command_entries[] = { { "grant", 'g', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, grant_standby_cb, "Grant a ticket to this cluster site", NULL }, { "revoke", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, grant_standby_cb, "Revoke a ticket from this cluster site", NULL }, { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, grant_standby_cb, "Tell this cluster site this ticket is standby", NULL }, { "activate", 'a', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, grant_standby_cb, "Tell this cluster site this ticket is active", NULL }, { NULL } }; static GOptionEntry advanced_entries[] = { { "get-attr", 'G', 0, G_OPTION_ARG_CALLBACK, get_attr_cb, "Display the named attribute for a ticket", "ATTRIBUTE" }, { "set-attr", 'S', 0, G_OPTION_ARG_CALLBACK, set_attr_cb, "Set the named attribute for a ticket", "ATTRIBUTE" }, { "delete-attr", 'D', 0, G_OPTION_ARG_CALLBACK, delete_attr_cb, "Delete the named attribute for a ticket", "ATTRIBUTE" }, { "cleanup", 'C', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb, "Delete all state of a ticket at this cluster site", NULL }, { NULL} }; static GOptionEntry addl_entries[] = { { "attr-value", 'v', 0, G_OPTION_ARG_CALLBACK, attr_value_cb, "Attribute value to use with -S", "VALUE" }, { "default", 'd', 0, G_OPTION_ARG_STRING, &options.attr_default, "(Advanced) Default attribute value to display if none is found\n" INDENT "(for use with -G)", "VALUE" }, { "force", 'f', 0, G_OPTION_ARG_NONE, &options.force, "(Advanced) Force the action to be performed", NULL }, { "ticket", 't', 0, G_OPTION_ARG_STRING, &options.ticket_id, "Ticket ID", "ID" }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_STRING, &options.xml_file, NULL, NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { { "set-name", 'n', 0, G_OPTION_ARG_STRING, &options.set_name, "(Advanced) ID of the " PCMK_XE_INSTANCE_ATTRIBUTES " object to change", "ID" }, { "nvpair", 'i', 0, G_OPTION_ARG_STRING, &options.attr_id, "(Advanced) ID of the nvpair object to change/delete", "ID" }, { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &options.quiet, "Print only the value on stdout", NULL }, { NULL } }; static void ticket_grant_warning(gchar *ticket_id) { out->err(out, "This command cannot help you verify whether '%s' has " "been already granted elsewhere.\n" "If you really want to grant '%s' to this site now, and " "you know what you are doing,\n" "please specify --force.", ticket_id, ticket_id); } static void ticket_revoke_warning(gchar *ticket_id) { out->err(out, "Revoking '%s' can trigger the specified '" PCMK_XA_LOSS_POLICY "'(s) relating to '%s'.\n\n" "You can check that with:\n" "crm_ticket --ticket %s --constraints\n\n" "Otherwise before revoking '%s', you may want to make '%s'" "standby with:\n" "crm_ticket --ticket %s --standby\n\n" "If you really want to revoke '%s' from this site now, and " "you know what you are doing,\n" "please specify --force.", ticket_id, ticket_id, ticket_id, ticket_id, ticket_id, ticket_id, ticket_id); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; const char *description = "Examples:\n\n" "Display the info of tickets:\n\n" "\tcrm_ticket --info\n\n" "Display the detailed info of tickets:\n\n" "\tcrm_ticket --details\n\n" "Display the XML of 'ticketA':\n\n" "\tcrm_ticket --ticket ticketA --query-xml\n\n" "Display the " PCMK_XE_RSC_TICKET " constraints that apply to 'ticketA':\n\n" "\tcrm_ticket --ticket ticketA --constraints\n\n" "Grant 'ticketA' to this cluster site:\n\n" "\tcrm_ticket --ticket ticketA --grant\n\n" "Revoke 'ticketA' from this cluster site:\n\n" "\tcrm_ticket --ticket ticketA --revoke\n\n" "Make 'ticketA' standby (the cluster site will treat a granted\n" "'ticketA' as 'standby', and the dependent resources will be\n" "stopped or demoted gracefully without triggering loss-policies):\n\n" "\tcrm_ticket --ticket ticketA --standby\n\n" "Activate 'ticketA' from being standby:\n\n" "\tcrm_ticket --ticket ticketA --activate\n\n" "Get the value of the 'granted' attribute for 'ticketA':\n\n" "\tcrm_ticket --ticket ticketA --get-attr granted\n\n" "Set the value of the 'standby' attribute for 'ticketA':\n\n" "\tcrm_ticket --ticket ticketA --set-attr standby --attr-value true\n\n" "Delete the 'granted' attribute for 'ticketA':\n\n" "\tcrm_ticket --ticket ticketA --delete-attr granted\n\n" "Erase the operation history of 'ticketA' at this cluster site,\n" "causing the cluster site to 'forget' the existing ticket state:\n\n" "\tcrm_ticket --ticket ticketA --cleanup\n\n"; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); g_option_context_set_description(context, description); pcmk__add_arg_group(context, "queries", "Queries:", "Show queries", query_entries); pcmk__add_arg_group(context, "commands", "Commands:", "Show command options", command_entries); pcmk__add_arg_group(context, "advanced", "Advanced Options:", "Show advanced options", advanced_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } int main(int argc, char **argv) { pcmk_scheduler_t *scheduler = NULL; xmlNode *cib_xml_copy = NULL; cib_t *cib_conn = NULL; crm_exit_t exit_code = CRM_EX_OK; int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; GOptionContext *context = NULL; gchar **processed_args = NULL; attr_set = pcmk__strkey_table(free, free); attr_delete = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); processed_args = pcmk__cmdline_preproc(argv, "dintvxCDGS"); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("crm_ticket", args->verbosity); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pe__register_messages(out); pcmk__register_lib_messages(out); if (args->version) { out->version(out, false); goto done; } scheduler = pe_new_working_set(); if (scheduler == NULL) { rc = errno; exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not allocate scheduler data: %s", pcmk_rc_str(rc)); goto done; } - pcmk__set_scheduler_flags(scheduler, - pcmk__sched_no_counts|pcmk__sched_no_compat); + pcmk__set_scheduler_flags(scheduler, pcmk__sched_no_counts); cib_conn = cib_new(); if (cib_conn == NULL) { exit_code = CRM_EX_DISCONNECT; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not connect to the CIB manager"); goto done; } rc = cib__signon_attempts(cib_conn, crm_system_name, cib_command, 5); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not connect to the CIB: %s", pcmk_rc_str(rc)); goto done; } if (options.xml_file != NULL) { cib_xml_copy = pcmk__xml_read(options.xml_file); } else { rc = cib_conn->cmds->query(cib_conn, NULL, &cib_xml_copy, cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not get local CIB: %s", pcmk_rc_str(rc)); goto done; } } rc = pcmk_update_configured_schema(&cib_xml_copy, false); if (rc != pcmk_rc_ok) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not update local CIB to latest schema version"); goto done; } scheduler->input = cib_xml_copy; scheduler->priv->now = crm_time_new(NULL); cluster_status(scheduler); /* For recording the tickets that are referenced in PCMK_XE_RSC_TICKET * constraints but have never been granted yet. */ pcmk__unpack_constraints(scheduler); if (options.ticket_cmd == 'l' || options.ticket_cmd == 'L' || options.ticket_cmd == 'w') { bool raw = false; bool details = false; if (options.ticket_cmd == 'L') { details = true; } else if (options.ticket_cmd == 'w') { raw = true; } rc = pcmk__ticket_info(out, scheduler, options.ticket_id, details, raw); exit_code = pcmk_rc2exitc(rc); if (rc == ENXIO) { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "No such ticket '%s'", options.ticket_id); } else if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not get ticket info: %s", pcmk_rc_str(rc)); } } else if (options.ticket_cmd == 'q') { rc = pcmk__ticket_state(out, cib_conn, options.ticket_id); if (rc != pcmk_rc_ok && rc != pcmk_rc_duplicate_id) { exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not query ticket XML: %s", pcmk_rc_str(rc)); } else { exit_code = CRM_EX_OK; } } else if (options.ticket_cmd == 'c') { rc = pcmk__ticket_constraints(out, cib_conn, options.ticket_id); exit_code = pcmk_rc2exitc(rc); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not show ticket constraints: %s", pcmk_rc_str(rc)); } } else if (options.ticket_cmd == 'G') { if (options.ticket_id == NULL) { exit_code = CRM_EX_NOSUCH; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply ticket ID with -t"); goto done; } rc = pcmk__ticket_get_attr(out, scheduler, options.ticket_id, options.get_attr_name, options.attr_default); exit_code = pcmk_rc2exitc(rc); } else if (options.ticket_cmd == 'C') { if (options.ticket_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply ticket ID with -t"); goto done; } rc = pcmk__ticket_delete(out, cib_conn, scheduler, options.ticket_id, options.force); exit_code = pcmk_rc2exitc(rc); switch (rc) { case ENXIO: g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "No such ticket '%s'", options.ticket_id); break; case EACCES: ticket_revoke_warning(options.ticket_id); break; case pcmk_rc_ok: case pcmk_rc_duplicate_id: break; default: g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not clean up ticket: %s", pcmk_rc_str(rc)); break; } } else if (modified) { if (options.ticket_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply ticket ID with -t"); goto done; } if (options.attr_value && (pcmk__str_empty(options.attr_name))) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply attribute name with -S for -v %s", options.attr_value); goto done; } if (options.attr_name && (pcmk__str_empty(options.attr_value))) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply attribute value with -v for -S %s", options.attr_value); goto done; } if (attr_delete != NULL) { rc = pcmk__ticket_remove_attr(out, cib_conn, scheduler, options.ticket_id, attr_delete, options.force); if (rc == EACCES) { ticket_revoke_warning(options.ticket_id); exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Ticket modification not allowed without --force"); } } else { rc = pcmk__ticket_set_attr(out, cib_conn, scheduler, options.ticket_id, attr_set, options.force); if (rc == EACCES) { const char *value = NULL; value = g_hash_table_lookup(attr_set, PCMK__XA_GRANTED); if (crm_is_true(value)) { ticket_grant_warning(options.ticket_id); } else { ticket_revoke_warning(options.ticket_id); } exit_code = pcmk_rc2exitc(rc); g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Ticket modification not allowed without --force"); } } exit_code = pcmk_rc2exitc(rc); if (rc != pcmk_rc_ok && error == NULL) { g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not modify ticket: %s", pcmk_rc_str(rc)); } } else if (options.ticket_cmd == 'S') { /* Correct usage was handled in the "if (modified)" block above, so * this is just for reporting usage errors */ if (pcmk__str_empty(options.attr_name)) { // We only get here if ticket_cmd was left as default exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply a command"); goto done; } if (options.ticket_id == NULL) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply ticket ID with -t"); goto done; } if (pcmk__str_empty(options.attr_value)) { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Must supply value with -v for -S %s", options.attr_name); goto done; } } else { exit_code = CRM_EX_USAGE; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Unknown command: %c", options.ticket_cmd); } done: if (attr_set) { g_hash_table_destroy(attr_set); } attr_set = NULL; if (attr_delete) { g_list_free_full(attr_delete, free); } attr_delete = NULL; pe_free_working_set(scheduler); scheduler = NULL; cib__clean_up_connection(&cib_conn); g_strfreev(processed_args); pcmk__free_arg_context(context); g_free(options.attr_default); g_free(options.attr_id); free(options.attr_name); free(options.attr_value); free(options.get_attr_name); g_free(options.set_name); g_free(options.ticket_id); g_free(options.xml_file); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); }