diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index 67f0b7d6e3..ee87d1bb7e 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -1,577 +1,577 @@ /* * Copyright 2012-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__api_new() #include #include "pacemaker-execd.h" #ifdef PCMK__COMPILE_REMOTE # define EXECD_TYPE "remote" # define EXECD_NAME PCMK__SERVER_REMOTED # define SUMMARY "resource agent executor daemon for Pacemaker Remote nodes" #else # define EXECD_TYPE "local" # define EXECD_NAME PCMK__SERVER_EXECD # define SUMMARY "resource agent executor daemon for Pacemaker cluster nodes" #endif static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; static stonith_t *stonith_api = NULL; int lrmd_call_id = 0; time_t start_time; static struct { gchar **log_files; #ifdef PCMK__COMPILE_REMOTE gchar *port; #endif // PCMK__COMPILE_REMOTE } options; #ifdef PCMK__COMPILE_REMOTE /* whether shutdown request has been sent */ static gboolean shutting_down = FALSE; #endif static void exit_executor(void); static void stonith_connection_destroy_cb(stonith_t * st, stonith_event_t * e) { stonith_api->state = stonith_disconnected; stonith_connection_failed(); } stonith_t * get_stonith_connection(void) { if (stonith_api && stonith_api->state == stonith_disconnected) { - stonith_api_delete(stonith_api); + stonith__api_free(stonith_api); stonith_api = NULL; } if (stonith_api == NULL) { int rc = pcmk_ok; stonith_api = stonith__api_new(); if (stonith_api == NULL) { crm_err("Could not connect to fencer: API memory allocation failed"); return NULL; } rc = stonith_api_connect_retry(stonith_api, crm_system_name, 10); if (rc != pcmk_ok) { crm_err("Could not connect to fencer in 10 attempts: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); - stonith_api_delete(stonith_api); + stonith__api_free(stonith_api); stonith_api = NULL; } else { stonith_api_operations_t *cmds = stonith_api->cmds; cmds->register_notification(stonith_api, PCMK__VALUE_ST_NOTIFY_DISCONNECT, stonith_connection_destroy_cb); } } return stonith_api; } static int32_t lrmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -ENOMEM; } return 0; } static void lrmd_ipc_created(qb_ipcs_connection_t * c) { pcmk__client_t *new_client = pcmk__find_client(c); crm_trace("Connection %p", c); pcmk__assert(new_client != NULL); /* Now that the connection is offically established, alert * the other clients a new connection exists. */ notify_of_new_client(new_client); } static int32_t lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); CRM_CHECK(client->id != NULL, crm_err("Invalid client: %p", client); return FALSE); CRM_CHECK(flags & crm_ipc_client_response, crm_err("Invalid client request: %p", client); return FALSE); if (!request) { return 0; } /* @TODO functionize some of this to reduce duplication with * lrmd_remote_client_msg() */ if (!client->name) { const char *value = crm_element_value(request, PCMK__XA_LRMD_CLIENTNAME); if (value == NULL) { client->name = pcmk__itoa(pcmk__client_pid(c)); } else { client->name = pcmk__str_copy(value); } } lrmd_call_id++; if (lrmd_call_id < 1) { lrmd_call_id = 1; } crm_xml_add(request, PCMK__XA_LRMD_CLIENTID, client->id); crm_xml_add(request, PCMK__XA_LRMD_CLIENTNAME, client->name); crm_xml_add_int(request, PCMK__XA_LRMD_CALLID, lrmd_call_id); process_lrmd_message(client, id, request); pcmk__xml_free(request); return 0; } /*! * \internal * \brief Free a client connection, and exit if appropriate * * \param[in,out] client Client connection to free */ void lrmd_client_destroy(pcmk__client_t *client) { pcmk__free_client(client); #ifdef PCMK__COMPILE_REMOTE /* If we were waiting to shut down, we can now safely do so * if there are no more proxied IPC providers */ if (shutting_down && (ipc_proxy_get_provider() == NULL)) { exit_executor(); } #endif } static int32_t lrmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); client_disconnect_cleanup(client->id); #ifdef PCMK__COMPILE_REMOTE ipc_proxy_remove_provider(client); #endif lrmd_client_destroy(client); return 0; } static void lrmd_ipc_destroy(qb_ipcs_connection_t * c) { lrmd_ipc_closed(c); crm_trace("Connection %p", c); } static struct qb_ipcs_service_handlers lrmd_ipc_callbacks = { .connection_accept = lrmd_ipc_accept, .connection_created = lrmd_ipc_created, .msg_process = lrmd_ipc_dispatch, .connection_closed = lrmd_ipc_closed, .connection_destroyed = lrmd_ipc_destroy }; // \return Standard Pacemaker return code int lrmd_server_send_reply(pcmk__client_t *client, uint32_t id, xmlNode *reply) { crm_trace("Sending reply (%d) to client (%s)", id, client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: return pcmk__ipc_send_xml(client, id, reply, FALSE); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: return lrmd__remote_send_xml(client->remote, reply, id, "reply"); #endif default: crm_err("Could not send reply: unknown type for client %s " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } // \return Standard Pacemaker return code int lrmd_server_send_notify(pcmk__client_t *client, xmlNode *msg) { crm_trace("Sending notification to client (%s)", client->id); switch (PCMK__CLIENT_TYPE(client)) { case pcmk__client_ipc: if (client->ipcs == NULL) { crm_trace("Could not notify local client: disconnected"); return ENOTCONN; } return pcmk__ipc_send_xml(client, 0, msg, crm_ipc_server_event); #ifdef PCMK__COMPILE_REMOTE case pcmk__client_tls: if (client->remote == NULL) { crm_trace("Could not notify remote client: disconnected"); return ENOTCONN; } else { return lrmd__remote_send_xml(client->remote, msg, 0, "notify"); } #endif default: crm_err("Could not notify client %s with unknown transport " QB_XS " flags=%#llx", pcmk__client_name(client), client->flags); } return ENOTCONN; } /*! * \internal * \brief Clean up and exit immediately */ static void exit_executor(void) { const guint nclients = pcmk__ipc_client_count(); crm_info("Terminating with %d client%s", nclients, pcmk__plural_s(nclients)); - stonith_api_delete(stonith_api); + stonith__api_free(stonith_api); if (ipcs) { mainloop_del_ipc_server(ipcs); } #ifdef PCMK__COMPILE_REMOTE execd_stop_tls_server(); ipc_proxy_cleanup(); #endif pcmk__client_cleanup(); if (mainloop) { lrmd_drain_alerts(mainloop); } g_hash_table_destroy(rsc_list); // @TODO End mainloop instead so all cleanup is done crm_exit(CRM_EX_OK); } /*! * \internal * \brief Request cluster shutdown if appropriate, otherwise exit immediately * * \param[in] nsig Signal that caused invocation (ignored) */ static void lrmd_shutdown(int nsig) { #ifdef PCMK__COMPILE_REMOTE pcmk__client_t *ipc_proxy = ipc_proxy_get_provider(); /* If there are active proxied IPC providers, then we may be running * resources, so notify the cluster that we wish to shut down. */ if (ipc_proxy) { if (shutting_down) { crm_notice("Waiting for cluster to stop resources before exiting"); return; } crm_info("Sending shutdown request to cluster"); if (ipc_proxy_shutdown_req(ipc_proxy) < 0) { crm_crit("Shutdown request failed, exiting immediately"); } else { /* We requested a shutdown. Now, we need to wait for an * acknowledgement from the proxy host, then wait for all proxy * hosts to disconnect (which ensures that all resources have been * stopped). */ shutting_down = TRUE; /* Stop accepting new proxy connections */ execd_stop_tls_server(); /* Currently, we let the OS kill us if the clients don't disconnect * in a reasonable time. We could instead set a long timer here * (shorter than what the OS is likely to use) and exit immediately * if it pops. */ return; } } #endif exit_executor(); } /*! * \internal * \brief Log a shutdown acknowledgment */ void handle_shutdown_ack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("IPC proxy provider acknowledged shutdown request"); return; } #endif crm_debug("Ignoring unexpected shutdown acknowledgment " "from IPC proxy provider"); } /*! * \internal * \brief Handle rejection of shutdown request */ void handle_shutdown_nack(void) { #ifdef PCMK__COMPILE_REMOTE if (shutting_down) { crm_info("Exiting immediately after IPC proxy provider " "indicated no resources will be stopped"); exit_executor(); return; } #endif crm_debug("Ignoring unexpected shutdown rejection from IPC proxy provider"); } static GOptionEntry entries[] = { { "logfile", 'l', G_OPTION_FLAG_NONE, G_OPTION_ARG_FILENAME_ARRAY, &options.log_files, "Send logs to the additional named logfile", NULL }, #ifdef PCMK__COMPILE_REMOTE { "port", 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, &options.port, "Port to listen on (defaults to " G_STRINGIFY(DEFAULT_REMOTE_PORT) ")", NULL }, #endif // PCMK__COMPILE_REMOTE { NULL } }; static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv, char **envp) { int rc = pcmk_rc_ok; crm_exit_t exit_code = CRM_EX_OK; const char *option = NULL; pcmk__output_t *out = NULL; GError *error = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = NULL; gchar **processed_args = NULL; GOptionContext *context = NULL; #ifdef PCMK__COMPILE_REMOTE // If necessary, create PID 1 now before any file descriptors are opened remoted_spawn_pidone(argc, argv, envp); #endif args = pcmk__new_common_args(SUMMARY); #ifdef PCMK__COMPILE_REMOTE processed_args = pcmk__cmdline_preproc(argv, "lp"); #else processed_args = pcmk__cmdline_preproc(argv, "l"); #endif // PCMK__COMPILE_REMOTE context = build_arg_context(args, &output_group); crm_log_preinit(EXECD_NAME, argc, argv); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } if (args->version) { out->version(out, false); goto done; } // Open additional log files if (options.log_files != NULL) { for (gchar **fname = options.log_files; *fname != NULL; fname++) { rc = pcmk__add_logfile(*fname); if (rc != pcmk_rc_ok) { out->err(out, "Logging to %s is disabled: %s", *fname, pcmk_rc_str(rc)); } } } pcmk__cli_init_logging(EXECD_NAME, args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); // ocf_log() (in resource-agents) uses the capitalized env options below option = pcmk__env_option(PCMK__ENV_LOGFACILITY); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches) && !pcmk__str_eq(option, "/dev/null", pcmk__str_none)) { pcmk__set_env_option("LOGFACILITY", option, true); } option = pcmk__env_option(PCMK__ENV_LOGFILE); if (!pcmk__str_eq(option, PCMK_VALUE_NONE, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__set_env_option("LOGFILE", option, true); if (pcmk__env_option_enabled(crm_system_name, PCMK__ENV_DEBUG)) { pcmk__set_env_option("DEBUGLOG", option, true); } } #ifdef PCMK__COMPILE_REMOTE if (options.port != NULL) { pcmk__set_env_option(PCMK__ENV_REMOTE_PORT, options.port, false); } #endif // PCMK__COMPILE_REMOTE start_time = time(NULL); crm_notice("Starting Pacemaker " EXECD_TYPE " executor"); /* The presence of this variable allegedly controls whether child * processes like httpd will try and use Systemd's sd_notify * API */ unsetenv("NOTIFY_SOCKET"); { // Temporary directory for resource agent use (leave owned by root) int rc = pcmk__build_path(PCMK__OCF_TMP_DIR, 0755); if (rc != pcmk_rc_ok) { crm_warn("Could not create resource agent temporary directory " PCMK__OCF_TMP_DIR ": %s", pcmk_rc_str(rc)); } } rsc_list = pcmk__strkey_table(NULL, free_rsc); ipcs = mainloop_add_ipc_server(CRM_SYSTEM_LRMD, QB_IPC_SHM, &lrmd_ipc_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); exit_code = CRM_EX_FATAL; goto done; } #ifdef PCMK__COMPILE_REMOTE if (lrmd_init_remote_tls_server() < 0) { crm_err("Failed to create TLS listener: shutting down and staying down"); exit_code = CRM_EX_FATAL; goto done; } ipc_proxy_init(); #endif mainloop_add_signal(SIGTERM, lrmd_shutdown); mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker " EXECD_TYPE " executor successfully started and accepting connections"); crm_notice("OCF resource agent search path is %s", PCMK__OCF_RA_PATH); g_main_loop_run(mainloop); /* should never get here */ exit_executor(); done: g_strfreev(options.log_files); #ifdef PCMK__COMPILE_REMOTE g_free(options.port); #endif // PCMK__COMPILE_REMOTE g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); crm_exit(exit_code); } diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c index 6d6c73a058..33f9ce57f2 100644 --- a/daemons/fenced/cts-fence-helper.c +++ b/daemons/fenced/cts-fence-helper.c @@ -1,695 +1,695 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SUMMARY "cts-fence-helper - inject commands into the Pacemaker fencer and watch for events" static GMainLoop *mainloop = NULL; static crm_trigger_t *trig = NULL; static int mainloop_iter = 0; static pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; typedef void (*mainloop_test_iteration_cb) (int check_event); #define MAINLOOP_DEFAULT_TIMEOUT 2 enum test_modes { test_standard = 0, // test using a specific developer environment test_passive, // watch notifications only test_api_sanity, // sanity-test stonith client API using fence_dummy test_api_mainloop, // sanity-test mainloop code with async responses }; struct { enum test_modes mode; } options = { .mode = test_standard }; static gboolean mode_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { if (pcmk__str_any_of(option_name, "--mainloop_api_test", "-m", NULL)) { options.mode = test_api_mainloop; } else if (pcmk__str_any_of(option_name, "--api_test", "-t", NULL)) { options.mode = test_api_sanity; } else if (pcmk__str_any_of(option_name, "--passive", "-p", NULL)) { options.mode = test_passive; } return TRUE; } static GOptionEntry entries[] = { { "mainloop_api_test", 'm', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { "api_test", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { "passive", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, mode_cb, NULL, NULL, }, { NULL } }; static stonith_t *st = NULL; static struct pollfd pollfd; static const int st_opts = st_opt_sync_call; static int expected_notifications = 0; static int verbose = 0; static void mainloop_test_done(const char *origin, bool pass) { if (pass) { crm_info("SUCCESS - %s", origin); mainloop_iter++; mainloop_set_trigger(trig); result.execution_status = PCMK_EXEC_DONE; result.exit_status = CRM_EX_OK; } else { crm_err("FAILURE - %s (%d: %s)", origin, result.exit_status, pcmk_exec_status_str(result.execution_status)); crm_exit(CRM_EX_ERROR); } } static void dispatch_helper(int timeout) { int rc; crm_debug("Looking for notification"); pollfd.events = POLLIN; while (true) { rc = poll(&pollfd, 1, timeout); /* wait 10 minutes, -1 forever */ if (rc > 0) { if (!stonith_dispatch(st)) { break; } } else { break; } } } static void st_callback(stonith_t * st, stonith_event_t * e) { char *desc = NULL; if (st->state == stonith_disconnected) { crm_exit(CRM_EX_DISCONNECT); } desc = stonith__event_description(e); crm_notice("%s", desc); free(desc); if (expected_notifications) { expected_notifications--; } } static void st_global_callback(stonith_t * stonith, stonith_callback_data_t * data) { crm_notice("Call %d exited %d: %s (%s)", data->call_id, stonith__exit_status(data), stonith__execution_status(data), pcmk__s(stonith__exit_reason(data), "unspecified reason")); } static void passive_test(void) { int rc = 0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { - stonith_api_delete(st); + stonith__api_free(st); crm_exit(CRM_EX_DISCONNECT); } st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); dispatch_helper(600 * 1000); } #define single_test(cmd, str, num_notifications, expected_rc) \ { \ int rc = 0; \ rc = cmd; \ expected_notifications = 0; \ if (num_notifications) { \ expected_notifications = num_notifications; \ dispatch_helper(500); \ } \ if (rc != expected_rc) { \ crm_err("FAILURE - expected rc %d != %d(%s) for cmd - %s", expected_rc, rc, pcmk_strerror(rc), str); \ crm_exit(CRM_EX_ERROR); \ } else if (expected_notifications) { \ crm_err("FAILURE - expected %d notifications, got only %d for cmd - %s", \ num_notifications, num_notifications - expected_notifications, str); \ crm_exit(CRM_EX_ERROR); \ } else { \ if (verbose) { \ crm_info("SUCCESS - %s: %d", str, rc); \ } else { \ crm_debug("SUCCESS - %s: %d", str, rc); \ } \ } \ }\ static void run_fence_failure_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for failure test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, 3, 0), "Fence failure results off", 1, -ENODATA); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_REBOOT, 3, 0), "Fence failure results reboot", 1, -ENODATA); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for failure test", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_fence_failure_rollover_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "fail"); single_test(st-> cmds->register_device(st, st_opts, "test-id1", "stonith-ng", "fence_dummy", params), "Register device1 for rollover test", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); single_test(st-> cmds->register_device(st, st_opts, "test-id2", "stonith-ng", "fence_dummy", params), "Register device2 for rollover test", 1, 0); single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_OFF, 3, 0), "Fence rollover results off", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node2", PCMK_ACTION_ON, 3, 0), "Fence rollover results on", 1, -ENODEV); single_test(st->cmds->remove_device(st, st_opts, "test-id1"), "Remove device1 for rollover tests", 1, 0); single_test(st->cmds->remove_device(st, st_opts, "test-id2"), "Remove device2 for rollover tests", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void run_standard_test(void) { stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2 false_1_node2=3,4"); params = stonith_key_value_add(params, "mode", "pass"); params = stonith_key_value_add(params, "mock_dynamic_hosts", "false_1_node1 false_1_node2"); single_test(st-> cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_dummy", params), "Register", 1, 0); stonith_key_value_freeall(params, 1, 1); params = NULL; single_test(st->cmds->list(st, st_opts, "test-id", NULL, 1), PCMK_ACTION_LIST, 0, 0); single_test(st->cmds->monitor(st, st_opts, "test-id", 1), "Monitor", 0, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node2", 1), "Status false_1_node2", 0, 0); single_test(st->cmds->status(st, st_opts, "test-id", "false_1_node1", 1), "Status false_1_node1", 0, 0); single_test(st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 1, 0), "Fence unknown-host (expected failure)", 0, -ENODEV); single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 1, 0), "Fence false_1_node1", 1, 0); /* Expect -ENODEV because fence_dummy requires 'on' to be executed on target */ single_test(st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 1, 0), "Unfence false_1_node1", 1, -ENODEV); /* Confirm that an invalid level index is rejected */ single_test(st->cmds->register_level(st, st_opts, "node1", 999, params), "Attempt to register an invalid level index", 0, -EINVAL); single_test(st->cmds->remove_device(st, st_opts, "test-id"), "Remove test-id", 1, 0); stonith_key_value_freeall(params, 1, 1); } static void sanity_tests(void) { int rc = 0; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { - stonith_api_delete(st); + stonith__api_free(st); crm_exit(CRM_EX_DISCONNECT); } st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, st_callback); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_ADD, st_callback); st->cmds->register_notification(st, STONITH_OP_DEVICE_DEL, st_callback); st->cmds->register_callback(st, 0, 120, st_opt_timeout_updates, NULL, "st_global_callback", st_global_callback); crm_info("Starting API Sanity Tests"); run_standard_test(); run_fence_failure_test(); run_fence_failure_rollover_test(); crm_info("Sanity Tests Passed"); } static void standard_dev_test(void) { int rc = 0; char *tmp = NULL; stonith_key_value_t *params = NULL; rc = st->cmds->connect(st, crm_system_name, &pollfd.fd); if (rc != pcmk_ok) { - stonith_api_delete(st); + stonith__api_free(st); crm_exit(CRM_EX_DISCONNECT); } params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "some-host=pcmk-7 true_1_node1=3,4"); rc = st->cmds->register_device(st, st_opts, "test-id", "stonith-ng", "fence_xvm", params); crm_debug("Register: %d", rc); rc = st->cmds->list(st, st_opts, "test-id", &tmp, 10); crm_debug("List: %d output: %s", rc, tmp ? tmp : ""); rc = st->cmds->monitor(st, st_opts, "test-id", 10); crm_debug("Monitor: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node2", 10); crm_debug("Status false_1_node2: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "unknown-host", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence unknown-host: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_OFF, 60, 0); crm_debug("Fence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "false_1_node1", 10); crm_debug("Status false_1_node1: %d", rc); rc = st->cmds->fence(st, st_opts, "some-host", PCMK_ACTION_OFF, 10, 0); crm_debug("Fence alias: %d", rc); rc = st->cmds->status(st, st_opts, "test-id", "some-host", 10); crm_debug("Status alias: %d", rc); rc = st->cmds->fence(st, st_opts, "false_1_node1", PCMK_ACTION_ON, 10, 0); crm_debug("Unfence false_1_node1: %d", rc); rc = st->cmds->remove_device(st, st_opts, "test-id"); crm_debug("Remove test-id: %d", rc); stonith_key_value_freeall(params, 1, 1); } static void iterate_mainloop_tests(gboolean event_ready); static void mainloop_callback(stonith_t * stonith, stonith_callback_data_t * data) { pcmk__set_result(&result, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); iterate_mainloop_tests(TRUE); } static int register_callback_helper(int callid) { return st->cmds->register_callback(st, callid, MAINLOOP_DEFAULT_TIMEOUT, st_opt_timeout_updates, NULL, "callback", mainloop_callback); } static void test_async_fence_pass(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); return; } rc = st->cmds->fence(st, 0, "true_1_node1", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } #define CUSTOM_TIMEOUT_ADDITION 10 static void test_async_fence_custom_timeout(int check_event) { int rc = 0; static time_t begin = 0; if (check_event) { uint32_t diff = (time(NULL) - begin); if (result.execution_status != PCMK_EXEC_TIMEOUT) { mainloop_test_done(__func__, false); } else if (diff < CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT) { crm_err ("Custom timeout test failed, callback expiration should be updated to %d, actual timeout was %d", CUSTOM_TIMEOUT_ADDITION + MAINLOOP_DEFAULT_TIMEOUT, diff); mainloop_test_done(__func__, false); } else { mainloop_test_done(__func__, true); } return; } begin = time(NULL); rc = st->cmds->fence(st, 0, "custom_timeout_node1", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_async_fence_timeout(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.execution_status == PCMK_EXEC_NO_FENCE_DEVICE)); return; } rc = st->cmds->fence(st, 0, "false_1_node2", PCMK_ACTION_OFF, MAINLOOP_DEFAULT_TIMEOUT, 0); if (rc < 0) { crm_err("fence failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_async_monitor(int check_event) { int rc = 0; if (check_event) { mainloop_test_done(__func__, (result.exit_status == CRM_EX_OK)); return; } rc = st->cmds->monitor(st, 0, "false_1", MAINLOOP_DEFAULT_TIMEOUT); if (rc < 0) { crm_err("monitor failed with rc %d", rc); mainloop_test_done(__func__, false); } register_callback_helper(rc); /* wait for event */ } static void test_register_async_devices(int check_event) { char buf[16] = { 0, }; stonith_key_value_t *params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "false_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); st->cmds->register_device(st, st_opts, "false_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "true_1_node1=1,2"); params = stonith_key_value_add(params, "mode", "pass"); st->cmds->register_device(st, st_opts, "true_1", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); params = NULL; params = stonith_key_value_add(params, PCMK_STONITH_HOST_MAP, "custom_timeout_node1=1,2"); params = stonith_key_value_add(params, "mode", "fail"); params = stonith_key_value_add(params, "delay", "1000"); snprintf(buf, sizeof(buf) - 1, "%d", MAINLOOP_DEFAULT_TIMEOUT + CUSTOM_TIMEOUT_ADDITION); params = stonith_key_value_add(params, "pcmk_off_timeout", buf); st->cmds->register_device(st, st_opts, "false_custom_timeout", "stonith-ng", "fence_dummy", params); stonith_key_value_freeall(params, 1, 1); mainloop_test_done(__func__, true); } static void try_mainloop_connect(int check_event) { int rc = stonith_api_connect_retry(st, crm_system_name, 10); if (rc == pcmk_ok) { mainloop_test_done(__func__, true); return; } crm_err("API CONNECTION FAILURE"); mainloop_test_done(__func__, false); } static void iterate_mainloop_tests(gboolean event_ready) { static mainloop_test_iteration_cb callbacks[] = { try_mainloop_connect, test_register_async_devices, test_async_monitor, test_async_fence_pass, test_async_fence_timeout, test_async_fence_custom_timeout, }; if (mainloop_iter == (sizeof(callbacks) / sizeof(mainloop_test_iteration_cb))) { /* all tests ran, everything passed */ crm_info("ALL MAINLOOP TESTS PASSED!"); crm_exit(CRM_EX_OK); } callbacks[mainloop_iter] (event_ready); } static gboolean trigger_iterate_mainloop_tests(gpointer user_data) { iterate_mainloop_tests(FALSE); return TRUE; } static void test_shutdown(int nsig) { int rc = 0; if (st) { rc = st->cmds->disconnect(st); crm_info("Disconnect: %d", rc); crm_debug("Destroy"); - stonith_api_delete(st); + stonith__api_free(st); } if (rc) { crm_exit(CRM_EX_ERROR); } } static void mainloop_tests(void) { trig = mainloop_add_trigger(G_PRIORITY_HIGH, trigger_iterate_mainloop_tests, NULL); mainloop_set_trigger(trig); mainloop_add_signal(SIGTERM, test_shutdown); crm_info("Starting"); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; context = pcmk__build_arg_context(args, NULL, group, NULL); pcmk__add_main_args(context, entries); return context; } int main(int argc, char **argv) { GError *error = NULL; crm_exit_t exit_code = CRM_EX_OK; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); GOptionContext *context = build_arg_context(args, NULL); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } /* We have to use crm_log_init here to set up the logging because there's * different handling for daemons vs. command line programs, and * pcmk__cli_init_logging is set up to only handle the latter. */ crm_log_init(NULL, LOG_INFO, TRUE, (verbose? TRUE : FALSE), argc, argv, FALSE); for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } st = stonith__api_new(); if (st == NULL) { exit_code = CRM_EX_DISCONNECT; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Could not connect to fencer: API memory allocation failed"); goto done; } switch (options.mode) { case test_standard: standard_dev_test(); break; case test_passive: passive_test(); break; case test_api_sanity: sanity_tests(); break; case test_api_mainloop: mainloop_tests(); break; } test_shutdown(0); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, NULL); crm_exit(exit_code); } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index c4032584a6..bc24f794cd 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3673 +1,3673 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include // bool #include #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include #include #include #include static GHashTable *device_table = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ // @TODO This name is misleading now, it's the value of stonith-timeout int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_self; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(fenced_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct { int id; uint32_t options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; char *action; char *device; //! Head of device list (used only for freeing list with command object) GList *device_list; //! Next item to process in \c device_list GList *next_device_iter; void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); fenced_device_t *active_on; fenced_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); /*! * \internal * \brief Set a bad fencer API request error in a result object * * \param[out] result Result to set */ static inline void set_bad_request_result(pcmk__action_result_t *result) { pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Fencer API request missing required information (bug?)"); } /*! * \internal * \brief Check whether the fencer's device table contains a watchdog device * * \retval \c true If the device table contains a watchdog device * \retval \c false Otherwise */ bool fenced_has_watchdog_device(void) { return (device_table != NULL) && (g_hash_table_lookup(device_table, STONITH_WATCHDOG_ID) != NULL); } /*! * \internal * \brief Call a function for each known fence device * * \param[in] fn Function to call for each device * \param[in,out] user_data User data */ void fenced_foreach_device(GHFunc fn, gpointer user_data) { if (device_table != NULL) { g_hash_table_foreach(device_table, fn, user_data); } } /*! * \internal * \brief Remove each known fence device matching a given predicate * * \param[in] fn Function that returns \c TRUE to remove a fence device or * \c FALSE to keep it */ void fenced_foreach_device_remove(GHRFunc fn) { if (device_table != NULL) { g_hash_table_foreach_remove(device_table, fn, NULL); } } static gboolean is_action_required(const char *action, const fenced_device_t *device) { return (device != NULL) && pcmk_is_set(device->flags, fenced_df_auto_unfence) && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const fenced_device_t *device, const char *action) { const char *value = NULL; guint delay_max = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { pcmk_parse_interval_spec(value, &delay_max); delay_max /= 1000; } return (int) delay_max; } static int get_action_delay_base(const fenced_device_t *device, const char *action, const char *target) { char *hash_value = NULL; guint delay_base = 0U; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = pcmk__str_copy(hash_value); char *valptr = value; if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { pcmk_parse_interval_spec(value, &delay_base); delay_base /= 1000; } free(valptr); } return (int) delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const fenced_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { long long timeout_ms = crm_get_msec(value); return (int) QB_MIN(pcmk__timeout_ms2s(timeout_ms), INT_MAX); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static fenced_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_table == NULL)) { return NULL; } return g_hash_table_lookup(device_table, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_table != NULL) && (device_id != NULL)) { fenced_device_t *device = g_hash_table_lookup(device_table, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_table != NULL) && (device_id != NULL)) { fenced_device_t *device = g_hash_table_lookup(device_table, device_id); if (device != NULL) { return pcmk_is_set(device->flags, fenced_df_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; int rc = pcmk_rc_ok; if (msg == NULL) { return NULL; } op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); if (op == NULL) { return NULL; } cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); // All messages must include these cmd->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); cmd->op = crm_element_value_copy(msg, PCMK__XA_ST_OP); cmd->client = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, PCMK__XA_ST_CALLID, &(cmd->id)); crm_element_value_int(msg, PCMK__XA_ST_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, PCMK__XA_ST_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; rc = pcmk__xe_get_flags(msg, PCMK__XA_ST_CALLOPT, &(cmd->options), st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from request: %s", pcmk_rc_str(rc)); } cmd->origin = crm_element_value_copy(msg, PCMK__XA_SRC); cmd->remote_op_id = crm_element_value_copy(msg, PCMK__XA_ST_REMOTE_OP); cmd->client_name = crm_element_value_copy(msg, PCMK__XA_ST_CLIENTNAME); cmd->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); cmd->device = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ID); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(fenced_device_t *device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(fenced_device_t *device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; fenced_device_t *device = cmd->activating_on; if (device == NULL) { /* In case of a retry, we've done the move from activating_on to * active_on already */ device = cmd->active_on; } pcmk__assert(device != NULL); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { fenced_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); device->default_host_arg = stonith__default_host_arg(device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(fenced_device_t *device) { int exec_rc = 0; const char *action_str = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(fenced_get_local_node())) { pcmk__panic("Watchdog self-fencing required"); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if PCMK__ENABLE_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, fenced_df_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->timeout, device->params, device->aliases, device->default_host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; fenced_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t *cmd, fenced_device_t *device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } cmd->device = pcmk__str_copy(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " QB_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = pcmk__create_timer(cmd->start_delay * 1000, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; fenced_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); pcmk__xml_free(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } /*! * \internal * \brief Initialize the table of known fence devices */ void fenced_init_device_table(void) { if (device_table == NULL) { device_table = pcmk__strkey_table(NULL, free_device); } } /*! * \internal * \brief Free the table of known fence devices */ void fenced_free_device_table(void) { if (device_table != NULL) { g_hash_table_destroy(device_table); device_table = NULL; } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = pcmk__assert_alloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, pcmk__str_copy(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith__api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); - stonith_api_delete(st); + stonith__api_free(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, pcmk__str_copy(agent), buffer); } *metadata = pcmk__xml_parse(buffer); return pcmk_rc_ok; } static void read_action_metadata(fenced_device_t *device) { xmlXPathObject *xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = pcmk__xpath_search(device->agent_metadata->doc, "//" PCMK_XE_ACTION); max = pcmk__xpath_num_results(xpath); if (max == 0) { xmlXPathFreeObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = pcmk__xpath_result(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, PCMK_XA_NAME); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { fenced_device_set_flags(device, fenced_df_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* PCMK_XA_AUTOMATIC means the cluster will unfence a node when it * joins. * * @COMPAT PCMK__XA_REQUIRED is a deprecated synonym for * PCMK_XA_AUTOMATIC. */ if (pcmk__xe_attr_is_true(match, PCMK_XA_AUTOMATIC) || pcmk__xe_attr_is_true(match, PCMK__XA_REQUIRED)) { fenced_device_set_flags(device, fenced_df_auto_unfence); } fenced_device_set_flags(device, fenced_df_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, PCMK_XA_ON_TARGET)) { pcmk__add_word(&(device->on_target_actions), 64, action); } } xmlXPathFreeObject(xpath); } static const char * target_list_type(fenced_device_t *dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = PCMK_VALUE_STATIC_LIST; } else if (pcmk_is_set(dev->flags, fenced_df_supports_list)) { check_type = PCMK_VALUE_DYNAMIC_LIST; } else if (pcmk_is_set(dev->flags, fenced_df_supports_status)) { check_type = PCMK_VALUE_STATUS; } else { check_type = PCMK_VALUE_NONE; } } return check_type; } static fenced_device_t * build_device_from_xml(const xmlNode *dev) { const char *value; fenced_device_t *device = NULL; char *agent = crm_element_value_copy(dev, PCMK_XA_AGENT); CRM_CHECK(agent != NULL, return device); device = pcmk__assert_alloc(1, sizeof(fenced_device_t)); device->id = crm_element_value_copy(dev, PCMK_XA_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, PCMK__XA_NAMESPACE); device->params = xml2list(dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, PCMK_VALUE_STATIC_LIST, pcmk__str_casei) && (device->targets != NULL)) { // device->targets is necessary only with PCMK_VALUE_STATIC_LIST g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); device->default_host_arg = stonith__default_host_arg(device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = crm_element_value(dev, PCMK__XA_RSC_PROVIDES); if (pcmk__str_eq(value, PCMK_VALUE_UNFENCING, pcmk__str_casei)) { fenced_device_set_flags(device, fenced_df_auto_unfence); } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); return device; } static void schedule_internal_command(const char *origin, fenced_device_t *device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = pcmk__assert_alloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = pcmk__str_copy(action); cmd->target = pcmk__str_copy(target); cmd->device = pcmk__str_copy(device->id); cmd->origin = pcmk__str_copy(origin); cmd->client = pcmk__str_copy(crm_system_name); cmd->client_name = pcmk__str_copy(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; fenced_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; fenced_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check=PCMK_VALUE_STATUS if the user didn't * explicitly specify PCMK_VALUE_DYNAMIC_LIST */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); pcmk__insert_dup(dev->params, PCMK_STONITH_HOST_CHECK, PCMK_VALUE_STATUS); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if (strcmp(key, PCMK_XA_CRM_FEATURE_SET) == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the table */ static fenced_device_t * device_has_duplicate(const fenced_device_t *device) { fenced_device_t *dup = g_hash_table_lookup(device_table, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } // Use pcmk__digest_operation() here? if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int fenced_device_register(const xmlNode *dev, bool from_cib) { const char *local_node_name = fenced_get_local_node(); fenced_device_t *dup = NULL; fenced_device_t *device = build_device_from_xml(dev); int rc = pcmk_rc_ok; CRM_CHECK(device != NULL, return ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set"); rc = ENODEV; goto done; } if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown agent '%s' " "rather than '" STONITH_WATCHDOG_AGENT "'", pcmk__s(device->agent, "")); rc = ENODEV; goto done; } if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device named '%s' rather than " "'" STONITH_WATCHDOG_ID "'", pcmk__s(device->id, "")); rc = ENODEV; goto done; } if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* This has either an empty list or the targets configured for * watchdog fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (!node_does_watchdog_fencing(local_node_name)) { crm_debug("Skip registration of watchdog fence device on node not " "in host list"); device->targets = NULL; stonith_device_remove(device->id, from_cib); goto done; } // Proceed as with any other fencing device g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(local_node_name); pcmk__insert_dup(device->params, PCMK_STONITH_HOST_LIST, local_node_name); } dup = device_has_duplicate(device); if (dup != NULL) { guint ndevices = g_hash_table_size(device_table); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; fenced_device_clear_flags(device, fenced_df_dirty); } else { guint ndevices = 0; fenced_device_t *old = g_hash_table_lookup(device_table, device->id); if (from_cib && (old != NULL) && pcmk_is_set(old->flags, fenced_df_api_registered)) { /* If the CIB is writing over an entry that is shared with a stonith * client, copy any pending ops that currently exist on the old * entry to the new one. Otherwise the pending ops will be reported * as failures. */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; fenced_device_set_flags(device, fenced_df_api_registered); old->pending_ops = NULL; if (device->pending_ops != NULL) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_table, device->id, device); ndevices = g_hash_table_size(device_table); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { fenced_device_set_flags(device, fenced_df_cib_registered); } else { fenced_device_set_flags(device, fenced_df_api_registered); } done: if (rc != pcmk_rc_ok) { free_device(device); } return rc; } void stonith_device_remove(const char *id, bool from_cib) { fenced_device_t *device = g_hash_table_lookup(device_table, id); guint ndevices = 0; if (device == NULL) { ndevices = g_hash_table_size(device_table); crm_info("Device '%s' not found (%u active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { fenced_device_clear_flags(device, fenced_df_cib_registered); } else { fenced_device_clear_flags(device, fenced_df_api_registered|fenced_df_verified); } if (!pcmk_any_flags_set(device->flags, fenced_df_api_registered |fenced_df_cib_registered)) { g_hash_table_remove(device_table, id); ndevices = g_hash_table_size(device_table); crm_info("Removed '%s' from device list (%u active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { // Exactly one is true at this point const bool cib_registered = pcmk_is_set(device->flags, fenced_df_cib_registered); crm_trace("Not removing '%s' from device list (%u active) because " "still registered via %s", id, g_hash_table_size(device_table), (cib_registered? "CIB" : "API")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST__LEVEL_COUNT; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, PCMK_XA_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE), crm_element_value(level, PCMK_XA_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", pcmk__xe_id(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, PCMK_XA_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, PCMK_XA_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if ((crm_element_value(level, PCMK_XA_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, PCMK_XA_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, PCMK_XE_FENCING_LEVEL)) { xml = pcmk__xpath_find_one(xml->doc, "//" PCMK_XE_FENCING_LEVEL, LOG_WARNING); } if (xml != NULL) { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, PCMK_XA_INDEX, &local_id); } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] result Where to set result of registration (can be \c NULL) */ void fenced_register_level(xmlNode *msg, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK(msg != NULL, return); level = unpack_level_request(msg, &mode, &target, &id); if (level == NULL) { set_bad_request_result(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(pcmk__xe_id(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", pcmk__xe_id(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", pcmk__xe_id(level)); return; } // Ensure level ID is in allowed range if ((id < ST__LEVEL_MIN) || (id > ST__LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), pcmk__xe_id(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = pcmk__assert_alloc(1, sizeof(stonith_topology_t)); tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, PCMK_XA_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, PCMK_XA_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, PCMK_XA_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, PCMK_XA_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], pcmk__str_copy(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] result Where to set result of unregistration (can be \c NULL) */ void fenced_unregister_level(xmlNode *msg, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; level = unpack_level_request(msg, NULL, &target, &id); if (level == NULL) { set_bad_request_result(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST__LEVEL_COUNT)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, PCMK_XA_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(pcmk__xe_id(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; char *pos = NULL; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = pcmk__assert_alloc(alloc_size, sizeof(char)); pos = rv; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = pcmk__xpath_find_one(msg->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); xmlNode *op = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_ERR); const char *id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); const char *action = crm_element_value(op, PCMK__XA_ST_DEVICE_ACTION); async_command_t *cmd = NULL; fenced_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); set_bad_request_result(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_table, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!pcmk_is_set(device->flags, fenced_df_api_registered) && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); set_bad_request_result(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != fenced_df_none) { fenced_device_t *dev = g_hash_table_lookup(device_table, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, pcmk__str_copy(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const fenced_device_t *device, const char *action, const char *target, gboolean allow_self) { gboolean localhost_is_target = pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_self) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const fenced_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } /*! * \internal * \brief Check whether we can use a device's cached target list * * \param[in] dev Fencing device to check * * \return \c true if \p dev cached its targets less than a minute ago, * otherwise \c false */ static inline bool can_use_target_cache(const fenced_device_t *dev) { return (dev->targets != NULL) && (time(NULL) < (dev->targets_age + 60)); } static void can_fence_host_with_device(fenced_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, fenced_df_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_self)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK_VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATIC_LIST, pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_DYNAMIC_LIST, pcmk__str_casei)) { if (!can_use_target_cache(dev)) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, PCMK_VALUE_STATUS, pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { fenced_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool allow_self, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_table); if (ndevices == 0) { callback(NULL, user_data); return; } search = pcmk__assert_alloc(1, sizeof(struct device_search_s)); search->host = pcmk__str_copy(host); search->action = pcmk__str_copy(action); search->per_device_timeout = timeout; search->allow_self = allow_self; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); fenced_foreach_device(search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); // PCMK__XA_ST_REQUIRED is currently used only for unfencing if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, PCMK__XA_ST_REQUIRED, 1); } // pcmk__timeout if configured action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %ds using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, PCMK__XA_ST_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, PCMK__XA_ST_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, PCMK__XA_ST_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target, gboolean allow_self) { if (!localhost_is_eligible(device, action, target, allow_self)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, PCMK__XA_ST_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_self Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const fenced_device_t *device, const char *target, gboolean allow_self) { xmlNode *child = pcmk__xe_create(xml, PCMK__XE_ST_DEVICE_ACTION); crm_xml_add(child, PCMK_XA_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_self); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { const pcmk__node_status_t *node = pcmk__get_node(0, remote_peer, NULL, pcmk__node_search_cluster_member); pcmk__cluster_send_message(node, pcmk_ipc_fenced, reply); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *wrapper = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } // Pack the results into XML wrapper = pcmk__xe_create(query->reply, PCMK__XE_ST_CALLDATA); list = pcmk__xe_create(wrapper, __func__); crm_xml_add(list, PCMK__XA_ST_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { fenced_device_t *device = g_hash_table_lookup(device_table, lpc->data); const char *action = query->action; xmlNode *dev = NULL; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = pcmk__xe_create(list, PCMK__XE_ST_DEVICE_ID); crm_xml_add(dev, PCMK_XA_ID, device->id); crm_xml_add(dev, PCMK__XA_NAMESPACE, device->namespace); crm_xml_add(dev, PCMK_XA_AGENT, device->agent); // Has had successful monitor, list, or status on this node crm_xml_add_int(dev, PCMK__XA_ST_MONITOR_VERIFIED, pcmk_is_set(device->flags, fenced_df_verified)); crm_xml_add_int(dev, PCMK__XA_ST_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, fenced_df_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_self_fencing)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = pcmk__xe_create(dev, PCMK__XE_ATTRIBUTES); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, PCMK__XA_ST_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } crm_log_xml_trace(list, "query-result"); stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: pcmk__xml_free(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, PCMK_ACTION_METADATA, pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " QB_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, PCMK__XA_ST_OP_MERGED, true); } if (pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, PCMK__XA_SUBT, PCMK__VALUE_BROADCAST); crm_xml_add(reply, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); pcmk__cluster_send_message(NULL, pcmk_ipc_fenced, reply); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); pcmk__xml_free(reply); } static void cancel_stonith_command(async_command_t * cmd) { fenced_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static fenced_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { fenced_device_t *next_device = g_hash_table_lookup(device_table, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; fenced_device_t *device = NULL; fenced_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!pcmk_is_set(device->flags, fenced_df_verified) && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { fenced_device_set_flags(device, fenced_df_verified); } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_table, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; fenced_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { device = g_hash_table_lookup(device_table, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { /* Device found. Schedule a fencing command for it. * * Assign devices to device_list so that it will be freed with cmd. */ cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; fenced_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = pcmk__xpath_find_one(msg->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); set_bad_request_result(result); return; } device_id = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (device_id != NULL) { device = g_hash_table_lookup(device_table, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, PCMK__XA_ST_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { host = node->name; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { PCMK__XA_ST_OP, PCMK__XA_ST_CALLID, PCMK__XA_ST_CLIENTID, PCMK__XA_ST_CLIENTNAME, PCMK__XA_ST_REMOTE_OP, PCMK__XA_ST_CALLOPT, }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(reply, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = pcmk__xe_create(NULL, PCMK__XE_ST_REPLY); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(reply, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(reply, PCMK__XA_ST_OP, cmd->op); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ID, cmd->device); crm_xml_add(reply, PCMK__XA_ST_REMOTE_OP, cmd->remote_op_id); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, cmd->client); crm_xml_add(reply, PCMK__XA_ST_CLIENTNAME, cmd->client_name); crm_xml_add(reply, PCMK__XA_ST_TARGET, cmd->target); crm_xml_add(reply, PCMK__XA_ST_DEVICE_ACTION, cmd->op); crm_xml_add(reply, PCMK__XA_ST_ORIGIN, cmd->origin); crm_xml_add_int(reply, PCMK__XA_ST_CALLID, cmd->id); crm_xml_add_int(reply, PCMK__XA_ST_CALLOPT, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(pcmk__node_status_t *peer) { return (peer != NULL) && (peer->name != NULL) && pcmk_is_set(peer->processes, crm_get_cluster_proc()); } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { GHashTableIter gIter; pcmk__node_status_t *entry = NULL; g_hash_table_iter_init(&gIter, pcmk__peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->name, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->name); return entry->name; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = pcmk__xpath_find_one(request->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, PCMK__XA_ST_TARGET); } relay_op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP_RELAY); op_id = crm_element_value(request, PCMK__XA_ST_REMOTE_OP); client_name = crm_element_value(request, PCMK__XA_ST_CLIENTNAME); /* Delete RELAY operation. */ if ((relay_op_id != NULL) && (target != NULL) && pcmk__str_eq(target, fenced_get_local_node(), pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = pcmk__xe_create(NULL, "reply"); pcmk__assert(request->ipc_client != NULL); crm_xml_add(reply, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(reply, PCMK__XA_ST_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, PCMK__XA_ST_CALLID); const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_DEVICE_ACTION "]", LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, PCMK__XA_ST_TARGET); action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = pcmk__assert_alloc(1, sizeof(struct st_query_data)); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); query->remote_peer = pcmk__str_copy(request->peer); query->client_id = pcmk__str_copy(client_id); query->target = pcmk__str_copy(target); query->action = pcmk__str_copy(action); query->call_options = request->call_options; crm_element_value_int(request->xml, PCMK__XA_ST_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_self_fencing), query, stonith_query_capable_device_cb, fenced_df_none); return NULL; } // STONITH_OP_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; pcmk__assert(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, fenced_parse_notify_flag(flag_name)); } flag_name = crm_element_value(request->xml, PCMK__XA_ST_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, fenced_parse_notify_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, PCMK__XE_ACK, NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION), crm_element_value(dev, PCMK__XA_ST_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { set_bad_request_result(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if (request->peer != NULL) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: set_bad_request_result(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//*[@" PCMK__XA_ST_TARGET "]", LOG_TRACE); const char *target = crm_element_value(dev, PCMK__XA_ST_TARGET); const char *action = crm_element_value(dev, PCMK__XA_ST_DEVICE_ACTION); const char *device = crm_element_value(dev, PCMK__XA_ST_DEVICE_ID); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, PCMK__XA_ST_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; pcmk__node_status_t *node = pcmk__get_node(0, alternate_host, NULL, pcmk__node_search_cluster_member); if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, PCMK__XA_ST_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, PCMK__XA_ST_OP, STONITH_OP_RELAY); crm_xml_add(request->xml, PCMK__XA_ST_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, PCMK__XA_ST_REMOTE_OP, op->id); // @TODO On failure, fail request immediately, or maybe panic pcmk__cluster_send_message(node, pcmk_ipc_fenced, request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { set_bad_request_result(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } pcmk__xml_free(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = fenced_device_register(dev, false); rc = pcmk_rc2legacy(rc); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_config_notification(op, &request->result, (dev == NULL)? NULL : pcmk__xe_id(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = pcmk__xpath_find_one(request->xml->doc, "//" PCMK__XE_ST_DEVICE_ID, LOG_ERR); const char *device_id = crm_element_value(dev, PCMK_XA_ID); const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_config_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, PCMK__XA_ST_OP); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, PCMK_XA_ID, &node_id); name = crm_element_value(request->xml, PCMK_XA_UNAME); pcmk__cluster_forget_cluster_node(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { STONITH_OP_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } pcmk__xml_free(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, PCMK__XA_ST_OP); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, STONITH_OP_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { uint32_t call_options = st_opt_none; int rc = pcmk_rc_ok; bool is_reply = false; CRM_CHECK(message != NULL, return); if (pcmk__xpath_find_one(message->doc, "//" PCMK__XE_ST_REPLY, LOG_NEVER) != NULL) { is_reply = true; } rc = pcmk__xe_get_flags(message, PCMK__XA_ST_CALLOPT, &call_options, st_opt_none); if (rc != pcmk_rc_ok) { crm_warn("Couldn't parse options from message: %s", pcmk_rc_str(rc)); } crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, PCMK__XA_ST_OP), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { pcmk__assert((client == NULL) || (client->request_id == id)); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, PCMK__XA_ST_OP); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h index cbbddca12a..b29e5328ce 100644 --- a/include/crm/fencing/internal.h +++ b/include/crm/fencing/internal.h @@ -1,181 +1,182 @@ /* * Copyright 2011-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_FENCING_INTERNAL__H #define PCMK__CRM_FENCING_INTERNAL__H #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif stonith_t *stonith__api_new(void); +void stonith__api_free(stonith_t *stonith_api); #define stonith__set_call_options(st_call_opts, call_for, flags_to_set) do { \ st_call_opts = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_set), \ #flags_to_set); \ } while (0) #define stonith__clear_call_options(st_call_opts, call_for, flags_to_clear) do { \ st_call_opts = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "Fencer call", (call_for), \ (st_call_opts), (flags_to_clear), \ #flags_to_clear); \ } while (0) struct stonith_action_s; typedef struct stonith_action_s stonith_action_t; stonith_action_t *stonith__action_create(const char *agent, const char *action_name, const char *target, int timeout_sec, GHashTable *device_args, GHashTable *port_map, const char *host_arg); void stonith__destroy_action(stonith_action_t *action); pcmk__action_result_t *stonith__action_result(stonith_action_t *action); int stonith__result2rc(const pcmk__action_result_t *result); void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); void stonith__xe_get_result(const xmlNode *xml, pcmk__action_result_t *result); xmlNode *stonith__find_xe_with_result(xmlNode *xml); int stonith__execute_async(stonith_action_t *action, void *userdata, void (*done) (int pid, const pcmk__action_result_t *result, void *user_data), void (*fork_cb) (int pid, void *user_data)); int stonith__metadata_async(const char *agent, int timeout_sec, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data); xmlNode *create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list); xmlNode *create_device_registration_xml(const char *id, enum stonith_namespace standard, const char *agent, const stonith_key_value_t *params, const char *rsc_provides); void stonith__register_messages(pcmk__output_t *out); GList *stonith__parse_targets(const char *hosts); const char *stonith__later_succeeded(const stonith_history_t *event, const stonith_history_t *top_history); stonith_history_t *stonith__sort_history(stonith_history_t *history); const char *stonith__default_host_arg(xmlNode *metadata); /* Only 1-9 is allowed for fencing topology levels, * however, 0 is used to unregister all levels in * unregister requests. */ # define ST__LEVEL_COUNT 10 # define STONITH_ATTR_ACTION_OP "action" # define STONITH_OP_EXEC "st_execute" # define STONITH_OP_TIMEOUT_UPDATE "st_timeout_update" # define STONITH_OP_QUERY "st_query" # define STONITH_OP_FENCE "st_fence" # define STONITH_OP_RELAY "st_relay" # define STONITH_OP_DEVICE_ADD "st_device_register" # define STONITH_OP_DEVICE_DEL "st_device_remove" # define STONITH_OP_FENCE_HISTORY "st_fence_history" # define STONITH_OP_LEVEL_ADD "st_level_add" # define STONITH_OP_LEVEL_DEL "st_level_remove" # define STONITH_OP_NOTIFY "st_notify" # define STONITH_OP_POKE "poke" # define STONITH_WATCHDOG_AGENT "fence_watchdog" /* Don't change 2 below as it would break rolling upgrade */ # define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog" # define STONITH_WATCHDOG_ID "watchdog" stonith_history_t *stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data); bool stonith__event_state_pending(stonith_history_t *history, void *user_data); bool stonith__event_state_eq(stonith_history_t *history, void *user_data); bool stonith__event_state_neq(stonith_history_t *history, void *user_data); int stonith__legacy2status(int rc); int stonith__exit_status(const stonith_callback_data_t *data); int stonith__execution_status(const stonith_callback_data_t *data); const char *stonith__exit_reason(const stonith_callback_data_t *data); int stonith__event_exit_status(const stonith_event_t *event); int stonith__event_execution_status(const stonith_event_t *event); const char *stonith__event_exit_reason(const stonith_event_t *event); char *stonith__event_description(const stonith_event_t *event); gchar *stonith__history_description(const stonith_history_t *event, bool full_history, const char *later_succeeded, uint32_t show_opts); /*! * \internal * \brief Is a fencing operation in pending state? * * \param[in] state State as enum op_state value * * \return A boolean */ static inline bool stonith__op_state_pending(enum op_state state) { return state != st_failed && state != st_done; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node); gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node); /*! * \internal * \brief Validate a fencing configuration * * \param[in,out] st Fencer connection to use * \param[in] call_options Group of enum stonith_call_options * \param[in] rsc_id Resource to validate * \param[in] namespace_s Type of fence agent to search for * \param[in] agent Fence agent to validate * \param[in,out] params Fence device configuration parameters * \param[in] timeout_sec How long to wait for operation to complete * \param[in,out] output If non-NULL, where to store any agent output * \param[in,out] error_output If non-NULL, where to store agent error output * * \return Standard Pacemaker return code */ int stonith__validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, GHashTable *params, int timeout_sec, char **output, char **error_output); #ifdef __cplusplus } #endif #endif // PCMK__CRM_FENCING_INTERNAL__H diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c index f9a66ed93d..e0e68f5600 100644 --- a/lib/fencing/st_client.c +++ b/lib/fencing/st_client.c @@ -1,2761 +1,2773 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include // xmlNode #include // xmlXPathObject, etc. #include #include #include #include #include #include "fencing_private.h" CRM_TRACE_INIT_DATA(stonith); // Used as stonith_t:st_private typedef struct stonith_private_s { char *token; crm_ipc_t *ipc; mainloop_io_t *source; GHashTable *stonith_op_callback_table; GList *notify_list; int notify_refcnt; bool notify_deletes; void (*op_callback) (stonith_t * st, stonith_callback_data_t * data); } stonith_private_t; // Used as stonith_event_t:opaque struct event_private { pcmk__action_result_t result; }; typedef struct stonith_notify_client_s { const char *event; const char *obj_id; /* implement one day */ const char *obj_type; /* implement one day */ void (*notify) (stonith_t * st, stonith_event_t * e); bool delete; } stonith_notify_client_t; typedef struct stonith_callback_client_s { void (*callback) (stonith_t * st, stonith_callback_data_t * data); const char *id; void *user_data; gboolean only_success; gboolean allow_timeout_updates; struct timer_rec_s *timer; } stonith_callback_client_t; struct notify_blob_s { stonith_t *stonith; xmlNode *xml; }; struct timer_rec_s { int call_id; int timeout; guint ref; stonith_t *stonith; }; typedef int (*stonith_op_t) (const char *, int, const char *, xmlNode *, xmlNode *, xmlNode *, xmlNode **, xmlNode **); bool stonith_dispatch(stonith_t * st); xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static int stonith_send_command(stonith_t *stonith, const char *op, xmlNode *data, xmlNode **output_data, int call_options, int timeout); static void stonith_connection_destroy(gpointer user_data); static void stonith_send_notification(gpointer data, gpointer user_data); static int stonith_api_del_notification(stonith_t *stonith, const char *event); /*! * \brief Get agent namespace by name * * \param[in] namespace_s Name of namespace as string * * \return Namespace as enum value */ enum stonith_namespace stonith_text2namespace(const char *namespace_s) { if (pcmk__str_eq(namespace_s, "any", pcmk__str_null_matches)) { return st_namespace_any; } else if (!strcmp(namespace_s, "redhat") || !strcmp(namespace_s, "stonith-ng")) { return st_namespace_rhcs; } else if (!strcmp(namespace_s, "internal")) { return st_namespace_internal; } else if (!strcmp(namespace_s, "heartbeat")) { return st_namespace_lha; } return st_namespace_invalid; } /*! * \brief Get agent namespace name * * \param[in] namespace Namespace as enum value * * \return Namespace name as string */ const char * stonith_namespace2text(enum stonith_namespace st_namespace) { switch (st_namespace) { case st_namespace_any: return "any"; case st_namespace_rhcs: return "stonith-ng"; case st_namespace_internal: return "internal"; case st_namespace_lha: return "heartbeat"; default: break; } return "unsupported"; } /*! * \brief Determine namespace of a fence agent * * \param[in] agent Fence agent type * \param[in] namespace_s Name of agent namespace as string, if known * * \return Namespace of specified agent, as enum value */ enum stonith_namespace stonith_get_namespace(const char *agent, const char *namespace_s) { if (pcmk__str_eq(namespace_s, "internal", pcmk__str_none)) { return st_namespace_internal; } if (stonith__agent_is_rhcs(agent)) { return st_namespace_rhcs; } #if HAVE_STONITH_STONITH_H if (stonith__agent_is_lha(agent)) { return st_namespace_lha; } #endif return st_namespace_invalid; } gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) { gboolean rv = FALSE; stonith_t *stonith_api = (st != NULL)? st : stonith__api_new(); char *list = NULL; if(stonith_api) { if (stonith_api->state == stonith_disconnected) { int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); if (rc != pcmk_ok) { crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); } } if (stonith_api->state != stonith_disconnected) { /* caveat!!! * this might fail when when stonithd is just updating the device-list * probably something we should fix as well for other api-calls */ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); if ((rc != pcmk_ok) || (list == NULL)) { /* due to the race described above it can happen that * we drop in here - so as not to make remote nodes * panic on that answer */ if (rc == -ENODEV) { crm_notice("Cluster does not have watchdog fencing device"); } else { crm_warn("Could not check for watchdog fencing device: %s", pcmk_strerror(rc)); } } else if (list[0] == '\0') { rv = TRUE; } else { GList *targets = stonith__parse_targets(list); rv = pcmk__str_in_list(node, targets, pcmk__str_casei); g_list_free_full(targets, free); } free(list); if (!st) { /* if we're provided the api we still might have done the * connection - but let's assume the caller won't bother */ stonith_api->cmds->disconnect(stonith_api); } } if (!st) { - stonith_api_delete(stonith_api); + stonith__api_free(stonith_api); } } else { crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); } crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", node, rv?"":"not "); return rv; } gboolean stonith__watchdog_fencing_enabled_for_node(const char *node) { return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); } /* when cycling through the list we don't want to delete items so just mark them and when we know nobody is using the list loop over it to remove the marked items */ static void foreach_notify_entry (stonith_private_t *private, GFunc func, gpointer user_data) { private->notify_refcnt++; g_list_foreach(private->notify_list, func, user_data); private->notify_refcnt--; if ((private->notify_refcnt == 0) && private->notify_deletes) { GList *list_item = private->notify_list; private->notify_deletes = FALSE; while (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; GList *next = g_list_next(list_item); if (list_client->delete) { free(list_client); private->notify_list = g_list_delete_link(private->notify_list, list_item); } list_item = next; } } } static void stonith_connection_destroy(gpointer user_data) { stonith_t *stonith = user_data; stonith_private_t *native = NULL; struct notify_blob_s blob; crm_trace("Sending destroyed notification"); blob.stonith = stonith; blob.xml = pcmk__xe_create(NULL, PCMK__XE_NOTIFY); native = stonith->st_private; native->ipc = NULL; native->source = NULL; free(native->token); native->token = NULL; stonith->state = stonith_disconnected; crm_xml_add(blob.xml, PCMK__XA_T, PCMK__VALUE_ST_NOTIFY); crm_xml_add(blob.xml, PCMK__XA_SUBT, PCMK__VALUE_ST_NOTIFY_DISCONNECT); foreach_notify_entry(native, stonith_send_notification, &blob); pcmk__xml_free(blob.xml); } xmlNode * create_device_registration_xml(const char *id, enum stonith_namespace standard, const char *agent, const stonith_key_value_t *params, const char *rsc_provides) { xmlNode *data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); xmlNode *args = pcmk__xe_create(data, PCMK__XE_ATTRIBUTES); #if HAVE_STONITH_STONITH_H if (standard == st_namespace_any) { standard = stonith_get_namespace(agent, NULL); } if (standard == st_namespace_lha) { hash2field((gpointer) "plugin", (gpointer) agent, args); agent = "fence_legacy"; } #endif crm_xml_add(data, PCMK_XA_ID, id); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_AGENT, agent); if ((standard != st_namespace_any) && (standard != st_namespace_invalid)) { crm_xml_add(data, PCMK__XA_NAMESPACE, stonith_namespace2text(standard)); } if (rsc_provides) { crm_xml_add(data, PCMK__XA_RSC_PROVIDES, rsc_provides); } for (; params; params = params->next) { hash2field((gpointer) params->key, (gpointer) params->value, args); } return data; } static int stonith_api_register_device(stonith_t *st, int call_options, const char *id, const char *namespace_s, const char *agent, const stonith_key_value_t *params) { int rc = 0; xmlNode *data = NULL; data = create_device_registration_xml(id, stonith_text2namespace(namespace_s), agent, params, NULL); rc = stonith_send_command(st, STONITH_OP_DEVICE_ADD, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_device(stonith_t * st, int call_options, const char *name) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK_XA_ID, name); rc = stonith_send_command(st, STONITH_OP_DEVICE_DEL, data, NULL, call_options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level) { int rc = 0; xmlNode *data = NULL; CRM_CHECK(node || pattern || (attr && value), return -EINVAL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } crm_xml_add_int(data, PCMK_XA_INDEX, level); rc = stonith_send_command(st, STONITH_OP_LEVEL_DEL, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_remove_level(stonith_t * st, int options, const char *node, int level) { return stonith_api_remove_level_full(st, options, node, NULL, NULL, NULL, level); } /*! * \internal * \brief Create XML for fence topology level registration request * * \param[in] node If not NULL, target level by this node name * \param[in] pattern If not NULL, target by node name using this regex * \param[in] attr If not NULL, target by this node attribute * \param[in] value If not NULL, target by this node attribute value * \param[in] level Index number of level to register * \param[in] device_list List of devices in level * * \return Newly allocated XML tree on success, NULL otherwise * * \note The caller should set only one of node, pattern or attr/value. */ xmlNode * create_level_registration_xml(const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { GString *list = NULL; xmlNode *data; CRM_CHECK(node || pattern || (attr && value), return NULL); data = pcmk__xe_create(NULL, PCMK_XE_FENCING_LEVEL); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add_int(data, PCMK_XA_ID, level); crm_xml_add_int(data, PCMK_XA_INDEX, level); if (node) { crm_xml_add(data, PCMK_XA_TARGET, node); } else if (pattern) { crm_xml_add(data, PCMK_XA_TARGET_PATTERN, pattern); } else { crm_xml_add(data, PCMK_XA_TARGET_ATTRIBUTE, attr); crm_xml_add(data, PCMK_XA_TARGET_VALUE, value); } for (; device_list; device_list = device_list->next) { pcmk__add_separated_word(&list, 1024, device_list->value, ","); } if (list != NULL) { crm_xml_add(data, PCMK_XA_DEVICES, (const char *) list->str); g_string_free(list, TRUE); } return data; } static int stonith_api_register_level_full(stonith_t *st, int options, const char *node, const char *pattern, const char *attr, const char *value, int level, const stonith_key_value_t *device_list) { int rc = 0; xmlNode *data = create_level_registration_xml(node, pattern, attr, value, level, device_list); CRM_CHECK(data != NULL, return -EINVAL); rc = stonith_send_command(st, STONITH_OP_LEVEL_ADD, data, NULL, options, 0); pcmk__xml_free(data); return rc; } static int stonith_api_register_level(stonith_t * st, int options, const char *node, int level, const stonith_key_value_t * device_list) { return stonith_api_register_level_full(st, options, node, NULL, NULL, NULL, level, device_list); } static int stonith_api_device_list(stonith_t *stonith, int call_options, const char *namespace_s, stonith_key_value_t **devices, int timeout) { int count = 0; enum stonith_namespace ns = stonith_text2namespace(namespace_s); if (devices == NULL) { crm_err("Parameter error: stonith_api_device_list"); return -EFAULT; } #if HAVE_STONITH_STONITH_H // Include Linux-HA agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_lha)) { count += stonith__list_lha_agents(devices); } #endif // Include Red Hat agents if requested if ((ns == st_namespace_any) || (ns == st_namespace_rhcs)) { count += stonith__list_rhcs_agents(devices); } return count; } // See stonith_api_operations_t:metadata() documentation static int stonith_api_device_metadata(stonith_t *stonith, int call_options, const char *agent, const char *namespace_s, char **output, int timeout_sec) { /* By executing meta-data directly, we can get it from stonith_admin when * the cluster is not running, which is important for higher-level tools. */ enum stonith_namespace ns = stonith_get_namespace(agent, namespace_s); if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } crm_trace("Looking up metadata for %s agent %s", stonith_namespace2text(ns), agent); switch (ns) { case st_namespace_rhcs: return stonith__rhcs_metadata(agent, timeout_sec, output); #if HAVE_STONITH_STONITH_H case st_namespace_lha: return stonith__lha_metadata(agent, timeout_sec, output); #endif default: crm_err("Can't get fence agent '%s' meta-data: No such agent", agent); break; } return -ENODEV; } static int stonith_api_query(stonith_t * stonith, int call_options, const char *target, stonith_key_value_t ** devices, int timeout) { int rc = 0, lpc = 0, max = 0; xmlNode *data = NULL; xmlNode *output = NULL; xmlXPathObject *xpathObj = NULL; CRM_CHECK(devices != NULL, return -EINVAL); data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, target); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, PCMK_ACTION_OFF); rc = stonith_send_command(stonith, STONITH_OP_QUERY, data, &output, call_options, timeout); if (rc < 0) { return rc; } xpathObj = pcmk__xpath_search(output->doc, "//*[@" PCMK_XA_AGENT "]"); if (xpathObj) { max = pcmk__xpath_num_results(xpathObj); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = pcmk__xpath_result(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { xmlChar *match_path = xmlGetNodePath(match); crm_info("//*[@" PCMK_XA_AGENT "][%d] = %s", lpc, match_path); free(match_path); *devices = stonith_key_value_add(*devices, NULL, crm_element_value(match, PCMK_XA_ID)); } } xmlXPathFreeObject(xpathObj); } pcmk__xml_free(output); pcmk__xml_free(data); return max; } /*! * \internal * \brief Make a STONITH_OP_EXEC request * * \param[in,out] stonith Fencer connection * \param[in] call_options Bitmask of \c stonith_call_options * \param[in] id Fence device ID that request is for * \param[in] action Agent action to request (list, status, monitor) * \param[in] target Name of target node for requested action * \param[in] timeout_sec Error if not completed within this many seconds * \param[out] output Where to set agent output */ static int stonith_api_call(stonith_t *stonith, int call_options, const char *id, const char *action, const char *target, int timeout_sec, xmlNode **output) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, PCMK__XE_ST_DEVICE_ID); crm_xml_add(data, PCMK__XA_ST_ORIGIN, __func__); crm_xml_add(data, PCMK__XA_ST_DEVICE_ID, id); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); crm_xml_add(data, PCMK__XA_ST_TARGET, target); rc = stonith_send_command(stonith, STONITH_OP_EXEC, data, output, call_options, timeout_sec); pcmk__xml_free(data); return rc; } static int stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **list_info, int timeout) { int rc; xmlNode *output = NULL; rc = stonith_api_call(stonith, call_options, id, PCMK_ACTION_LIST, NULL, timeout, &output); if (output && list_info) { const char *list_str; list_str = crm_element_value(output, PCMK__XA_ST_OUTPUT); if (list_str) { *list_info = strdup(list_str); } } if (output) { pcmk__xml_free(output); } return rc; } static int stonith_api_monitor(stonith_t * stonith, int call_options, const char *id, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_MONITOR, NULL, timeout, NULL); } static int stonith_api_status(stonith_t * stonith, int call_options, const char *id, const char *port, int timeout) { return stonith_api_call(stonith, call_options, id, PCMK_ACTION_STATUS, port, timeout, NULL); } static int stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance, int delay) { int rc = 0; xmlNode *data = NULL; data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); crm_xml_add(data, PCMK__XA_ST_DEVICE_ACTION, action); crm_xml_add_int(data, PCMK__XA_ST_TIMEOUT, timeout); crm_xml_add_int(data, PCMK__XA_ST_TOLERANCE, tolerance); crm_xml_add_int(data, PCMK__XA_ST_DELAY, delay); rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout); pcmk__xml_free(data); return rc; } static int stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const char *action, int timeout, int tolerance) { return stonith_api_fence_with_delay(stonith, call_options, node, action, timeout, tolerance, 0); } static int stonith_api_confirm(stonith_t * stonith, int call_options, const char *target) { stonith__set_call_options(call_options, target, st_opt_manual_ack); return stonith_api_fence(stonith, call_options, target, PCMK_ACTION_OFF, 0, 0); } static int stonith_api_history(stonith_t * stonith, int call_options, const char *node, stonith_history_t ** history, int timeout) { int rc = 0; xmlNode *data = NULL; xmlNode *output = NULL; stonith_history_t *last = NULL; *history = NULL; if (node) { data = pcmk__xe_create(NULL, __func__); crm_xml_add(data, PCMK__XA_ST_TARGET, node); } stonith__set_call_options(call_options, node, st_opt_sync_call); rc = stonith_send_command(stonith, STONITH_OP_FENCE_HISTORY, data, &output, call_options, timeout); pcmk__xml_free(data); if (rc == 0) { xmlNode *op = NULL; xmlNode *reply = pcmk__xpath_find_one(output->doc, "//" PCMK__XE_ST_HISTORY, LOG_NEVER); for (op = pcmk__xe_first_child(reply, NULL, NULL, NULL); op != NULL; op = pcmk__xe_next(op, NULL)) { stonith_history_t *kvp; long long completed; long long completed_nsec = 0L; kvp = pcmk__assert_alloc(1, sizeof(stonith_history_t)); kvp->target = crm_element_value_copy(op, PCMK__XA_ST_TARGET); kvp->action = crm_element_value_copy(op, PCMK__XA_ST_DEVICE_ACTION); kvp->origin = crm_element_value_copy(op, PCMK__XA_ST_ORIGIN); kvp->delegate = crm_element_value_copy(op, PCMK__XA_ST_DELEGATE); kvp->client = crm_element_value_copy(op, PCMK__XA_ST_CLIENTNAME); crm_element_value_ll(op, PCMK__XA_ST_DATE, &completed); kvp->completed = (time_t) completed; crm_element_value_ll(op, PCMK__XA_ST_DATE_NSEC, &completed_nsec); kvp->completed_nsec = completed_nsec; crm_element_value_int(op, PCMK__XA_ST_STATE, &kvp->state); kvp->exit_reason = crm_element_value_copy(op, PCMK_XA_EXIT_REASON); if (last) { last->next = kvp; } else { *history = kvp; } last = kvp; } } pcmk__xml_free(output); return rc; } void stonith_history_free(stonith_history_t *history) { stonith_history_t *hp, *hp_old; for (hp = history; hp; hp_old = hp, hp = hp->next, free(hp_old)) { free(hp->target); free(hp->action); free(hp->origin); free(hp->delegate); free(hp->client); free(hp->exit_reason); } } static gint stonithlib_GCompareFunc(gconstpointer a, gconstpointer b) { int rc = 0; const stonith_notify_client_t *a_client = a; const stonith_notify_client_t *b_client = b; if (a_client->delete || b_client->delete) { /* make entries marked for deletion not findable */ return -1; } CRM_CHECK(a_client->event != NULL && b_client->event != NULL, return 0); rc = strcmp(a_client->event, b_client->event); if (rc == 0) { if (a_client->notify == NULL || b_client->notify == NULL) { return 0; } else if (a_client->notify == b_client->notify) { return 0; } else if (((long)a_client->notify) < ((long)b_client->notify)) { crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return -1; } crm_err("callbacks for %s are not equal: %p vs. %p", a_client->event, a_client->notify, b_client->notify); return 1; } return rc; } xmlNode * stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options) { xmlNode *op_msg = NULL; CRM_CHECK(token != NULL, return NULL); op_msg = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(op_msg, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(op_msg, PCMK__XA_ST_OP, op); crm_xml_add_int(op_msg, PCMK__XA_ST_CALLID, call_id); crm_trace("Sending call options: %.8lx, %d", (long)call_options, call_options); crm_xml_add_int(op_msg, PCMK__XA_ST_CALLOPT, call_options); if (data != NULL) { xmlNode *wrapper = pcmk__xe_create(op_msg, PCMK__XE_ST_CALLDATA); pcmk__xml_copy(wrapper, data); } return op_msg; } static void stonith_destroy_op_callback(gpointer data) { stonith_callback_client_t *blob = data; if (blob->timer && blob->timer->ref > 0) { g_source_remove(blob->timer->ref); } free(blob->timer); free(blob); } static int stonith_api_signoff(stonith_t * stonith) { stonith_private_t *native = stonith->st_private; crm_debug("Disconnecting from the fencer"); if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } free(native->token); native->token = NULL; stonith->state = stonith_disconnected; return pcmk_ok; } static int stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) { stonith_private_t *private = stonith->st_private; if (all_callbacks) { private->op_callback = NULL; g_hash_table_destroy(private->stonith_op_callback_table); private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); } else if (call_id == 0) { private->op_callback = NULL; } else { pcmk__intkey_table_remove(private->stonith_op_callback_table, call_id); } return pcmk_ok; } /*! * \internal * \brief Invoke a (single) specified fence action callback * * \param[in,out] st Fencer API connection * \param[in] call_id If positive, call ID of completed fence action, * otherwise legacy return code for early failure * \param[in,out] result Full result for action * \param[in,out] userdata User data to pass to callback * \param[in] callback Fence action callback to invoke */ static void invoke_fence_action_callback(stonith_t *st, int call_id, pcmk__action_result_t *result, void *userdata, void (*callback) (stonith_t *st, stonith_callback_data_t *data)) { stonith_callback_data_t data = { 0, }; data.call_id = call_id; data.rc = pcmk_rc2legacy(stonith__result2rc(result)); data.userdata = userdata; data.opaque = (void *) result; callback(st, &data); } /*! * \internal * \brief Invoke any callbacks registered for a specified fence action result * * Given a fence action result from the fencer, invoke any callback registered * for that action, as well as any global callback registered. * * \param[in,out] stonith Fencer API connection * \param[in] msg If non-NULL, fencer reply * \param[in] call_id If \p msg is NULL, call ID of action that timed out */ static void invoke_registered_callbacks(stonith_t *stonith, const xmlNode *msg, int call_id) { stonith_private_t *private = NULL; stonith_callback_client_t *cb_info = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(stonith != NULL, return); CRM_CHECK(stonith->st_private != NULL, return); private = stonith->st_private; if (msg == NULL) { // Fencer didn't reply in time pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Fencer accepted request but did not reply in time"); CRM_LOG_ASSERT(call_id > 0); } else { // We have the fencer reply if ((crm_element_value_int(msg, PCMK__XA_ST_CALLID, &call_id) != 0) || (call_id <= 0)) { crm_log_xml_warn(msg, "Bad fencer reply"); } stonith__xe_get_result(msg, &result); } if (call_id > 0) { cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); } if ((cb_info != NULL) && (cb_info->callback != NULL) && (pcmk__result_ok(&result) || !(cb_info->only_success))) { crm_trace("Invoking callback %s for call %d", pcmk__s(cb_info->id, "without ID"), call_id); invoke_fence_action_callback(stonith, call_id, &result, cb_info->user_data, cb_info->callback); } else if ((private->op_callback == NULL) && !pcmk__result_ok(&result)) { crm_warn("Fencing action without registered callback failed: %d (%s%s%s)", result.exit_status, pcmk_exec_status_str(result.execution_status), ((result.exit_reason == NULL)? "" : ": "), ((result.exit_reason == NULL)? "" : result.exit_reason)); crm_log_xml_debug(msg, "Failed fence update"); } if (private->op_callback != NULL) { crm_trace("Invoking global callback for call %d", call_id); invoke_fence_action_callback(stonith, call_id, &result, NULL, private->op_callback); } if (cb_info != NULL) { stonith_api_del_callback(stonith, call_id, FALSE); } pcmk__reset_result(&result); } static gboolean stonith_async_timeout_handler(gpointer data) { struct timer_rec_s *timer = data; crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); invoke_registered_callbacks(timer->stonith, NULL, timer->call_id); /* Always return TRUE, never remove the handler * We do that in stonith_del_callback() */ return TRUE; } static void set_callback_timeout(stonith_callback_client_t * callback, stonith_t * stonith, int call_id, int timeout) { struct timer_rec_s *async_timer = callback->timer; if (timeout <= 0) { return; } if (!async_timer) { async_timer = pcmk__assert_alloc(1, sizeof(struct timer_rec_s)); callback->timer = async_timer; } async_timer->stonith = stonith; async_timer->call_id = call_id; /* Allow a fair bit of grace to allow the server to tell us of a timeout * This is only a fallback */ async_timer->timeout = (timeout + 60) * 1000; if (async_timer->ref) { g_source_remove(async_timer->ref); } async_timer->ref = pcmk__create_timer(async_timer->timeout, stonith_async_timeout_handler, async_timer); } static void update_callback_timeout(int call_id, int timeout, stonith_t * st) { stonith_callback_client_t *callback = NULL; stonith_private_t *private = st->st_private; callback = pcmk__intkey_table_lookup(private->stonith_op_callback_table, call_id); if (!callback || !callback->allow_timeout_updates) { return; } set_callback_timeout(callback, st, call_id, timeout); } static int stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) { const char *type = NULL; struct notify_blob_s blob; stonith_t *st = userdata; stonith_private_t *private = NULL; pcmk__assert(st != NULL); private = st->st_private; blob.stonith = st; blob.xml = pcmk__xml_parse(buffer); if (blob.xml == NULL) { crm_warn("Received malformed message from fencer: %s", buffer); return 0; } /* do callbacks */ type = crm_element_value(blob.xml, PCMK__XA_T); crm_trace("Activating %s callbacks...", type); if (pcmk__str_eq(type, PCMK__VALUE_STONITH_NG, pcmk__str_none)) { invoke_registered_callbacks(st, blob.xml, 0); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_NOTIFY, pcmk__str_none)) { foreach_notify_entry(private, stonith_send_notification, &blob); } else if (pcmk__str_eq(type, PCMK__VALUE_ST_ASYNC_TIMEOUT_VALUE, pcmk__str_none)) { int call_id = 0; int timeout = 0; crm_element_value_int(blob.xml, PCMK__XA_ST_TIMEOUT, &timeout); crm_element_value_int(blob.xml, PCMK__XA_ST_CALLID, &call_id); update_callback_timeout(call_id, timeout, st); } else { crm_err("Unknown message type: %s", type); crm_log_xml_warn(blob.xml, "BadReply"); } pcmk__xml_free(blob.xml); return 1; } static int stonith_api_signon(stonith_t * stonith, const char *name, int *stonith_fd) { int rc = pcmk_ok; stonith_private_t *native = NULL; const char *display_name = name? name : "client"; struct ipc_client_callbacks st_callbacks = { .dispatch = stonith_dispatch_internal, .destroy = stonith_connection_destroy }; CRM_CHECK(stonith != NULL, return -EINVAL); native = stonith->st_private; pcmk__assert(native != NULL); crm_debug("Attempting fencer connection by %s with%s mainloop", display_name, (stonith_fd? "out" : "")); stonith->state = stonith_connected_command; if (stonith_fd) { /* No mainloop */ native->ipc = crm_ipc_new("stonith-ng", 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, stonith_fd); if (rc != pcmk_rc_ok) { crm_debug("Couldn't get file descriptor for IPC: %s", pcmk_rc_str(rc)); } } if (rc != pcmk_rc_ok) { crm_ipc_close(native->ipc); crm_ipc_destroy(native->ipc); native->ipc = NULL; } } } else { /* With mainloop */ native->source = mainloop_add_ipc_client("stonith-ng", G_PRIORITY_MEDIUM, 0, stonith, &st_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { rc = -ENOTCONN; } else { xmlNode *reply = NULL; xmlNode *hello = pcmk__xe_create(NULL, PCMK__XE_STONITH_COMMAND); crm_xml_add(hello, PCMK__XA_T, PCMK__VALUE_STONITH_NG); crm_xml_add(hello, PCMK__XA_ST_OP, CRM_OP_REGISTER); crm_xml_add(hello, PCMK__XA_ST_CLIENTNAME, name); rc = crm_ipc_send(native->ipc, hello, crm_ipc_client_response, -1, &reply); if (rc < 0) { crm_debug("Couldn't register with the fencer: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); rc = -ECOMM; } else if (reply == NULL) { crm_debug("Couldn't register with the fencer: no reply"); rc = -EPROTO; } else { const char *msg_type = crm_element_value(reply, PCMK__XA_ST_OP); native->token = crm_element_value_copy(reply, PCMK__XA_ST_CLIENTID); if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_none)) { crm_debug("Couldn't register with the fencer: invalid reply type '%s'", (msg_type? msg_type : "(missing)")); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else if (native->token == NULL) { crm_debug("Couldn't register with the fencer: no token in reply"); crm_log_xml_debug(reply, "Invalid fencer reply"); rc = -EPROTO; } else { crm_debug("Connection to fencer by %s succeeded (registration token: %s)", display_name, native->token); rc = pcmk_ok; } } pcmk__xml_free(reply); pcmk__xml_free(hello); } if (rc != pcmk_ok) { crm_debug("Connection attempt to fencer by %s failed: %s " QB_XS " rc=%d", display_name, pcmk_strerror(rc), rc); stonith->cmds->disconnect(stonith); } return rc; } static int stonith_set_notification(stonith_t * stonith, const char *callback, int enabled) { int rc = pcmk_ok; xmlNode *notify_msg = pcmk__xe_create(NULL, __func__); stonith_private_t *native = stonith->st_private; if (stonith->state != stonith_disconnected) { crm_xml_add(notify_msg, PCMK__XA_ST_OP, STONITH_OP_NOTIFY); if (enabled) { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_ACTIVATE, callback); } else { crm_xml_add(notify_msg, PCMK__XA_ST_NOTIFY_DEACTIVATE, callback); } rc = crm_ipc_send(native->ipc, notify_msg, crm_ipc_client_response, -1, NULL); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't register for fencing notifications: %d", rc); rc = -ECOMM; } else { rc = pcmk_ok; } } pcmk__xml_free(notify_msg); return rc; } static int stonith_api_add_notification(stonith_t * stonith, const char *event, void (*callback) (stonith_t * stonith, stonith_event_t * e)) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = NULL; private = stonith->st_private; crm_trace("Adding callback for %s events (%d)", event, g_list_length(private->notify_list)); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = callback; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); if (list_item != NULL) { crm_warn("Callback already present"); free(new_client); return -ENOTUNIQ; } else { private->notify_list = g_list_append(private->notify_list, new_client); stonith_set_notification(stonith, event, 1); crm_trace("Callback added (%d)", g_list_length(private->notify_list)); } return pcmk_ok; } static void del_notify_entry(gpointer data, gpointer user_data) { stonith_notify_client_t *entry = data; stonith_t * stonith = user_data; if (!entry->delete) { crm_debug("Removing callback for %s events", entry->event); stonith_api_del_notification(stonith, entry->event); } } static int stonith_api_del_notification(stonith_t * stonith, const char *event) { GList *list_item = NULL; stonith_notify_client_t *new_client = NULL; stonith_private_t *private = stonith->st_private; if (event == NULL) { foreach_notify_entry(private, del_notify_entry, stonith); crm_trace("Removed callback"); return pcmk_ok; } crm_debug("Removing callback for %s events", event); new_client = pcmk__assert_alloc(1, sizeof(stonith_notify_client_t)); new_client->event = event; new_client->notify = NULL; list_item = g_list_find_custom(private->notify_list, new_client, stonithlib_GCompareFunc); stonith_set_notification(stonith, event, 0); if (list_item != NULL) { stonith_notify_client_t *list_client = list_item->data; if (private->notify_refcnt) { list_client->delete = TRUE; private->notify_deletes = TRUE; } else { private->notify_list = g_list_remove(private->notify_list, list_client); free(list_client); } crm_trace("Removed callback"); } else { crm_trace("Callback not present"); } free(new_client); return pcmk_ok; } static int stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int options, void *user_data, const char *callback_name, void (*callback) (stonith_t * st, stonith_callback_data_t * data)) { stonith_callback_client_t *blob = NULL; stonith_private_t *private = NULL; CRM_CHECK(stonith != NULL, return -EINVAL); CRM_CHECK(stonith->st_private != NULL, return -EINVAL); private = stonith->st_private; if (call_id == 0) { // Add global callback private->op_callback = callback; } else if (call_id < 0) { // Call failed immediately, so call callback now if (!(options & st_opt_report_only_success)) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); pcmk__set_result(&result, CRM_EX_ERROR, stonith__legacy2status(call_id), NULL); invoke_fence_action_callback(stonith, call_id, &result, user_data, callback); } else { crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); } return FALSE; } blob = pcmk__assert_alloc(1, sizeof(stonith_callback_client_t)); blob->id = callback_name; blob->only_success = (options & st_opt_report_only_success) ? TRUE : FALSE; blob->user_data = user_data; blob->callback = callback; blob->allow_timeout_updates = (options & st_opt_timeout_updates) ? TRUE : FALSE; if (timeout > 0) { set_callback_timeout(blob, stonith, call_id, timeout); } pcmk__intkey_table_insert(private->stonith_op_callback_table, call_id, blob); crm_trace("Added callback to %s for call %d", callback_name, call_id); return TRUE; } static void stonith_dump_pending_op(gpointer key, gpointer value, gpointer user_data) { int call = GPOINTER_TO_INT(key); stonith_callback_client_t *blob = value; crm_debug("Call %d (%s): pending", call, pcmk__s(blob->id, "no ID")); } void stonith_dump_pending_callbacks(stonith_t * stonith) { stonith_private_t *private = stonith->st_private; if (private->stonith_op_callback_table == NULL) { return; } return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL); } /*! * \internal * \brief Get the data section of a fencer notification * * \param[in] msg Notification XML * \param[in] ntype Notification type */ static xmlNode * get_event_data_xml(xmlNode *msg, const char *ntype) { char *data_addr = crm_strdup_printf("//%s", ntype); xmlNode *data = pcmk__xpath_find_one(msg->doc, data_addr, LOG_DEBUG); free(data_addr); return data; } /* */ static stonith_event_t * xml_to_event(xmlNode *msg) { stonith_event_t *event = pcmk__assert_alloc(1, sizeof(stonith_event_t)); struct event_private *event_private = NULL; event->opaque = pcmk__assert_alloc(1, sizeof(struct event_private)); event_private = (struct event_private *) event->opaque; crm_log_xml_trace(msg, "stonith_notify"); // All notification types have the operation result and notification subtype stonith__xe_get_result(msg, &event_private->result); event->operation = crm_element_value_copy(msg, PCMK__XA_ST_OP); // @COMPAT The API originally provided the result as a legacy return code event->result = pcmk_rc2legacy(stonith__result2rc(&event_private->result)); // Some notification subtypes have additional information if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_FENCE, pcmk__str_none)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->origin = crm_element_value_copy(data, PCMK__XA_ST_ORIGIN); event->action = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ACTION); event->target = crm_element_value_copy(data, PCMK__XA_ST_TARGET); event->executioner = crm_element_value_copy(data, PCMK__XA_ST_DELEGATE); event->id = crm_element_value_copy(data, PCMK__XA_ST_REMOTE_OP); event->client_origin = crm_element_value_copy(data, PCMK__XA_ST_CLIENTNAME); event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID); } } else if (pcmk__str_any_of(event->operation, STONITH_OP_DEVICE_ADD, STONITH_OP_DEVICE_DEL, STONITH_OP_LEVEL_ADD, STONITH_OP_LEVEL_DEL, NULL)) { xmlNode *data = get_event_data_xml(msg, event->operation); if (data == NULL) { crm_err("No data for %s event", event->operation); crm_log_xml_notice(msg, "BadEvent"); } else { event->device = crm_element_value_copy(data, PCMK__XA_ST_DEVICE_ID); } } return event; } static void event_free(stonith_event_t * event) { struct event_private *event_private = event->opaque; free(event->id); free(event->operation); free(event->origin); free(event->action); free(event->target); free(event->executioner); free(event->device); free(event->client_origin); pcmk__reset_result(&event_private->result); free(event->opaque); free(event); } static void stonith_send_notification(gpointer data, gpointer user_data) { struct notify_blob_s *blob = user_data; stonith_notify_client_t *entry = data; stonith_event_t *st_event = NULL; const char *event = NULL; if (blob->xml == NULL) { crm_warn("Skipping callback - NULL message"); return; } event = crm_element_value(blob->xml, PCMK__XA_SUBT); if (entry == NULL) { crm_warn("Skipping callback - NULL callback client"); return; } else if (entry->delete) { crm_trace("Skipping callback - marked for deletion"); return; } else if (entry->notify == NULL) { crm_warn("Skipping callback - NULL callback"); return; } else if (!pcmk__str_eq(entry->event, event, pcmk__str_none)) { crm_trace("Skipping callback - event mismatch %p/%s vs. %s", entry, entry->event, event); return; } st_event = xml_to_event(blob->xml); crm_trace("Invoking callback for %p/%s event...", entry, event); entry->notify(blob->stonith, st_event); crm_trace("Callback invoked..."); event_free(st_event); } /*! * \internal * \brief Create and send an API request * * \param[in,out] stonith Stonith connection * \param[in] op API operation to request * \param[in] data Data to attach to request * \param[out] output_data If not NULL, will be set to reply if synchronous * \param[in] call_options Bitmask of stonith_call_options to use * \param[in] timeout Error if not completed within this many seconds * * \return pcmk_ok (for synchronous requests) or positive call ID * (for asynchronous requests) on success, -errno otherwise */ static int stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNode ** output_data, int call_options, int timeout) { int rc = 0; int reply_id = -1; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; stonith_private_t *native = NULL; pcmk__assert((stonith != NULL) && (stonith->st_private != NULL) && (op != NULL)); native = stonith->st_private; if (output_data != NULL) { *output_data = NULL; } if ((stonith->state == stonith_disconnected) || (native->token == NULL)) { return -ENOTCONN; } /* Increment the call ID, which must be positive to avoid conflicting with * error codes. This shouldn't be a problem unless the client mucked with * it or the counter wrapped around. */ stonith->call_id++; if (stonith->call_id < 1) { stonith->call_id = 1; } op_msg = stonith_create_op(stonith->call_id, native->token, op, data, call_options); if (op_msg == NULL) { return -EINVAL; } crm_xml_add_int(op_msg, PCMK__XA_ST_TIMEOUT, timeout); crm_trace("Sending %s message to fencer with timeout %ds", op, timeout); if (data) { const char *delay_s = crm_element_value(data, PCMK__XA_ST_DELAY); if (delay_s) { crm_xml_add(op_msg, PCMK__XA_ST_DELAY, delay_s); } } { enum crm_ipc_flags ipc_flags = crm_ipc_flags_none; if (call_options & st_opt_sync_call) { pcmk__set_ipc_flags(ipc_flags, "stonith command", crm_ipc_client_response); } rc = crm_ipc_send(native->ipc, op_msg, ipc_flags, 1000 * (timeout + 60), &op_reply); } pcmk__xml_free(op_msg); if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%ds): %d", op, timeout, rc); rc = -ECOMM; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (!(call_options & st_opt_sync_call)) { crm_trace("Async call %d, returning", stonith->call_id); pcmk__xml_free(op_reply); return stonith->call_id; } crm_element_value_int(op_reply, PCMK__XA_ST_CALLID, &reply_id); if (reply_id == stonith->call_id) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; crm_trace("Synchronous reply %d received", reply_id); stonith__xe_get_result(op_reply, &result); rc = pcmk_rc2legacy(stonith__result2rc(&result)); pcmk__reset_result(&result); if ((call_options & st_opt_discard_reply) || output_data == NULL) { crm_trace("Discarding reply"); } else { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } } else if (reply_id <= 0) { crm_err("Received bad reply: No id set"); crm_log_xml_err(op_reply, "Bad reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } else { crm_err("Received bad reply: %d (wanted %d)", reply_id, stonith->call_id); crm_log_xml_err(op_reply, "Old reply"); pcmk__xml_free(op_reply); op_reply = NULL; rc = -ENOMSG; } done: if (!crm_ipc_connected(native->ipc)) { crm_err("Fencer disconnected"); free(native->token); native->token = NULL; stonith->state = stonith_disconnected; } pcmk__xml_free(op_reply); return rc; } /* Not used with mainloop */ bool stonith_dispatch(stonith_t * st) { gboolean stay_connected = TRUE; stonith_private_t *private = NULL; pcmk__assert(st != NULL); private = st->st_private; while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); stonith_dispatch_internal(msg, strlen(msg), st); } if (!crm_ipc_connected(private->ipc)) { crm_err("Connection closed"); stay_connected = FALSE; } } return stay_connected; } static int -stonith_api_free(stonith_t * stonith) +free_stonith_api(stonith_t *stonith) { int rc = pcmk_ok; crm_trace("Destroying %p", stonith); if (stonith->state != stonith_disconnected) { crm_trace("Unregistering notifications and disconnecting %p first", stonith); stonith->cmds->remove_notification(stonith, NULL); rc = stonith->cmds->disconnect(stonith); } if (stonith->state == stonith_disconnected) { stonith_private_t *private = stonith->st_private; crm_trace("Removing %d callbacks", g_hash_table_size(private->stonith_op_callback_table)); g_hash_table_destroy(private->stonith_op_callback_table); crm_trace("Destroying %d notification clients", g_list_length(private->notify_list)); g_list_free_full(private->notify_list, free); free(stonith->st_private); free(stonith->cmds); free(stonith); } else { crm_err("Not free'ing active connection: %s (%d)", pcmk_strerror(rc), rc); } return rc; } void stonith_api_delete(stonith_t * stonith) { - crm_trace("Destroying %p", stonith); - if(stonith) { - stonith->cmds->free(stonith); - } + stonith__api_free(stonith); } static gboolean is_stonith_param(gpointer key, gpointer value, gpointer user_data) { return pcmk_stonith_param(key); } int stonith__validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, GHashTable *params, int timeout_sec, char **output, char **error_output) { int rc = pcmk_rc_ok; /* Use a dummy node name in case the agent requires a target. We assume the * actual target doesn't matter for validation purposes (if in practice, * that is incorrect, we will need to allow the caller to pass the target). */ const char *target = "node1"; char *host_arg = NULL; if (params != NULL) { host_arg = pcmk__str_copy(g_hash_table_lookup(params, PCMK_STONITH_HOST_ARGUMENT)); /* Remove special stonith params from the table before doing anything else */ g_hash_table_foreach_remove(params, is_stonith_param, NULL); } #if PCMK__ENABLE_CIBSECRETS rc = pcmk__substitute_secrets(rsc_id, params); if (rc != pcmk_rc_ok) { crm_warn("Could not replace secret parameters for validation of %s: %s", agent, pcmk_rc_str(rc)); // rc is standard return value, don't return it in this function } #endif if (output) { *output = NULL; } if (error_output) { *error_output = NULL; } if (timeout_sec <= 0) { timeout_sec = PCMK_DEFAULT_ACTION_TIMEOUT_MS; } switch (stonith_get_namespace(agent, namespace_s)) { case st_namespace_rhcs: rc = stonith__rhcs_validate(st, call_options, target, agent, params, host_arg, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #if HAVE_STONITH_STONITH_H case st_namespace_lha: rc = stonith__lha_validate(st, call_options, target, agent, params, timeout_sec, output, error_output); rc = pcmk_legacy2rc(rc); break; #endif case st_namespace_invalid: errno = ENOENT; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s not found", agent); } else { crm_err("Agent %s not found", agent); } break; default: errno = EOPNOTSUPP; rc = errno; if (error_output) { *error_output = crm_strdup_printf("Agent %s does not support validation", agent); } else { crm_err("Agent %s does not support validation", agent); } break; } free(host_arg); return rc; } static int stonith_api_validate(stonith_t *st, int call_options, const char *rsc_id, const char *namespace_s, const char *agent, const stonith_key_value_t *params, int timeout_sec, char **output, char **error_output) { /* Validation should be done directly via the agent, so we can get it from * stonith_admin when the cluster is not running, which is important for * higher-level tools. */ int rc = pcmk_ok; GHashTable *params_table = pcmk__strkey_table(free, free); // Convert parameter list to a hash table for (; params; params = params->next) { if (!pcmk_stonith_param(params->key)) { pcmk__insert_dup(params_table, params->key, params->value); } } rc = stonith__validate(st, call_options, rsc_id, namespace_s, agent, params_table, timeout_sec, output, error_output); g_hash_table_destroy(params_table); return rc; } /*! * \internal * \brief Create a new fencer API connection object * * \return Newly allocated fencer API connection object, or \c NULL on * allocation failure */ stonith_t * stonith__api_new(void) { stonith_t *new_stonith = NULL; stonith_private_t *private = NULL; new_stonith = calloc(1, sizeof(stonith_t)); if (new_stonith == NULL) { return NULL; } private = calloc(1, sizeof(stonith_private_t)); if (private == NULL) { free(new_stonith); return NULL; } new_stonith->st_private = private; private->stonith_op_callback_table = pcmk__intkey_table(stonith_destroy_op_callback); private->notify_list = NULL; private->notify_refcnt = 0; private->notify_deletes = FALSE; new_stonith->call_id = 1; new_stonith->state = stonith_disconnected; new_stonith->cmds = calloc(1, sizeof(stonith_api_operations_t)); if (new_stonith->cmds == NULL) { free(new_stonith->st_private); free(new_stonith); return NULL; } - new_stonith->cmds->free = stonith_api_free; + new_stonith->cmds->free = free_stonith_api; new_stonith->cmds->connect = stonith_api_signon; new_stonith->cmds->disconnect = stonith_api_signoff; new_stonith->cmds->list = stonith_api_list; new_stonith->cmds->monitor = stonith_api_monitor; new_stonith->cmds->status = stonith_api_status; new_stonith->cmds->fence = stonith_api_fence; new_stonith->cmds->fence_with_delay = stonith_api_fence_with_delay; new_stonith->cmds->confirm = stonith_api_confirm; new_stonith->cmds->history = stonith_api_history; new_stonith->cmds->list_agents = stonith_api_device_list; new_stonith->cmds->metadata = stonith_api_device_metadata; new_stonith->cmds->query = stonith_api_query; new_stonith->cmds->remove_device = stonith_api_remove_device; new_stonith->cmds->register_device = stonith_api_register_device; new_stonith->cmds->remove_level = stonith_api_remove_level; new_stonith->cmds->remove_level_full = stonith_api_remove_level_full; new_stonith->cmds->register_level = stonith_api_register_level; new_stonith->cmds->register_level_full = stonith_api_register_level_full; new_stonith->cmds->remove_callback = stonith_api_del_callback; new_stonith->cmds->register_callback = stonith_api_add_callback; new_stonith->cmds->remove_notification = stonith_api_del_notification; new_stonith->cmds->register_notification = stonith_api_add_notification; new_stonith->cmds->validate = stonith_api_validate; return new_stonith; } +/*! + * \internal + * \brief Free a fencer API connection object + * + * \param[in,out] stonith_api Fencer API connection object + */ +void +stonith__api_free(stonith_t *stonith_api) +{ + crm_trace("Destroying %p", stonith_api); + if (stonith_api != NULL) { + stonith_api->cmds->free(stonith_api); + } +} + /*! * \brief Make a blocking connection attempt to the fencer * * \param[in,out] st Fencer API object * \param[in] name Client name to use with fencer * \param[in] max_attempts Return error if this many attempts fail * * \return pcmk_ok on success, result of last attempt otherwise */ int stonith_api_connect_retry(stonith_t *st, const char *name, int max_attempts) { int rc = -EINVAL; // if max_attempts is not positive for (int attempt = 1; attempt <= max_attempts; attempt++) { rc = st->cmds->connect(st, name, NULL); if (rc == pcmk_ok) { return pcmk_ok; } else if (attempt < max_attempts) { crm_notice("Fencer connection attempt %d of %d failed (retrying in 2s): %s " QB_XS " rc=%d", attempt, max_attempts, pcmk_strerror(rc), rc); sleep(2); } } crm_notice("Could not connect to fencer: %s " QB_XS " rc=%d", pcmk_strerror(rc), rc); return rc; } stonith_key_value_t * stonith_key_value_add(stonith_key_value_t * head, const char *key, const char *value) { stonith_key_value_t *p, *end; p = pcmk__assert_alloc(1, sizeof(stonith_key_value_t)); p->key = pcmk__str_copy(key); p->value = pcmk__str_copy(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void stonith_key_value_freeall(stonith_key_value_t * head, int keys, int values) { stonith_key_value_t *p; while (head) { p = head->next; if (keys) { free(head->key); } if (values) { free(head->value); } free(head); head = p; } } #define api_log_open() openlog("stonith-api", LOG_CONS | LOG_NDELAY | LOG_PID, LOG_DAEMON) #define api_log(level, fmt, args...) syslog(level, "%s: "fmt, __func__, args) int stonith_api_kick(uint32_t nodeid, const char *uname, int timeout, bool off) { int rc = pcmk_ok; stonith_t *st = stonith__api_new(); const char *action = off? PCMK_ACTION_OFF : PCMK_ACTION_REBOOT; api_log_open(); if (st == NULL) { api_log(LOG_ERR, "API initialization failed, could not kick (%s) node %u/%s", action, nodeid, uname); return -EPROTO; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_ERR, "Connection failed, could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); int opts = 0; stonith__set_call_options(opts, name, st_opt_sync_call|st_opt_allow_self_fencing); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->fence(st, opts, name, action, timeout, 0); free(name); if (rc != pcmk_ok) { api_log(LOG_ERR, "Could not kick (%s) node %u/%s : %s (%d)", action, nodeid, uname, pcmk_strerror(rc), rc); } else { api_log(LOG_NOTICE, "Node %u/%s kicked: %s", nodeid, uname, action); } } - stonith_api_delete(st); + stonith__api_free(st); return rc; } time_t stonith_api_time(uint32_t nodeid, const char *uname, bool in_progress) { int rc = pcmk_ok; time_t when = 0; stonith_t *st = stonith__api_new(); stonith_history_t *history = NULL, *hp = NULL; if (st == NULL) { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: " "API initialization failed", nodeid, uname); return when; } rc = st->cmds->connect(st, "stonith-api", NULL); if (rc != pcmk_ok) { api_log(LOG_NOTICE, "Connection failed: %s (%d)", pcmk_strerror(rc), rc); } else { int entries = 0; int progress = 0; int completed = 0; int opts = 0; char *name = (uname == NULL)? pcmk__itoa(nodeid) : strdup(uname); stonith__set_call_options(opts, name, st_opt_sync_call); if ((uname == NULL) && (nodeid > 0)) { stonith__set_call_options(opts, name, st_opt_cs_nodeid); } rc = st->cmds->history(st, opts, name, &history, 120); free(name); for (hp = history; hp; hp = hp->next) { entries++; if (in_progress) { progress++; if (hp->state != st_done && hp->state != st_failed) { when = time(NULL); } } else if (hp->state == st_done) { completed++; if (hp->completed > when) { when = hp->completed; } } } stonith_history_free(history); if(rc == pcmk_ok) { api_log(LOG_INFO, "Found %d entries for %u/%s: %d in progress, %d completed", entries, nodeid, uname, progress, completed); } else { api_log(LOG_ERR, "Could not retrieve fence history for %u/%s: %s (%d)", nodeid, uname, pcmk_strerror(rc), rc); } } - stonith_api_delete(st); + stonith__api_free(st); if(when) { api_log(LOG_INFO, "Node %u/%s last kicked at: %ld", nodeid, uname, (long int)when); } return when; } bool stonith_agent_exists(const char *agent, int timeout) { stonith_t *st = NULL; stonith_key_value_t *devices = NULL; stonith_key_value_t *dIter = NULL; bool rc = FALSE; if (agent == NULL) { return rc; } st = stonith__api_new(); if (st == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return FALSE; } st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, timeout == 0 ? 120 : timeout); for (dIter = devices; dIter != NULL; dIter = dIter->next) { if (pcmk__str_eq(dIter->value, agent, pcmk__str_none)) { rc = TRUE; break; } } stonith_key_value_freeall(devices, 1, 1); - stonith_api_delete(st); + stonith__api_free(st); return rc; } const char * stonith_action_str(const char *action) { if (action == NULL) { return "fencing"; } else if (strcmp(action, PCMK_ACTION_ON) == 0) { return "unfencing"; } else if (strcmp(action, PCMK_ACTION_OFF) == 0) { return "turning off"; } else { return action; } } /*! * \internal * \brief Parse a target name from one line of a target list string * * \param[in] line One line of a target list string * \param[in] len String length of line * \param[in,out] output List to add newly allocated target name to */ static void parse_list_line(const char *line, int len, GList **output) { size_t i = 0; size_t entry_start = 0; /* Skip complaints about additional parameters device doesn't understand * * @TODO Document or eliminate the implied restriction of target names */ if (strstr(line, "invalid") || strstr(line, "variable")) { crm_debug("Skipping list output line: %s", line); return; } // Process line content, character by character for (i = 0; i <= len; i++) { if (isspace(line[i]) || (line[i] == ',') || (line[i] == ';') || (line[i] == '\0')) { // We've found a separator (i.e. the end of an entry) int rc = 0; char *entry = NULL; if (i == entry_start) { // Skip leading and sequential separators entry_start = i + 1; continue; } entry = pcmk__assert_alloc(i - entry_start + 1, sizeof(char)); /* Read entry, stopping at first separator * * @TODO Document or eliminate these character restrictions */ rc = sscanf(line + entry_start, "%[a-zA-Z0-9_-.]", entry); if (rc != 1) { crm_warn("Could not parse list output entry: %s " QB_XS " entry_start=%d position=%d", line + entry_start, entry_start, i); free(entry); } else if (pcmk__strcase_any_of(entry, PCMK_ACTION_ON, PCMK_ACTION_OFF, NULL)) { /* Some agents print the target status in the list output, * though none are known now (the separate list-status command * is used for this, but it can also print "UNKNOWN"). To handle * this possibility, skip such entries. * * @TODO Document or eliminate the implied restriction of target * names. */ free(entry); } else { // We have a valid entry *output = g_list_append(*output, entry); } entry_start = i + 1; } } } /*! * \internal * \brief Parse a list of targets from a string * * \param[in] list_output Target list as a string * * \return List of target names * \note The target list string format is flexible, to allow for user-specified * lists such pcmk_host_list and the output of an agent's list action * (whether direct or via the API, which escapes newlines). There may be * multiple lines, separated by either a newline or an escaped newline * (backslash n). Each line may have one or more target names, separated * by any combination of whitespace, commas, and semi-colons. Lines * containing "invalid" or "variable" will be ignored entirely. Target * names "on" or "off" (case-insensitive) will be ignored. Target names * may contain only alphanumeric characters, underbars (_), dashes (-), * and dots (.) (if any other character occurs in the name, it and all * subsequent characters in the name will be ignored). * \note The caller is responsible for freeing the result with * g_list_free_full(result, free). */ GList * stonith__parse_targets(const char *target_spec) { GList *targets = NULL; if (target_spec != NULL) { size_t out_len = strlen(target_spec); size_t line_start = 0; // Starting index of line being processed for (size_t i = 0; i <= out_len; ++i) { if ((target_spec[i] == '\n') || (target_spec[i] == '\0') || ((target_spec[i] == '\\') && (target_spec[i + 1] == 'n'))) { // We've reached the end of one line of output int len = i - line_start; if (len > 0) { char *line = strndup(target_spec + line_start, len); line[len] = '\0'; // Because it might be a newline parse_list_line(line, len, &targets); free(line); } if (target_spec[i] == '\\') { ++i; // backslash-n takes up two positions } line_start = i + 1; } } } return targets; } /*! * \internal * \brief Check whether a fencing failure was followed by an equivalent success * * \param[in] event Fencing failure * \param[in] top_history Complete fencing history (must be sorted by * stonith__sort_history() beforehand) * * \return The name of the node that executed the fencing if a later successful * event exists, or NULL if no such event exists */ const char * stonith__later_succeeded(const stonith_history_t *event, const stonith_history_t *top_history) { const char *other = NULL; for (const stonith_history_t *prev_hp = top_history; prev_hp != NULL; prev_hp = prev_hp->next) { if (prev_hp == event) { break; } if ((prev_hp->state == st_done) && pcmk__str_eq(event->target, prev_hp->target, pcmk__str_casei) && pcmk__str_eq(event->action, prev_hp->action, pcmk__str_none) && ((event->completed < prev_hp->completed) || ((event->completed == prev_hp->completed) && (event->completed_nsec < prev_hp->completed_nsec)))) { if ((event->delegate == NULL) || pcmk__str_eq(event->delegate, prev_hp->delegate, pcmk__str_casei)) { // Prefer equivalent fencing by same executioner return prev_hp->delegate; } else if (other == NULL) { // Otherwise remember first successful executioner other = (prev_hp->delegate == NULL)? "some node" : prev_hp->delegate; } } } return other; } /*! * \internal * \brief Sort fencing history, pending first then by most recently completed * * \param[in,out] history List of stonith actions * * \return New head of sorted \p history */ stonith_history_t * stonith__sort_history(stonith_history_t *history) { stonith_history_t *new = NULL, *pending = NULL, *hp, *np, *tmp; for (hp = history; hp; ) { tmp = hp->next; if ((hp->state == st_done) || (hp->state == st_failed)) { /* sort into new */ if ((!new) || (hp->completed > new->completed) || ((hp->completed == new->completed) && (hp->completed_nsec > new->completed_nsec))) { hp->next = new; new = hp; } else { np = new; do { if ((!np->next) || (hp->completed > np->next->completed) || ((hp->completed == np->next->completed) && (hp->completed_nsec > np->next->completed_nsec))) { hp->next = np->next; np->next = hp; break; } np = np->next; } while (1); } } else { /* put into pending */ hp->next = pending; pending = hp; } hp = tmp; } /* pending actions don't have a completed-stamp so make them go front */ if (pending) { stonith_history_t *last_pending = pending; while (last_pending->next) { last_pending = last_pending->next; } last_pending->next = new; new = pending; } return new; } /*! * \brief Return string equivalent of an operation state value * * \param[in] state Fencing operation state value * * \return Human-friendly string equivalent of state */ const char * stonith_op_state_str(enum op_state state) { switch (state) { case st_query: return "querying"; case st_exec: return "executing"; case st_done: return "completed"; case st_duplicate: return "duplicate"; case st_failed: return "failed"; } return "unknown"; } stonith_history_t * stonith__first_matching_event(stonith_history_t *history, bool (*matching_fn)(stonith_history_t *, void *), void *user_data) { for (stonith_history_t *hp = history; hp; hp = hp->next) { if (matching_fn(hp, user_data)) { return hp; } } return NULL; } bool stonith__event_state_pending(stonith_history_t *history, void *user_data) { return history->state != st_failed && history->state != st_done; } bool stonith__event_state_eq(stonith_history_t *history, void *user_data) { return history->state == GPOINTER_TO_INT(user_data); } bool stonith__event_state_neq(stonith_history_t *history, void *user_data) { return history->state != GPOINTER_TO_INT(user_data); } /*! * \internal * \brief Check whether a given parameter exists in a fence agent's metadata * * \param[in] metadata Agent metadata * \param[in] name Parameter name * * \retval \c true If \p name exists as a parameter in \p metadata * \retval \c false Otherwise */ static bool param_is_supported(xmlNode *metadata, const char *name) { char *xpath_s = crm_strdup_printf("//" PCMK_XE_PARAMETER "[@" PCMK_XA_NAME "='%s']", name); xmlXPathObject *xpath = pcmk__xpath_search(metadata->doc, xpath_s); bool supported = (pcmk__xpath_num_results(xpath) > 0); free(xpath_s); xmlXPathFreeObject(xpath); return supported; } /*! * \internal * \brief Get the default host argument based on a device's agent metadata * * If an agent supports the "plug" parameter, default to that. Otherwise default * to the "port" parameter if supported. Otherwise return \c NULL. * * \param[in] metadata Agent metadata * * \return Parameter name for default host argument */ const char * stonith__default_host_arg(xmlNode *metadata) { CRM_CHECK(metadata != NULL, return NULL); if (param_is_supported(metadata, "plug")) { return "plug"; } if (param_is_supported(metadata, "port")) { return "port"; } return NULL; } /*! * \internal * \brief Retrieve fence agent meta-data asynchronously * * \param[in] agent Agent to execute * \param[in] timeout_sec Error if not complete within this time * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or * later via the main loop, and on success the * metadata will be in its result argument's * action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note The caller must use a main loop. This function is not a * stonith_api_operations_t method because it does not need a stonith_t * object and does not go through the fencer, but executes the agent * directly. */ int stonith__metadata_async(const char *agent, int timeout_sec, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { switch (stonith_get_namespace(agent, NULL)) { case st_namespace_rhcs: { stonith_action_t *action = NULL; int rc = pcmk_ok; action = stonith__action_create(agent, PCMK_ACTION_METADATA, NULL, timeout_sec, NULL, NULL, NULL); rc = stonith__execute_async(action, user_data, callback, NULL); if (rc != pcmk_ok) { callback(0, stonith__action_result(action), user_data); stonith__destroy_action(action); } return pcmk_legacy2rc(rc); } #if HAVE_STONITH_STONITH_H case st_namespace_lha: // LHA metadata is simply synthesized, so simulate async { pcmk__action_result_t result = { .exit_status = CRM_EX_OK, .execution_status = PCMK_EXEC_DONE, .exit_reason = NULL, .action_stdout = NULL, .action_stderr = NULL, }; stonith__lha_metadata(agent, timeout_sec, &result.action_stdout); callback(0, &result, user_data); pcmk__reset_result(&result); return pcmk_rc_ok; } #endif default: { pcmk__action_result_t result = { .exit_status = CRM_EX_NOSUCH, .execution_status = PCMK_EXEC_ERROR_HARD, .exit_reason = crm_strdup_printf("No such agent '%s'", agent), .action_stdout = NULL, .action_stderr = NULL, }; callback(0, &result, user_data); pcmk__reset_result(&result); return ENOENT; } } } /*! * \internal * \brief Return the exit status from an async action callback * * \param[in] data Callback data * * \return Exit status from callback data */ int stonith__exit_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return CRM_EX_ERROR; } return ((pcmk__action_result_t *) data->opaque)->exit_status; } /*! * \internal * \brief Return the execution status from an async action callback * * \param[in] data Callback data * * \return Execution status from callback data */ int stonith__execution_status(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } return ((pcmk__action_result_t *) data->opaque)->execution_status; } /*! * \internal * \brief Return the exit reason from an async action callback * * \param[in] data Callback data * * \return Exit reason from callback data */ const char * stonith__exit_reason(const stonith_callback_data_t *data) { if ((data == NULL) || (data->opaque == NULL)) { return NULL; } return ((pcmk__action_result_t *) data->opaque)->exit_reason; } /*! * \internal * \brief Return the exit status from an event notification * * \param[in] event Event * * \return Exit status from event */ int stonith__event_exit_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return CRM_EX_ERROR; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_status; } } /*! * \internal * \brief Return the execution status from an event notification * * \param[in] event Event * * \return Execution status from event */ int stonith__event_execution_status(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return PCMK_EXEC_UNKNOWN; } else { struct event_private *event_private = event->opaque; return event_private->result.execution_status; } } /*! * \internal * \brief Return the exit reason from an event notification * * \param[in] event Event * * \return Exit reason from event */ const char * stonith__event_exit_reason(const stonith_event_t *event) { if ((event == NULL) || (event->opaque == NULL)) { return NULL; } else { struct event_private *event_private = event->opaque; return event_private->result.exit_reason; } } /*! * \internal * \brief Return a human-friendly description of a fencing event * * \param[in] event Event to describe * * \return Newly allocated string with description of \p event * \note The caller is responsible for freeing the return value. * This function asserts on memory errors and never returns NULL. */ char * stonith__event_description(const stonith_event_t *event) { // Use somewhat readable defaults const char *origin = pcmk__s(event->client_origin, "a client"); const char *origin_node = pcmk__s(event->origin, "a node"); const char *executioner = pcmk__s(event->executioner, "the cluster"); const char *device = pcmk__s(event->device, "unknown"); const char *action = pcmk__s(event->action, event->operation); const char *target = pcmk__s(event->target, "no node"); const char *reason = stonith__event_exit_reason(event); const char *status; if (action == NULL) { action = "(unknown)"; } if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) { status = pcmk_exec_status_str(stonith__event_execution_status(event)); } else if (stonith__event_exit_status(event) != CRM_EX_OK) { status = pcmk_exec_status_str(PCMK_EXEC_ERROR); } else { status = crm_exit_str(CRM_EX_OK); } if (pcmk__str_eq(event->operation, PCMK__VALUE_ST_NOTIFY_HISTORY, pcmk__str_none)) { return crm_strdup_printf("Fencing history may have changed"); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing device (%s) was removed", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was added", device); } else if (pcmk__str_eq(event->operation, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { return crm_strdup_printf("A fencing topology level (%s) was removed", device); } // event->operation should be PCMK__VALUE_ST_NOTIFY_FENCE at this point return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)", action, target, executioner, origin, origin_node, status, ((reason == NULL)? "" : " ("), pcmk__s(reason, ""), ((reason == NULL)? "" : ")"), pcmk__s(event->id, "(none)")); } // Deprecated functions kept only for backward API compatibility // LCOV_EXCL_START // See comments in stonith-ng.h for why we re-declare before defining stonith_t *stonith_api_new(void); stonith_t * stonith_api_new(void) { return stonith__api_new(); } // LCOV_EXCL_STOP // End deprecated API diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c index 04e9ee8ad2..16cf66ec19 100644 --- a/lib/pacemaker/pcmk_fence.c +++ b/lib/pacemaker/pcmk_fence.c @@ -1,677 +1,677 @@ /* - * Copyright 2009-2024 the Pacemaker project contributors + * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include // stonith__* #include #include #include #include #include "libpacemaker_private.h" static const int st_opts = st_opt_sync_call|st_opt_allow_self_fencing; static GMainLoop *mainloop = NULL; static struct { stonith_t *st; const char *target; const char *action; char *name; unsigned int timeout; unsigned int tolerance; int delay; pcmk__action_result_t result; } async_fence_data = { NULL, }; static int handle_level(stonith_t *st, const char *target, int fence_level, GList *devices, bool added) { const char *node = NULL; const char *pattern = NULL; const char *name = NULL; char *value = NULL; int rc = pcmk_rc_ok; if (target == NULL) { // Not really possible, but makes static analysis happy return EINVAL; } /* Determine if targeting by attribute, node name pattern or node name */ value = strchr(target, '='); if (value != NULL) { name = target; *value++ = '\0'; } else if (*target == '@') { pattern = target + 1; } else { node = target; } /* Register or unregister level as appropriate */ if (added) { stonith_key_value_t *kvs = NULL; for (GList *iter = devices; iter != NULL; iter = iter->next) { kvs = stonith_key_value_add(kvs, NULL, iter->data); } rc = st->cmds->register_level_full(st, st_opts, node, pattern, name, value, fence_level, kvs); stonith_key_value_freeall(kvs, 0, 1); } else { rc = st->cmds->remove_level_full(st, st_opts, node, pattern, name, value, fence_level); } return pcmk_legacy2rc(rc); } static stonith_history_t * reduce_fence_history(stonith_history_t *history) { stonith_history_t *new, *hp, *np; if (!history) { return history; } new = history; hp = new->next; new->next = NULL; while (hp) { stonith_history_t *hp_next = hp->next; hp->next = NULL; for (np = new; ; np = np->next) { if ((hp->state == st_done) || (hp->state == st_failed)) { /* action not in progress */ if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) && pcmk__str_eq(hp->action, np->action, pcmk__str_none) && (hp->state == np->state) && ((hp->state == st_done) || pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) { /* purge older hp */ stonith_history_free(hp); break; } } if (!np->next) { np->next = hp; break; } } hp = hp_next; } return new; } static void notify_callback(stonith_t * st, stonith_event_t * e) { if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) { pcmk__set_result(&async_fence_data.result, stonith__event_exit_status(e), stonith__event_execution_status(e), stonith__event_exit_reason(e)); g_main_loop_quit(mainloop); } } static void fence_callback(stonith_t * stonith, stonith_callback_data_t * data) { pcmk__set_result(&async_fence_data.result, stonith__exit_status(data), stonith__execution_status(data), stonith__exit_reason(data)); g_main_loop_quit(mainloop); } static gboolean async_fence_helper(gpointer user_data) { stonith_t *st = async_fence_data.st; int call_id = 0; int rc = stonith_api_connect_retry(st, async_fence_data.name, 10); int timeout = 0; if (rc != pcmk_ok) { g_main_loop_quit(mainloop); pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc)); return TRUE; } st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, notify_callback); call_id = st->cmds->fence_with_delay(st, st_opt_allow_self_fencing, async_fence_data.target, async_fence_data.action, pcmk__timeout_ms2s(async_fence_data.timeout), pcmk__timeout_ms2s(async_fence_data.tolerance), async_fence_data.delay); if (call_id < 0) { g_main_loop_quit(mainloop); pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_ERROR, pcmk_strerror(call_id)); return TRUE; } timeout = pcmk__timeout_ms2s(async_fence_data.timeout); if (async_fence_data.delay > 0) { timeout += async_fence_data.delay; } st->cmds->register_callback(st, call_id, timeout, st_opt_timeout_updates, NULL, "callback", fence_callback); return TRUE; } int pcmk__request_fencing(stonith_t *st, const char *target, const char *action, const char *name, unsigned int timeout, unsigned int tolerance, int delay, char **reason) { crm_trigger_t *trig; int rc = pcmk_rc_ok; async_fence_data.st = st; async_fence_data.name = strdup(name); async_fence_data.target = target; async_fence_data.action = action; async_fence_data.timeout = timeout; async_fence_data.tolerance = tolerance; async_fence_data.delay = delay; pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN, NULL); trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL); mainloop_set_trigger(trig); mainloop = g_main_loop_new(NULL, FALSE); g_main_loop_run(mainloop); free(async_fence_data.name); if (reason != NULL) { // Give the caller ownership of the exit reason *reason = async_fence_data.result.exit_reason; async_fence_data.result.exit_reason = NULL; } rc = stonith__result2rc(&async_fence_data.result); pcmk__reset_result(&async_fence_data.result); return rc; } int pcmk_request_fencing(xmlNodePtr *xml, const char *target, const char *action, const char *name, unsigned int timeout, unsigned int tolerance, int delay, char **reason) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__request_fencing(st, target, action, name, timeout, tolerance, delay, reason); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_history(pcmk__output_t *out, stonith_t *st, const char *target, unsigned int timeout, int verbose, bool broadcast, bool cleanup) { stonith_history_t *history = NULL; stonith_history_t *latest = NULL; int rc = pcmk_rc_ok; int opts = 0; if (cleanup) { out->info(out, "cleaning up fencing-history%s%s", target ? " for node " : "", target ? target : ""); } if (broadcast) { out->info(out, "gather fencing-history from all nodes"); } stonith__set_call_options(opts, target, st_opts); if (cleanup) { stonith__set_call_options(opts, target, st_opt_cleanup); } if (broadcast) { stonith__set_call_options(opts, target, st_opt_broadcast); } if (pcmk__str_eq(target, "*", pcmk__str_none)) { target = NULL; } rc = st->cmds->history(st, opts, target, &history, pcmk__timeout_ms2s(timeout)); if (cleanup) { // Cleanup doesn't return a history list stonith_history_free(history); return pcmk_legacy2rc(rc); } out->begin_list(out, "event", "events", "Fencing history"); history = stonith__sort_history(history); for (stonith_history_t *hp = history; hp != NULL; hp = hp->next) { if (hp->state == st_done) { latest = hp; } if (out->is_quiet(out) || !verbose) { continue; } out->message(out, "stonith-event", hp, true, false, stonith__later_succeeded(hp, history), (uint32_t) pcmk_show_failed_detail); out->increment_list(out); } if (latest) { if (out->is_quiet(out)) { out->message(out, "stonith-event", latest, false, true, NULL, (uint32_t) pcmk_show_failed_detail); } else if (!verbose) { // already printed if verbose out->message(out, "stonith-event", latest, false, false, NULL, (uint32_t) pcmk_show_failed_detail); out->increment_list(out); } } out->end_list(out); stonith_history_free(history); return pcmk_legacy2rc(rc); } int pcmk_fence_history(xmlNodePtr *xml, const char *target, unsigned int timeout, bool quiet, int verbose, bool broadcast, bool cleanup) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } out->quiet = quiet; rc = pcmk__fence_history(out, st, target, timeout, verbose, broadcast, cleanup); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_installed(pcmk__output_t *out, stonith_t *st, unsigned int timeout) { stonith_key_value_t *devices = NULL; int rc = pcmk_rc_ok; rc = st->cmds->list_agents(st, st_opt_sync_call, NULL, &devices, pcmk__timeout_ms2s(timeout)); // rc is a negative error code or a positive number of agents if (rc < 0) { return pcmk_legacy2rc(rc); } out->begin_list(out, "fence device", "fence devices", "Installed fence devices"); for (stonith_key_value_t *iter = devices; iter != NULL; iter = iter->next) { out->list_item(out, "device", "%s", iter->value); } out->end_list(out); stonith_key_value_freeall(devices, 1, 1); return pcmk_rc_ok; } int pcmk_fence_installed(xmlNodePtr *xml, unsigned int timeout) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_installed(out, st, timeout); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_last(pcmk__output_t *out, const char *target, bool as_nodeid) { time_t when = 0; if (target == NULL) { return pcmk_rc_ok; } if (as_nodeid) { when = stonith_api_time(atol(target), NULL, FALSE); } else { when = stonith_api_time(0, target, FALSE); } return out->message(out, "last-fenced", target, when); } int pcmk_fence_last(xmlNodePtr *xml, const char *target, bool as_nodeid) { pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { return rc; } stonith__register_messages(out); rc = pcmk__fence_last(out, target, as_nodeid); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); return rc; } int pcmk__fence_list_targets(pcmk__output_t *out, stonith_t *st, const char *device_id, unsigned int timeout) { GList *targets = NULL; char *lists = NULL; int rc = pcmk_rc_ok; rc = st->cmds->list(st, st_opts, device_id, &lists, pcmk__timeout_ms2s(timeout)); if (rc != pcmk_rc_ok) { return pcmk_legacy2rc(rc); } targets = stonith__parse_targets(lists); out->begin_list(out, "fence target", "fence targets", "Fence Targets"); while (targets != NULL) { out->list_item(out, NULL, "%s", (const char *) targets->data); targets = targets->next; } out->end_list(out); free(lists); return rc; } int pcmk_fence_list_targets(xmlNodePtr *xml, const char *device_id, unsigned int timeout) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_list_targets(out, st, device_id, timeout); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_metadata(pcmk__output_t *out, stonith_t *st, const char *agent, unsigned int timeout) { char *buffer = NULL; int rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, pcmk__timeout_ms2s(timeout)); if (rc != pcmk_rc_ok) { return pcmk_legacy2rc(rc); } out->output_xml(out, PCMK_XE_METADATA, buffer); free(buffer); return rc; } int pcmk_fence_metadata(xmlNodePtr *xml, const char *agent, unsigned int timeout) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_metadata(out, st, agent, timeout); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_registered(pcmk__output_t *out, stonith_t *st, const char *target, unsigned int timeout) { stonith_key_value_t *devices = NULL; int rc = pcmk_rc_ok; rc = st->cmds->query(st, st_opts, target, &devices, pcmk__timeout_ms2s(timeout)); /* query returns a negative error code or a positive number of results. */ if (rc < 0) { return pcmk_legacy2rc(rc); } out->begin_list(out, "fence device", "fence devices", "Registered fence devices"); for (stonith_key_value_t *iter = devices; iter != NULL; iter = iter->next) { out->list_item(out, "device", "%s", iter->value); } out->end_list(out); stonith_key_value_freeall(devices, 1, 1); /* Return pcmk_rc_ok here, not the number of results. Callers probably * don't care. */ return pcmk_rc_ok; } int pcmk_fence_registered(xmlNodePtr *xml, const char *target, unsigned int timeout) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_registered(out, st, target, timeout); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_register_level(stonith_t *st, const char *target, int fence_level, GList *devices) { return handle_level(st, target, fence_level, devices, true); } int pcmk_fence_register_level(xmlNodePtr *xml, const char *target, int fence_level, GList *devices) { stonith_t* st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_register_level(st, target, fence_level, devices); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_unregister_level(stonith_t *st, const char *target, int fence_level) { return handle_level(st, target, fence_level, NULL, false); } int pcmk_fence_unregister_level(xmlNodePtr *xml, const char *target, int fence_level) { stonith_t* st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_unregister_level(st, target, fence_level); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__fence_validate(pcmk__output_t *out, stonith_t *st, const char *agent, const char *id, GHashTable *params, unsigned int timeout) { char *output = NULL; char *error_output = NULL; int rc; rc = stonith__validate(st, st_opt_sync_call, id, NULL, agent, params, pcmk__timeout_ms2s(timeout), &output, &error_output); out->message(out, "validate", agent, id, output, error_output, rc); return pcmk_legacy2rc(rc); } int pcmk_fence_validate(xmlNodePtr *xml, const char *agent, const char *id, GHashTable *params, unsigned int timeout) { stonith_t *st = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; rc = pcmk__setup_output_fencing(&out, &st, xml); if (rc != pcmk_rc_ok) { return rc; } rc = pcmk__fence_validate(out, st, agent, id, params, timeout); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); return rc; } int pcmk__get_fencing_history(stonith_t *st, stonith_history_t **stonith_history, enum pcmk__fence_history fence_history) { int rc = pcmk_rc_ok; if ((st == NULL) || (st->state == stonith_disconnected)) { rc = ENOTCONN; } else if (fence_history != pcmk__fence_history_none) { rc = st->cmds->history(st, st_opt_sync_call, NULL, stonith_history, 120); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { return rc; } *stonith_history = stonith__sort_history(*stonith_history); if (fence_history == pcmk__fence_history_reduced) { *stonith_history = reduce_fence_history(*stonith_history); } } return rc; } diff --git a/lib/pacemaker/pcmk_setup.c b/lib/pacemaker/pcmk_setup.c index 9267593d3f..80a78c5fb6 100644 --- a/lib/pacemaker/pcmk_setup.c +++ b/lib/pacemaker/pcmk_setup.c @@ -1,121 +1,121 @@ /* * Copyright 2024-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include "libpacemaker_private.h" /*! * \internal * \brief Set up a pcmk__output_t, (optionally) cib_t, and * (optionally) pcmk_scheduler_t for use in implementing * public/private API function pairs * * \param[in,out] out Where to store a \c pcmk__output_t object * \param[in,out] cib Where to store a \c cib_t object * (may be \c NULL if a CIB is not needed) * \param[in,out] scheduler Where to store a \c pcmk_scheduler_t object * (may be \c NULL if a scheduler is not needed) * \param[in,out] xml Where to write any result XML * * \note The \p cib and \p scheduler arguments will only be valid if there * are no errors in this function. However, \p out will always be * valid unless there are errors setting it up so that other errors * may still be reported. * * \return Standard Pacemaker return code */ int pcmk__setup_output_cib_sched(pcmk__output_t **out, cib_t **cib, pcmk_scheduler_t **scheduler, xmlNode **xml) { int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(out, xml); if (rc != pcmk_rc_ok) { return rc; } if (cib != NULL) { *cib = cib_new(); if (*cib == NULL) { return pcmk_rc_cib_corrupt; } rc = (*cib)->cmds->signon(*cib, crm_system_name, cib_command); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { cib__clean_up_connection(cib); return rc; } } if (scheduler != NULL) { rc = pcmk__init_scheduler(*out, NULL, NULL, scheduler); if (rc != pcmk_rc_ok && cib != NULL) { cib__clean_up_connection(cib); return rc; } pcmk__unpack_constraints(*scheduler); } pcmk__register_lib_messages(*out); return rc; } /*! * \internal * \brief Set up a pcmk__output_t and stonith_t for use in implementing * public/private API function pairs * * \param[in,out] out Where to store a \c pcmk__output_t object * \param[in,out] st Where to store a \c stonith_t object * \param[in,out] xml Where to write any result XML * * \note The \p st argument will only be valid if there are no errors in this * function. However, \p out will always be valid unless there are * errors setting it up so that other errors may still be reported. * * \return Standard Pacemaker return code */ int pcmk__setup_output_fencing(pcmk__output_t **out, stonith_t **st, xmlNode **xml) { int rc = pcmk_rc_ok; rc = pcmk__xml_output_new(out, xml); if (rc != pcmk_rc_ok) { return rc; } *st = stonith__api_new(); if (*st == NULL) { return ENOMEM; } rc = (*st)->cmds->connect(*st, crm_system_name, NULL); if (rc < 0) { rc = pcmk_legacy2rc(rc); - stonith_api_delete(*st); + stonith__api_free(*st); return rc; } pcmk__register_lib_messages(*out); stonith__register_messages(*out); return rc; } diff --git a/lib/pacemaker/pcmk_status.c b/lib/pacemaker/pcmk_status.c index 4ec8e749ec..bf76f10262 100644 --- a/lib/pacemaker/pcmk_status.c +++ b/lib/pacemaker/pcmk_status.c @@ -1,282 +1,282 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include // stonith__register_messages() #include #include static stonith_t * fencing_connect(void) { stonith_t *st = stonith__api_new(); int rc = pcmk_rc_ok; if (st == NULL) { return NULL; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_rc_ok) { return st; } else { - stonith_api_delete(st); + stonith__api_free(st); return NULL; } } /*! * \internal * \brief Output the cluster status given a fencer and CIB connection * * \param[in,out] scheduler Scheduler object (will be reset) * \param[in,out] stonith Fencer connection * \param[in,out] cib CIB connection * \param[in] current_cib Current CIB XML * \param[in] pcmkd_state \p pacemakerd state * \param[in] fence_history How much of the fencing history to output * \param[in] show Group of \p pcmk_section_e flags * \param[in] show_opts Group of \p pcmk_show_opt_e flags * \param[in] only_node If a node name or tag, include only the * matching node(s) (if any) in the output. * If \p "*" or \p NULL, include all nodes * in the output. * \param[in] only_rsc If a resource ID or tag, include only the * matching resource(s) (if any) in the * output. If \p "*" or \p NULL, include all * resources in the output. * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID * * \return Standard Pacemaker return code */ int pcmk__output_cluster_status(pcmk_scheduler_t *scheduler, stonith_t *stonith, cib_t *cib, xmlNode *current_cib, enum pcmk_pacemakerd_state pcmkd_state, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix) { xmlNode *cib_copy = pcmk__xml_copy(NULL, current_cib); stonith_history_t *stonith_history = NULL; int history_rc = 0; GList *unames = NULL; GList *resources = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; if ((scheduler == NULL) || (scheduler->priv->out == NULL)) { return EINVAL; } out = scheduler->priv->out; rc = pcmk__update_configured_schema(&cib_copy, false); if (rc != pcmk_rc_ok) { cib__clean_up_connection(&cib); pcmk__xml_free(cib_copy); out->err(out, "Upgrade failed: %s", pcmk_rc_str(rc)); return rc; } /* get the stonith-history if there is evidence we need it */ if (fence_history != pcmk__fence_history_none) { history_rc = pcmk__get_fencing_history(stonith, &stonith_history, fence_history); } pcmk_reset_scheduler(scheduler); scheduler->input = cib_copy; cluster_status(scheduler); /* Unpack constraints if any section will need them * (tickets may be referenced in constraints but not granted yet, * and bans need negative location constraints) */ if (pcmk_is_set(show, pcmk_section_bans) || pcmk_is_set(show, pcmk_section_tickets)) { pcmk__unpack_constraints(scheduler); } unames = pe__build_node_name_list(scheduler, only_node); resources = pe__build_rsc_list(scheduler, only_rsc); /* Always print DC if NULL. */ if (scheduler->dc_node == NULL) { show |= pcmk_section_dc; } out->message(out, "cluster-status", scheduler, pcmkd_state, pcmk_rc2exitc(history_rc), stonith_history, fence_history, show, show_opts, neg_location_prefix, unames, resources); g_list_free_full(unames, free); g_list_free_full(resources, free); stonith_history_free(stonith_history); stonith_history = NULL; return rc; } int pcmk_status(xmlNodePtr *xml) { cib_t *cib = NULL; pcmk__output_t *out = NULL; int rc = pcmk_rc_ok; uint32_t show_opts = pcmk_show_pending |pcmk_show_inactive_rscs |pcmk_show_timing; cib = cib_new(); if (cib == NULL) { return pcmk_rc_cib_corrupt; } rc = pcmk__xml_output_new(&out, xml); if (rc != pcmk_rc_ok) { cib_delete(cib); return rc; } pcmk__register_lib_messages(out); pe__register_messages(out); stonith__register_messages(out); rc = pcmk__status(out, cib, pcmk__fence_history_full, pcmk_section_all, show_opts, NULL, NULL, NULL, 0); pcmk__xml_output_finish(out, pcmk_rc2exitc(rc), xml); cib_delete(cib); return rc; } /*! * \internal * \brief Query and output the cluster status * * The operation is considered a success if we're able to get the \p pacemakerd * state. If possible, we'll also try to connect to the fencer and CIB and * output their respective status information. * * \param[in,out] out Output object * \param[in,out] cib CIB connection * \param[in] fence_history How much of the fencing history to output * \param[in] show Group of \p pcmk_section_e flags * \param[in] show_opts Group of \p pcmk_show_opt_e flags * \param[in] only_node If a node name or tag, include only the * matching node(s) (if any) in the output. * If \p "*" or \p NULL, include all nodes * in the output. * \param[in] only_rsc If a resource ID or tag, include only the * matching resource(s) (if any) in the * output. If \p "*" or \p NULL, include all * resources in the output. * \param[in] neg_location_prefix Prefix denoting a ban in a constraint ID * \param[in] timeout_ms How long to wait for a reply from the * \p pacemakerd API. If 0, * \p pcmk_ipc_dispatch_sync will be used. * If positive, \p pcmk_ipc_dispatch_main * will be used, and a new mainloop will be * created for this purpose (freed before * return). * * \return Standard Pacemaker return code */ int pcmk__status(pcmk__output_t *out, cib_t *cib, enum pcmk__fence_history fence_history, uint32_t show, uint32_t show_opts, const char *only_node, const char *only_rsc, const char *neg_location_prefix, unsigned int timeout_ms) { xmlNode *current_cib = NULL; int rc = pcmk_rc_ok; stonith_t *stonith = NULL; enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid; time_t last_updated = 0; pcmk_scheduler_t *scheduler = NULL; if (cib == NULL) { return ENOTCONN; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, timeout_ms, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } last_updated = time(NULL); switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_shutting_down: case pcmk_pacemakerd_state_remote: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable out->message(out, "pacemakerd-health", NULL, pcmkd_state, NULL, last_updated); return rc; } if (fence_history != pcmk__fence_history_none) { stonith = fencing_connect(); } } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc != pcmk_rc_ok) { if (pcmkd_state != pcmk_pacemakerd_state_invalid) { // Invalid at this point means we didn't query the pcmkd state out->message(out, "pacemakerd-health", NULL, pcmkd_state, NULL, last_updated); } goto done; } scheduler = pcmk_new_scheduler(); pcmk__mem_assert(scheduler); scheduler->priv->out = out; if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) { // Currently used only in the times section pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0); } rc = pcmk__output_cluster_status(scheduler, stonith, cib, current_cib, pcmkd_state, fence_history, show, show_opts, only_node, only_rsc, neg_location_prefix); if (rc != pcmk_rc_ok) { out->err(out, "Error outputting status info from the fencer or CIB"); } done: pcmk_free_scheduler(scheduler); - stonith_api_delete(stonith); + stonith__api_free(stonith); pcmk__xml_free(current_cib); return pcmk_rc_ok; } diff --git a/tools/crm_mon.c b/tools/crm_mon.c index c4f828edf6..ba3cc415de 100644 --- a/tools/crm_mon.c +++ b/tools/crm_mon.c @@ -1,2190 +1,2190 @@ /* * Copyright 2004-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // pcmk__ends_with_ext() #include #include #include #include #include #include #include #include #include #include #include #include #include #include // stonith__* #include "crm_mon.h" #define SUMMARY "Provides a summary of cluster's current state.\n\n" \ "Outputs varying levels of detail in a number of different formats." /* * Definitions indicating which items to print */ static uint32_t show; static uint32_t show_opts = pcmk_show_pending; /* * Definitions indicating how to output */ static mon_output_format_t output_format = mon_output_unset; /* other globals */ static GIOChannel *io_channel = NULL; static GMainLoop *mainloop = NULL; static guint reconnect_timer = 0; static mainloop_timer_t *refresh_timer = NULL; static enum pcmk_pacemakerd_state pcmkd_state = pcmk_pacemakerd_state_invalid; static cib_t *cib = NULL; static stonith_t *st = NULL; static xmlNode *current_cib = NULL; static GError *error = NULL; static pcmk__common_args_t *args = NULL; static pcmk__output_t *out = NULL; static GOptionContext *context = NULL; static gchar **processed_args = NULL; static time_t last_refresh = 0; volatile crm_trigger_t *refresh_trigger = NULL; static pcmk_scheduler_t *scheduler = NULL; static enum pcmk__fence_history fence_history = pcmk__fence_history_none; int interactive_fence_level = 0; static pcmk__supported_format_t formats[] = { #if PCMK__ENABLE_CURSES CRM_MON_SUPPORTED_FORMAT_CURSES, #endif PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_default(pcmk__output_t *out, va_list args) { return pcmk_rc_no_output; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_html(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); if (out->dest != stdout) { out->reset(out); } pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, "Not connected to CIB"); if (desc != NULL) { pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ": "); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, desc); } if (state != pcmk_pacemakerd_state_invalid) { const char *state_s = pcmk__pcmkd_state_enum2friendly(state); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, " ("); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, state_s); pcmk__output_create_xml_text_node(out, PCMK__XE_SPAN, ")"); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_text(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); int rc = pcmk_rc_ok; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { rc = out->info(out, "Not connected to CIB%s%s (%s)", (desc != NULL)? ": " : "", pcmk__s(desc, ""), pcmk__pcmkd_state_enum2friendly(state)); } else { rc = out->info(out, "Not connected to CIB%s%s", (desc != NULL)? ": " : "", pcmk__s(desc, "")); } out->finish(out, CRM_EX_DISCONNECT, true, NULL); return rc; } PCMK__OUTPUT_ARGS("crm-mon-disconnected", "const char *", "enum pcmk_pacemakerd_state") static int crm_mon_disconnected_xml(pcmk__output_t *out, va_list args) { const char *desc = va_arg(args, const char *); enum pcmk_pacemakerd_state state = (enum pcmk_pacemakerd_state) va_arg(args, int); const char *state_s = NULL; if (out->dest != stdout) { out->reset(out); } if (state != pcmk_pacemakerd_state_invalid) { state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state); } pcmk__output_create_xml_node(out, PCMK_XE_CRM_MON_DISCONNECTED, PCMK_XA_DESCRIPTION, desc, PCMK_XA_PACEMAKERD_STATE, state_s, NULL); out->finish(out, CRM_EX_DISCONNECT, true, NULL); return pcmk_rc_ok; } static pcmk__message_entry_t fmt_functions[] = { { "crm-mon-disconnected", "default", crm_mon_disconnected_default }, { "crm-mon-disconnected", "html", crm_mon_disconnected_html }, { "crm-mon-disconnected", "text", crm_mon_disconnected_text }, { "crm-mon-disconnected", "xml", crm_mon_disconnected_xml }, { NULL, NULL, NULL }, }; #define RECONNECT_MSECS 5000 struct { guint reconnect_ms; enum mon_exec_mode exec_mode; gboolean fence_connect; gboolean print_pending; gboolean show_bans; gboolean watch_fencing; char *pid_file; char *external_agent; char *external_recipient; char *neg_location_prefix; char *only_node; char *only_rsc; GSList *user_includes_excludes; GSList *includes_excludes; } options = { .reconnect_ms = RECONNECT_MSECS, .exec_mode = mon_exec_unset, .fence_connect = TRUE, }; static crm_exit_t clean_up(crm_exit_t exit_code); static void crm_diff_update(const char *event, xmlNode * msg); static void clean_up_on_connection_failure(int rc); static int mon_refresh_display(gpointer user_data); static int setup_cib_connection(void); static int setup_fencer_connection(void); static int setup_api_connections(void); static void mon_st_callback_event(stonith_t * st, stonith_event_t * e); static void mon_st_callback_display(stonith_t * st, stonith_event_t * e); static void refresh_after_event(gboolean data_updated, gboolean enforce); static uint32_t all_includes(mon_output_format_t fmt) { if ((fmt == mon_output_plain) || (fmt == mon_output_console)) { return ~pcmk_section_options; } else { return pcmk_section_all; } } static uint32_t default_includes(mon_output_format_t fmt) { switch (fmt) { case mon_output_plain: case mon_output_console: case mon_output_html: return pcmk_section_summary |pcmk_section_nodes |pcmk_section_resources |pcmk_section_failures; case mon_output_xml: return all_includes(fmt); default: return 0; } } struct { const char *name; uint32_t bit; } sections[] = { { "attributes", pcmk_section_attributes }, { "bans", pcmk_section_bans }, { "counts", pcmk_section_counts }, { "dc", pcmk_section_dc }, { "failcounts", pcmk_section_failcounts }, { "failures", pcmk_section_failures }, { PCMK_VALUE_FENCING, pcmk_section_fencing_all }, { "fencing-failed", pcmk_section_fence_failed }, { "fencing-pending", pcmk_section_fence_pending }, { "fencing-succeeded", pcmk_section_fence_worked }, { "maint-mode", pcmk_section_maint_mode }, { "nodes", pcmk_section_nodes }, { "operations", pcmk_section_operations }, { "options", pcmk_section_options }, { "resources", pcmk_section_resources }, { "stack", pcmk_section_stack }, { "summary", pcmk_section_summary }, { "tickets", pcmk_section_tickets }, { "times", pcmk_section_times }, { NULL } }; static uint32_t find_section_bit(const char *name) { for (int i = 0; sections[i].name != NULL; i++) { if (pcmk__str_eq(sections[i].name, name, pcmk__str_casei)) { return sections[i].bit; } } return 0; } static gboolean apply_exclude(const gchar *excludes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(excludes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = 0; } else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) { show = all_includes(output_format); } else if (bit != 0) { show &= ~bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--exclude options: all, attributes, bans, counts, dc, " "failcounts, failures, fencing, fencing-failed, " "fencing-pending, fencing-succeeded, maint-mode, nodes, " PCMK_VALUE_NONE ", operations, options, resources, " "stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include(const gchar *includes, GError **error) { char **parts = NULL; gboolean result = TRUE; parts = g_strsplit(includes, ",", 0); for (char **s = parts; *s != NULL; s++) { uint32_t bit = find_section_bit(*s); if (pcmk__str_eq(*s, "all", pcmk__str_none)) { show = all_includes(output_format); } else if (pcmk__starts_with(*s, "bans")) { show |= pcmk_section_bans; if (options.neg_location_prefix != NULL) { free(options.neg_location_prefix); options.neg_location_prefix = NULL; } if (strlen(*s) > 4 && (*s)[4] == ':') { options.neg_location_prefix = strdup(*s+5); } } else if (pcmk__str_any_of(*s, PCMK_VALUE_DEFAULT, "defaults", NULL)) { show |= default_includes(output_format); } else if (pcmk__str_eq(*s, PCMK_VALUE_NONE, pcmk__str_none)) { show = 0; } else if (bit != 0) { show |= bit; } else { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--include options: all, attributes, bans[:PREFIX], counts, dc, " PCMK_VALUE_DEFAULT ", failcounts, failures, fencing, " "fencing-failed, fencing-pending, fencing-succeeded, " "maint-mode, nodes, " PCMK_VALUE_NONE ", operations, " "options, resources, stack, summary, tickets, times"); result = FALSE; break; } } g_strfreev(parts); return result; } static gboolean apply_include_exclude(GSList *lst, GError **error) { gboolean rc = TRUE; GSList *node = lst; while (node != NULL) { char *s = node->data; if (pcmk__starts_with(s, "--include=")) { rc = apply_include(s+10, error); } else if (pcmk__starts_with(s, "-I=")) { rc = apply_include(s+3, error); } else if (pcmk__starts_with(s, "--exclude=")) { rc = apply_exclude(s+10, error); } else if (pcmk__starts_with(s, "-U=")) { rc = apply_exclude(s+3, error); } if (rc != TRUE) { break; } node = node->next; } return rc; } static gboolean user_include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.user_includes_excludes = g_slist_append(options.user_includes_excludes, s); return TRUE; } static gboolean include_exclude_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { char *s = crm_strdup_printf("%s=%s", option_name, optarg); options.includes_excludes = g_slist_append(options.includes_excludes, s); return TRUE; } static gboolean as_xml_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { pcmk__str_update(&args->output_ty, "xml"); output_format = mon_output_legacy_xml; return TRUE; } static gboolean fence_history_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg == NULL) { interactive_fence_level = 2; } else { pcmk__scan_min_int(optarg, &interactive_fence_level, 0); } switch (interactive_fence_level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK_VALUE_FENCING, data, err); case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", PCMK_VALUE_FENCING, data, err); case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; return include_exclude_cb("--include", "fencing-failed,fencing-pending", data, err); case 0: options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; return include_exclude_cb("--exclude", PCMK_VALUE_FENCING, data, err); default: g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Fence history must be 0-3"); return FALSE; } } static gboolean group_by_node_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_rscs_by_node; return TRUE; } static gboolean hide_headers_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--exclude", "summary", data, err); } static gboolean inactive_resources_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_inactive_rscs; return TRUE; } static gboolean print_brief_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_brief; return TRUE; } static gboolean print_detail_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_details; return TRUE; } static gboolean print_description_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_description; return TRUE; } static gboolean print_timing_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { show_opts |= pcmk_show_timing; return user_include_exclude_cb("--include", "operations", data, err); } static gboolean reconnect_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { int rc = crm_get_msec(optarg); if (rc == -1) { g_set_error(err, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid value for -i: %s", optarg); return FALSE; } else { pcmk_parse_interval_spec(optarg, &options.reconnect_ms); if (options.exec_mode != mon_exec_daemonized) { // Reconnect interval applies to daemonized too, so don't override options.exec_mode = mon_exec_update; } } return TRUE; } /*! * \internal * \brief Enable one-shot mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean one_shot_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_one_shot; return TRUE; } /*! * \internal * \brief Enable daemonized mode * * \param[in] option_name Name of option being parsed (ignored) * \param[in] optarg Value to be parsed (ignored) * \param[in] data User data (ignored) * \param[out] err Where to store error (ignored) */ static gboolean daemonize_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { options.exec_mode = mon_exec_daemonized; return TRUE; } static gboolean show_attributes_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "attributes", data, err); } static gboolean show_bans_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { if (optarg != NULL) { char *s = crm_strdup_printf("bans:%s", optarg); gboolean rc = user_include_exclude_cb("--include", s, data, err); free(s); return rc; } else { return user_include_exclude_cb("--include", "bans", data, err); } } static gboolean show_failcounts_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts", data, err); } static gboolean show_operations_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "failcounts,operations", data, err); } static gboolean show_tickets_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { return user_include_exclude_cb("--include", "tickets", data, err); } static gboolean use_cib_file_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { setenv("CIB_file", optarg, 1); options.exec_mode = mon_exec_one_shot; return TRUE; } #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry addl_entries[] = { { "interval", 'i', 0, G_OPTION_ARG_CALLBACK, reconnect_cb, "Update frequency (default is 5 seconds). Note: When run interactively\n" INDENT "on a live cluster, the display will be updated automatically\n" INDENT "whenever the cluster configuration or status changes.", "TIMESPEC" }, { "one-shot", '1', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, one_shot_cb, "Display the cluster status once and exit", NULL }, { "daemonize", 'd', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, daemonize_cb, "Run in the background as a daemon.\n" INDENT "Requires at least one of --output-to and --external-agent.", NULL }, { "pid-file", 'p', 0, G_OPTION_ARG_FILENAME, &options.pid_file, "(Advanced) Daemon pid file location", "FILE" }, { "external-agent", 'E', 0, G_OPTION_ARG_FILENAME, &options.external_agent, "A program to run when resource operations take place", "FILE" }, { "external-recipient", 'e', 0, G_OPTION_ARG_STRING, &options.external_recipient, "A recipient for your program (assuming you want the program to send something to someone).", "RCPT" }, { "watch-fencing", 'W', 0, G_OPTION_ARG_NONE, &options.watch_fencing, "Listen for fencing events. For use with --external-agent.", NULL }, { "xml-file", 'x', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, use_cib_file_cb, NULL, NULL }, { NULL } }; static GOptionEntry display_entries[] = { { "include", 'I', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to include in the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "exclude", 'U', 0, G_OPTION_ARG_CALLBACK, user_include_exclude_cb, "A list of sections to exclude from the output.\n" INDENT "See `Output Control` help for more information.", "SECTION(s)" }, { "node", 0, 0, G_OPTION_ARG_STRING, &options.only_node, "When displaying information about nodes, show only what's related to the given\n" INDENT "node, or to all nodes tagged with the given tag", "NODE" }, { "resource", 0, 0, G_OPTION_ARG_STRING, &options.only_rsc, "When displaying information about resources, show only what's related to the given\n" INDENT "resource, or to all resources tagged with the given tag", "RSC" }, { "group-by-node", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, group_by_node_cb, "Group resources by node", NULL }, { "inactive", 'r', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, inactive_resources_cb, "Display inactive resources", NULL }, { "failcounts", 'f', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_failcounts_cb, "Display resource fail counts", NULL }, { "operations", 'o', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_operations_cb, "Display resource operation history", NULL }, { "timing-details", 't', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_timing_cb, "Display resource operation history with timing details", NULL }, { "tickets", 'c', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_tickets_cb, "Display cluster tickets", NULL }, { "fence-history", 'm', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, fence_history_cb, "Show fence history:\n" INDENT "0=off, 1=failures and pending (default without option),\n" INDENT "2=add successes (default without value for option),\n" INDENT "3=show full history without reduction to most recent of each flavor", "LEVEL" }, { "neg-locations", 'L', G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, show_bans_cb, "Display negative location constraints [optionally filtered by id prefix]", NULL }, { "show-node-attributes", 'A', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, show_attributes_cb, "Display node attributes", NULL }, { "hide-headers", 'D', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, hide_headers_cb, "Hide all headers", NULL }, { "show-detail", 'R', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_detail_cb, "Show more details (node IDs, individual clone instances)", NULL }, { "show-description", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_description_cb, "Show resource descriptions", NULL }, { "brief", 'b', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, print_brief_cb, "Brief output", NULL }, { "pending", 'j', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.print_pending, "Display pending state if '" PCMK_META_RECORD_PENDING "' is enabled", NULL }, { NULL } }; static GOptionEntry deprecated_entries[] = { /* @COMPAT resource-agents <4.15.0 uses --as-xml, so removing this option * must wait until we no longer support building on any platforms that ship * the older agents. */ { "as-xml", 'X', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, as_xml_cb, "Write cluster status as XML to stdout. This will enable one-shot mode.\n" INDENT "Use --output-as=xml instead.", NULL }, { NULL } }; /* *INDENT-ON* */ /* Reconnect to the CIB and fencing agent after reconnect_ms has passed. This sounds * like it would be more broadly useful, but only ever happens after a disconnect via * mon_cib_connection_destroy. */ static gboolean reconnect_after_timeout(gpointer data) { #if PCMK__ENABLE_CURSES if (output_format == mon_output_console) { clear(); refresh(); } #endif out->transient(out, "Reconnecting..."); if (setup_api_connections() == pcmk_rc_ok) { // Trigger redrawing the screen (needs reconnect_timer == 0) reconnect_timer = 0; refresh_after_event(FALSE, TRUE); return G_SOURCE_REMOVE; } out->message(out, "crm-mon-disconnected", "Latest connection attempt failed", pcmkd_state); reconnect_timer = pcmk__create_timer(options.reconnect_ms, reconnect_after_timeout, NULL); return G_SOURCE_REMOVE; } /* Called from various places when we are disconnected from the CIB or from the * fencing agent. If the CIB connection is still valid, this function will also * attempt to sign off and reconnect. */ static void mon_cib_connection_destroy(gpointer user_data) { const char *msg = "Connection to the cluster lost"; pcmkd_state = pcmk_pacemakerd_state_invalid; /* No crm-mon-disconnected message for console; a working implementation * is not currently worth the effort */ out->transient(out, "%s", msg); out->message(out, "crm-mon-disconnected", msg, pcmkd_state); if (refresh_timer != NULL) { /* we'll trigger a refresh after reconnect */ mainloop_timer_stop(refresh_timer); } if (reconnect_timer) { /* we'll trigger a new reconnect-timeout at the end */ g_source_remove(reconnect_timer); reconnect_timer = 0; } /* the client API won't properly reconnect notifications if they are still * in the table - so remove them */ if (st != NULL) { if (st->state != stonith_disconnected) { st->cmds->disconnect(st); } st->cmds->remove_notification(st, NULL); } if (cib) { cib->cmds->signoff(cib); reconnect_timer = pcmk__create_timer(options.reconnect_ms, reconnect_after_timeout, NULL); } } /* Signal handler installed into the mainloop for normal program shutdown */ static void mon_shutdown(int nsig) { clean_up(CRM_EX_OK); } #if PCMK__ENABLE_CURSES static volatile sighandler_t ncurses_winch_handler; /* Signal handler installed the regular way (not into the main loop) for when * the screen is resized. Commonly, this happens when running in an xterm and * the user changes its size. */ static void mon_winresize(int nsig) { static int not_done; int lines = 0, cols = 0; if (!not_done++) { if (ncurses_winch_handler) /* the original ncurses WINCH signal handler does the * magic of retrieving the new window size; * otherwise, we'd have to use ioctl or tgetent */ (*ncurses_winch_handler) (SIGWINCH); getmaxyx(stdscr, lines, cols); resizeterm(lines, cols); /* Alert the mainloop code we'd like the refresh_trigger to run next * time the mainloop gets around to checking. */ mainloop_set_trigger((crm_trigger_t *) refresh_trigger); } not_done--; } #endif static int setup_fencer_connection(void) { int rc = pcmk_ok; if (options.fence_connect && st == NULL) { st = stonith__api_new(); } if (!options.fence_connect || st == NULL || st->state != stonith_disconnected) { return rc; } rc = st->cmds->connect(st, crm_system_name, NULL); if (rc == pcmk_ok) { crm_trace("Setting up stonith callbacks"); if (options.watch_fencing) { st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, mon_st_callback_event); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_FENCE, mon_st_callback_event); } else { st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_DISCONNECT, mon_st_callback_display); st->cmds->register_notification(st, PCMK__VALUE_ST_NOTIFY_HISTORY, mon_st_callback_display); } } else { - stonith_api_delete(st); + stonith__api_free(st); st = NULL; } return rc; } static int setup_cib_connection(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { // Already connected with notifications registered for CIB updates return rc; } rc = cib__signon_query(out, &cib, ¤t_cib); if (rc == pcmk_rc_ok) { rc = pcmk_legacy2rc(cib->cmds->set_connection_dnotify(cib, mon_cib_connection_destroy)); if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support connection loss " "notifications; crm_mon will be unable to reconnect after " "connection loss"); rc = pcmk_rc_ok; } if (rc == pcmk_rc_ok) { cib->cmds->del_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY, crm_diff_update); rc = cib->cmds->add_notify_callback(cib, PCMK__VALUE_CIB_DIFF_NOTIFY, crm_diff_update); rc = pcmk_legacy2rc(rc); } if (rc != pcmk_rc_ok) { if (rc == EPROTONOSUPPORT) { out->err(out, "CIB client does not support CIB diff " "notifications"); } else { out->err(out, "CIB diff notification setup failed"); } out->err(out, "Cannot monitor CIB changes; exiting"); cib__clean_up_connection(&cib); - stonith_api_delete(st); + stonith__api_free(st); st = NULL; } } return rc; } /* This is used to set up the fencing options after the interactive UI has been stared. * fence_history_cb can't be used because it builds up a list of includes/excludes that * then have to be processed with apply_include_exclude and that could affect other * things. */ static void set_fencing_options(int level) { switch (level) { case 3: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 2: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fencing_all; break; case 1: options.fence_connect = TRUE; fence_history = pcmk__fence_history_full; show |= pcmk_section_fence_failed | pcmk_section_fence_pending; break; default: interactive_fence_level = 0; options.fence_connect = FALSE; fence_history = pcmk__fence_history_none; show &= ~pcmk_section_fencing_all; break; } } static int setup_api_connections(void) { int rc = pcmk_rc_ok; CRM_CHECK(cib != NULL, return EINVAL); if (cib->state != cib_disconnected) { return rc; } if (cib->variant == cib_native) { rc = pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); if (rc != pcmk_rc_ok) { return rc; } switch (pcmkd_state) { case pcmk_pacemakerd_state_running: case pcmk_pacemakerd_state_remote: case pcmk_pacemakerd_state_shutting_down: /* Fencer and CIB may still be available while shutting down or * running on a Pacemaker Remote node */ break; default: // Fencer and CIB are definitely unavailable return ENOTCONN; } setup_fencer_connection(); } rc = setup_cib_connection(); return rc; } #if PCMK__ENABLE_CURSES static const char * get_option_desc(char c) { const char *desc = "No help available"; for (GOptionEntry *entry = display_entries; entry != NULL; entry++) { if (entry->short_name == c) { desc = entry->description; break; } } return desc; } #define print_option_help(out, option, condition) \ curses_formatted_printf(out, "%c %c: \t%s\n", ((condition)? '*': ' '), option, get_option_desc(option)); /* This function is called from the main loop when there is something to be read * on stdin, like an interactive user's keystroke. All it does is read the keystroke, * set flags (or show the page showing which keystrokes are valid), and redraw the * screen. It does not do anything with connections to the CIB or fencing agent * agent what would happen in mon_refresh_display. */ static gboolean detect_user_input(GIOChannel *channel, GIOCondition condition, gpointer user_data) { int c; gboolean config_mode = FALSE; gboolean rc = G_SOURCE_CONTINUE; /* If the attached pty device (pseudo-terminal) has been closed/deleted, * the condition (G_IO_IN | G_IO_ERR | G_IO_HUP) occurs. * Exit with an error, otherwise the process would persist in the * background and significantly raise the CPU usage. */ if ((condition & G_IO_ERR) && (condition & G_IO_HUP)) { rc = G_SOURCE_REMOVE; clean_up(CRM_EX_IOERR); } /* The connection/fd has been closed. Refresh the screen and remove this * event source hence ignore stdin. */ if (condition & (G_IO_HUP | G_IO_NVAL)) { rc = G_SOURCE_REMOVE; } if ((condition & G_IO_IN) == 0) { return rc; } while (1) { /* Get user input */ c = getchar(); switch (c) { case 'm': interactive_fence_level++; if (interactive_fence_level > 3) { interactive_fence_level = 0; } set_fencing_options(interactive_fence_level); break; case 'c': show ^= pcmk_section_tickets; break; case 'f': show ^= pcmk_section_failcounts; break; case 'n': show_opts ^= pcmk_show_rscs_by_node; break; case 'o': show ^= pcmk_section_operations; if (!pcmk_is_set(show, pcmk_section_operations)) { show_opts &= ~pcmk_show_timing; } break; case 'r': show_opts ^= pcmk_show_inactive_rscs; break; case 'R': show_opts ^= pcmk_show_details; break; case 't': show_opts ^= pcmk_show_timing; if (pcmk_is_set(show_opts, pcmk_show_timing)) { show |= pcmk_section_operations; } break; case 'A': show ^= pcmk_section_attributes; break; case 'L': show ^= pcmk_section_bans; break; case 'D': /* If any header is shown, clear them all, otherwise set them all */ if (pcmk_any_flags_set(show, pcmk_section_summary)) { show &= ~pcmk_section_summary; } else { show |= pcmk_section_summary; } /* Regardless, we don't show options in console mode. */ show &= ~pcmk_section_options; break; case 'b': show_opts ^= pcmk_show_brief; break; case 'j': show_opts ^= pcmk_show_pending; break; case '?': config_mode = TRUE; break; default: /* All other keys just redraw the screen. */ goto refresh; } if (!config_mode) goto refresh; clear(); refresh(); curses_formatted_printf(out, "%s", "Display option change mode\n"); print_option_help(out, 'c', pcmk_is_set(show, pcmk_section_tickets)); print_option_help(out, 'f', pcmk_is_set(show, pcmk_section_failcounts)); print_option_help(out, 'n', pcmk_is_set(show_opts, pcmk_show_rscs_by_node)); print_option_help(out, 'o', pcmk_is_set(show, pcmk_section_operations)); print_option_help(out, 'r', pcmk_is_set(show_opts, pcmk_show_inactive_rscs)); print_option_help(out, 't', pcmk_is_set(show_opts, pcmk_show_timing)); print_option_help(out, 'A', pcmk_is_set(show, pcmk_section_attributes)); print_option_help(out, 'L', pcmk_is_set(show, pcmk_section_bans)); print_option_help(out, 'D', !pcmk_is_set(show, pcmk_section_summary)); print_option_help(out, 'R', pcmk_any_flags_set(show_opts, pcmk_show_details)); print_option_help(out, 'b', pcmk_is_set(show_opts, pcmk_show_brief)); print_option_help(out, 'j', pcmk_is_set(show_opts, pcmk_show_pending)); curses_formatted_printf(out, "%d m: \t%s\n", interactive_fence_level, get_option_desc('m')); curses_formatted_printf(out, "%s", "\nToggle fields via field letter, type any other key to return\n"); } refresh: refresh_after_event(FALSE, TRUE); return rc; } #endif // PCMK__ENABLE_CURSES // Basically crm_signal_handler(SIGCHLD, SIG_IGN) plus the SA_NOCLDWAIT flag static void avoid_zombies(void) { struct sigaction sa; memset(&sa, 0, sizeof(struct sigaction)); if (sigemptyset(&sa.sa_mask) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); return; } sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART|SA_NOCLDWAIT; if (sigaction(SIGCHLD, &sa, NULL) < 0) { crm_warn("Cannot avoid zombies: %s", pcmk_rc_str(errno)); } } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; #if PCMK__ENABLE_CURSES const char *fmts = "console (default), html, text, xml, none"; #else const char *fmts = "text (default), html, xml, none"; #endif // PCMK__ENABLE_CURSES const char *desc = NULL; desc = "Notes:\n\n" "Time Specification:\n\n" "The TIMESPEC in any command line option can be specified in many\n" "different formats. It can be an integer number of seconds, a\n" "number plus units (us/usec/ms/msec/s/sec/m/min/h/hr), or an ISO\n" "8601 period specification.\n\n" "Output Control:\n\n" "By default, a particular set of sections are written to the\n" "output destination. The default varies based on the output\n" "format: XML includes all sections by default, while other output\n" "formats include less. This set can be modified with the --include\n" "and --exclude command line options. Each option may be passed\n" "multiple times, and each can specify a comma-separated list of\n" "sections. The options are applied to the default set, in order\n" "from left to right as they are passed on the command line. For a\n" "list of valid sections, pass --include=list or --exclude=list.\n\n" "Interactive Use:\n\n" #if PCMK__ENABLE_CURSES "When run interactively, crm_mon can be told to hide and show\n" "various sections of output. To see a help screen explaining the\n" "options, press '?'. Any key stroke aside from those listed will\n" "cause the screen to refresh.\n\n" #else "The local installation of Pacemaker was built without support for\n" "interactive (console) mode. A curses library must be available at\n" "build time to support interactive mode.\n\n" #endif // PCMK__ENABLE_CURSES "Examples:\n\n" #if PCMK__ENABLE_CURSES "Display the cluster status on the console with updates as they\n" "occur:\n\n" "\tcrm_mon\n\n" #endif // PCMK__ENABLE_CURSES "Display the cluster status once and exit:\n\n" "\tcrm_mon -1\n\n" "Display the cluster status, group resources by node, and include\n" "inactive resources in the list:\n\n" "\tcrm_mon --group-by-node --inactive\n\n" "Start crm_mon as a background daemon and have it write the\n" "cluster status to an HTML file:\n\n" "\tcrm_mon --daemonize --output-as html " "--output-to /path/to/docroot/filename.html\n\n" "Display the cluster status as XML:\n\n" "\tcrm_mon --output-as xml\n\n"; context = pcmk__build_arg_context(args, fmts, group, NULL); pcmk__add_main_args(context, extra_prog_entries); g_option_context_set_description(context, desc); pcmk__add_arg_group(context, "display", "Display Options:", "Show display options", display_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); pcmk__add_arg_group(context, "deprecated", "Deprecated Options:", "Show deprecated options", deprecated_entries); return context; } /*! * \internal * \brief Set output format based on \c --output-as arguments and mode arguments * * When the deprecated \c --as-xml argument is parsed, a callback function sets * \c output_format. Otherwise, this function does the same based on the current * \c --output-as arguments and the \c --one-shot and \c --daemonize arguments. * * \param[in,out] args Command line arguments */ static void reconcile_output_format(pcmk__common_args_t *args) { if (output_format != mon_output_unset) { /* The deprecated --as-xml argument was used, and we're finished. Note * that this means the deprecated argument takes precedence. */ return; } if (pcmk__str_eq(args->output_ty, PCMK_VALUE_NONE, pcmk__str_none)) { output_format = mon_output_none; } else if (pcmk__str_eq(args->output_ty, "html", pcmk__str_none)) { output_format = mon_output_html; umask(S_IWGRP | S_IWOTH); // World-readable HTML } else if (pcmk__str_eq(args->output_ty, "xml", pcmk__str_none)) { output_format = mon_output_xml; #if PCMK__ENABLE_CURSES } else if (pcmk__str_eq(args->output_ty, "console", pcmk__str_null_matches)) { /* Console is the default format if no conflicting options are given. * * Use text output instead if one of the following conditions is met: * * We've requested daemonized or one-shot mode (console output is * incompatible with modes other than mon_exec_update) * * We requested the version, which is effectively one-shot * * We specified a non-stdout output destination (console mode is * compatible only with stdout) */ if ((options.exec_mode == mon_exec_daemonized) || (options.exec_mode == mon_exec_one_shot) || args->version || !pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } else { pcmk__str_update(&args->output_ty, "console"); output_format = mon_output_console; crm_enable_stderr(FALSE); } #endif // PCMK__ENABLE_CURSES } else if (pcmk__str_eq(args->output_ty, "text", pcmk__str_null_matches)) { /* Text output was explicitly requested, or it's the default because * curses is not enabled */ pcmk__str_update(&args->output_ty, "text"); output_format = mon_output_plain; } // Otherwise, invalid format. Let pcmk__output_new() throw an error. } /*! * \internal * \brief Set execution mode to the output format's default if appropriate * * \param[in,out] args Command line arguments */ static void set_default_exec_mode(const pcmk__common_args_t *args) { if (output_format == mon_output_console) { /* Update is the only valid mode for console, but set here instead of * reconcile_output_format() for isolation and consistency */ options.exec_mode = mon_exec_update; } else if (options.exec_mode == mon_exec_unset) { // Default to one-shot mode for all other formats options.exec_mode = mon_exec_one_shot; } else if ((options.exec_mode == mon_exec_update) && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { // If not using console format, update mode cannot be used with stdout options.exec_mode = mon_exec_one_shot; } } static void clean_up_on_connection_failure(int rc) { if (rc == ENOTCONN) { if (pcmkd_state == pcmk_pacemakerd_state_remote) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: remote-node not connected to cluster"); } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error: cluster is not available on this node"); } } else { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Connection to cluster failed: %s", pcmk_rc_str(rc)); } clean_up(pcmk_rc2exitc(rc)); } static void one_shot(void) { int rc = pcmk__status(out, cib, fence_history, show, show_opts, options.only_node, options.only_rsc, options.neg_location_prefix, 0); if (rc == pcmk_rc_ok) { clean_up(pcmk_rc2exitc(rc)); } else { clean_up_on_connection_failure(rc); } } static void exit_on_invalid_cib(void) { if (cib != NULL) { return; } // Shouldn't really be possible g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Invalid CIB source"); clean_up(CRM_EX_ERROR); } int main(int argc, char **argv) { int rc = pcmk_rc_ok; GOptionGroup *output_group = NULL; args = pcmk__new_common_args(SUMMARY); context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); options.pid_file = strdup("/tmp/ClusterMon.pid"); pcmk__cli_init_logging("crm_mon", 0); // Avoid needing to wait for subprocesses forked for -E/--external-agent avoid_zombies(); processed_args = pcmk__cmdline_preproc(argv, "eimpxEILU"); fence_history_cb("--fence-history", "1", NULL, NULL); /* Set an HTML title regardless of what format we will eventually use. * Doing this here means the user can give their own title on the command * line. */ if (!pcmk__force_args(context, &error, "%s --html-title \"Cluster Status\"", g_get_prgname())) { return clean_up(CRM_EX_USAGE); } if (!g_option_context_parse_strv(context, &processed_args, &error)) { return clean_up(CRM_EX_USAGE); } for (int i = 0; i < args->verbosity; i++) { crm_bump_log_level(argc, argv); } if (!args->version) { if (args->quiet) { include_exclude_cb("--exclude", "times", NULL, NULL); } if (options.watch_fencing) { fence_history_cb("--fence-history", "0", NULL, NULL); options.fence_connect = TRUE; } /* create the cib-object early to be able to do further * decisions based on the cib-source */ cib = cib_new(); exit_on_invalid_cib(); switch (cib->variant) { case cib_native: // Everything (fencer, CIB, pcmkd status) should be available break; case cib_file: // Live fence history is not meaningful fence_history_cb("--fence-history", "0", NULL, NULL); /* Notifications are unsupported; nothing to monitor * @COMPAT: Let setup_cib_connection() handle this by exiting? */ options.exec_mode = mon_exec_one_shot; break; case cib_remote: // We won't receive any fencing updates fence_history_cb("--fence-history", "0", NULL, NULL); break; default: /* something is odd */ exit_on_invalid_cib(); break; } if ((options.exec_mode == mon_exec_daemonized) && !options.external_agent && pcmk__str_eq(args->output_dest, "-", pcmk__str_null_matches)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires at least one of --output-to " "(with value not set to '-') and --external-agent"); return clean_up(CRM_EX_USAGE); } } reconcile_output_format(args); set_default_exec_mode(args); rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_ERROR, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); return clean_up(CRM_EX_ERROR); } if (output_format == mon_output_legacy_xml) { output_format = mon_output_xml; pcmk__output_set_legacy_xml(out); } /* output_format MUST NOT BE CHANGED AFTER THIS POINT. */ /* If we had a valid format for pcmk__output_new(), output_format should be * set by now. */ pcmk__assert(output_format != mon_output_unset); if (output_format == mon_output_plain) { pcmk__output_text_set_fancy(out, true); } if (options.exec_mode == mon_exec_daemonized) { if (!options.external_agent && (output_format == mon_output_none)) { g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, "--daemonize requires --external-agent if used with " "--output-as=none"); return clean_up(CRM_EX_USAGE); } crm_enable_stderr(FALSE); cib_delete(cib); cib = NULL; pcmk__daemonize(crm_system_name, options.pid_file); cib = cib_new(); exit_on_invalid_cib(); } show = default_includes(output_format); /* Apply --include/--exclude flags we used internally. There's no error reporting * here because this would be a programming error. */ apply_include_exclude(options.includes_excludes, &error); /* And now apply any --include/--exclude flags the user gave on the command line. * These are done in a separate pass from the internal ones because we want to * make sure whatever the user specifies overrides whatever we do. */ if (!apply_include_exclude(options.user_includes_excludes, &error)) { return clean_up(CRM_EX_USAGE); } /* Sync up the initial value of interactive_fence_level with whatever was set with * --include/--exclude= options. */ if (pcmk_all_flags_set(show, pcmk_section_fencing_all)) { interactive_fence_level = 3; } else if (pcmk_is_set(show, pcmk_section_fence_worked)) { interactive_fence_level = 2; } else if (pcmk_any_flags_set(show, pcmk_section_fence_failed | pcmk_section_fence_pending)) { interactive_fence_level = 1; } else { interactive_fence_level = 0; } pcmk__register_lib_messages(out); crm_mon_register_messages(out); pe__register_messages(out); stonith__register_messages(out); // Messages internal to this file, nothing curses-specific pcmk__register_messages(out, fmt_functions); if (args->version) { out->version(out, false); return clean_up(CRM_EX_OK); } if (output_format == mon_output_xml) { show_opts |= pcmk_show_inactive_rscs | pcmk_show_timing; } if ((output_format == mon_output_html) && (out->dest != stdout)) { char *content = pcmk__itoa(pcmk__timeout_ms2s(options.reconnect_ms)); pcmk__html_add_header(PCMK__XE_META, PCMK__XA_HTTP_EQUIV, PCMK__VALUE_REFRESH, PCMK__XA_CONTENT, content, NULL); free(content); } crm_info("Starting %s", crm_system_name); cib__set_output(cib, out); if (options.exec_mode == mon_exec_one_shot) { one_shot(); } scheduler = pcmk_new_scheduler(); pcmk__mem_assert(scheduler); scheduler->priv->out = out; if ((cib->variant == cib_native) && pcmk_is_set(show, pcmk_section_times)) { // Currently used only in the times section pcmk__query_node_name(out, 0, &(scheduler->priv->local_node_name), 0); } out->message(out, "crm-mon-disconnected", "Waiting for initial connection", pcmkd_state); do { out->transient(out, "Connecting to cluster..."); rc = setup_api_connections(); if (rc != pcmk_rc_ok) { if ((rc == ENOTCONN) || (rc == ECONNREFUSED)) { out->transient(out, "Connection failed. Retrying in %s...", pcmk__readable_interval(options.reconnect_ms)); } // Give some time to view all output even if we won't retry pcmk__sleep_ms(options.reconnect_ms); #if PCMK__ENABLE_CURSES if (output_format == mon_output_console) { clear(); refresh(); } #endif } } while ((rc == ENOTCONN) || (rc == ECONNREFUSED)); if (rc != pcmk_rc_ok) { clean_up_on_connection_failure(rc); } set_fencing_options(interactive_fence_level); mon_refresh_display(NULL); mainloop = g_main_loop_new(NULL, FALSE); mainloop_add_signal(SIGTERM, mon_shutdown); mainloop_add_signal(SIGINT, mon_shutdown); #if PCMK__ENABLE_CURSES if (output_format == mon_output_console) { ncurses_winch_handler = crm_signal_handler(SIGWINCH, mon_winresize); if (ncurses_winch_handler == SIG_DFL || ncurses_winch_handler == SIG_IGN || ncurses_winch_handler == SIG_ERR) ncurses_winch_handler = NULL; io_channel = g_io_channel_unix_new(STDIN_FILENO); g_io_add_watch(io_channel, (G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL), detect_user_input, NULL); } #endif /* When refresh_trigger->trigger is set to TRUE, call mon_refresh_display. In * this file, that is anywhere mainloop_set_trigger is called. */ refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_display, NULL); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); crm_info("Exiting %s", crm_system_name); return clean_up(CRM_EX_OK); } static int send_custom_trap(const char *node, const char *rsc, const char *task, int target_rc, int rc, int status, const char *desc) { pid_t pid; /*setenv needs chars, these are ints */ char *rc_s = pcmk__itoa(rc); char *status_s = pcmk__itoa(status); char *target_rc_s = pcmk__itoa(target_rc); crm_debug("Sending external notification to '%s' via '%s'", options.external_recipient, options.external_agent); if(rsc) { setenv("CRM_notify_rsc", rsc, 1); } if (options.external_recipient) { setenv("CRM_notify_recipient", options.external_recipient, 1); } setenv("CRM_notify_node", node, 1); setenv("CRM_notify_task", task, 1); setenv("CRM_notify_desc", desc, 1); setenv("CRM_notify_rc", rc_s, 1); setenv("CRM_notify_target_rc", target_rc_s, 1); setenv("CRM_notify_status", status_s, 1); pid = fork(); if (pid == -1) { out->err(out, "notification fork() failed: %s", strerror(errno)); } if (pid == 0) { /* crm_debug("notification: I am the child. Executing the nofitication program."); */ execl(options.external_agent, options.external_agent, NULL); crm_exit(CRM_EX_ERROR); } crm_trace("Finished running custom notification program '%s'.", options.external_agent); free(target_rc_s); free(status_s); free(rc_s); return 0; } static int handle_rsc_op(xmlNode *xml, void *userdata) { const char *node_id = (const char *) userdata; int rc = -1; int status = -1; int target_rc = -1; gboolean notify = TRUE; char *rsc = NULL; char *task = NULL; const char *desc = NULL; const char *magic = NULL; const char *id = NULL; const char *node = NULL; xmlNode *n = xml; xmlNode * rsc_op = xml; if(strcmp((const char*)xml->name, PCMK__XE_LRM_RSC_OP) != 0) { pcmk__xe_foreach_child(xml, NULL, handle_rsc_op, (void *) node_id); return pcmk_rc_ok; } id = pcmk__xe_history_key(rsc_op); magic = crm_element_value(rsc_op, PCMK__XA_TRANSITION_MAGIC); if (magic == NULL) { /* non-change */ return pcmk_rc_ok; } if (!decode_transition_magic(magic, NULL, NULL, NULL, &status, &rc, &target_rc)) { crm_err("Invalid event %s detected for %s", magic, id); return pcmk_rc_ok; } if (parse_op_key(id, &rsc, &task, NULL) == FALSE) { crm_err("Invalid event detected for %s", id); goto bail; } node = crm_element_value(rsc_op, PCMK__META_ON_NODE); while ((n != NULL) && !pcmk__xe_is(n, PCMK__XE_NODE_STATE)) { n = n->parent; } if(node == NULL && n) { node = crm_element_value(n, PCMK_XA_UNAME); } if (node == NULL && n) { node = pcmk__xe_id(n); } if (node == NULL) { node = node_id; } if (node == NULL) { crm_err("No node detected for event %s (%s)", magic, id); goto bail; } /* look up where we expected it to be? */ desc = pcmk_rc_str(pcmk_rc_ok); if ((status == PCMK_EXEC_DONE) && (target_rc == rc)) { crm_notice("%s of %s on %s completed: %s", task, rsc, node, desc); if (rc == PCMK_OCF_NOT_RUNNING) { notify = FALSE; } } else if (status == PCMK_EXEC_DONE) { desc = crm_exit_str(rc); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } else { desc = pcmk_exec_status_str(status); crm_warn("%s of %s on %s failed: %s", task, rsc, node, desc); } if (notify && options.external_agent) { send_custom_trap(node, rsc, task, target_rc, rc, status, desc); } bail: free(rsc); free(task); return pcmk_rc_ok; } /* This function is just a wrapper around mainloop_set_trigger so that it can be * called from a mainloop directly. It's simply another way of ensuring the screen * gets redrawn. */ static gboolean mon_trigger_refresh(gpointer user_data) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); return FALSE; } static int handle_op_for_node(xmlNode *xml, void *userdata) { const char *node = crm_element_value(xml, PCMK_XA_UNAME); if (node == NULL) { node = pcmk__xe_id(xml); } handle_rsc_op(xml, (void *) node); return pcmk_rc_ok; } static int crm_diff_update_element(xmlNode *change, void *userdata) { const char *name = NULL; const char *op = crm_element_value(change, PCMK_XA_OPERATION); const char *xpath = crm_element_value(change, PCMK_XA_PATH); xmlNode *match = NULL; const char *node = NULL; if (op == NULL) { return pcmk_rc_ok; } else if (strcmp(op, PCMK_VALUE_CREATE) == 0) { match = change->children; } else if (pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE, NULL)) { return pcmk_rc_ok; } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) { match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL); if(match) { match = match->children; } } if(match) { name = (const char *)match->name; } crm_trace("Handling %s operation for %s %p, %s", op, xpath, match, name); if(xpath == NULL) { /* Version field, ignore */ } else if(name == NULL) { crm_debug("No result for %s operation to %s", op, xpath); pcmk__assert(pcmk__str_any_of(op, PCMK_VALUE_MOVE, PCMK_VALUE_DELETE, NULL)); } else if (strcmp(name, PCMK_XE_CIB) == 0) { pcmk__xe_foreach_child(pcmk__xe_first_child(match, PCMK_XE_STATUS, NULL, NULL), NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK_XE_STATUS) == 0) { pcmk__xe_foreach_child(match, NULL, handle_op_for_node, NULL); } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) { node = crm_element_value(match, PCMK_XA_UNAME); if (node == NULL) { node = pcmk__xe_id(match); } handle_rsc_op(match, (void *) node); } else if (strcmp(name, PCMK__XE_LRM) == 0) { node = pcmk__xe_id(match); handle_rsc_op(match, (void *) node); } else if (strcmp(name, PCMK__XE_LRM_RESOURCES) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if (strcmp(name, PCMK__XE_LRM_RESOURCE) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else if (strcmp(name, PCMK__XE_LRM_RSC_OP) == 0) { char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM); handle_rsc_op(match, local_node); free(local_node); } else { crm_trace("Ignoring %s operation for %s %p, %s", op, xpath, match, name); } return pcmk_rc_ok; } static void crm_diff_update(const char *event, xmlNode * msg) { int rc = -1; static bool stale = FALSE; gboolean cib_updated = FALSE; xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); out->progress(out, false); if (current_cib != NULL) { rc = xml_apply_patchset(current_cib, diff, TRUE); switch (rc) { case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(current_cib); current_cib = NULL; break; case pcmk_ok: cib_updated = TRUE; break; default: crm_notice("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); pcmk__xml_free(current_cib); current_cib = NULL; } } if (current_cib == NULL) { crm_trace("Re-requesting the full cib"); cib->cmds->query(cib, NULL, ¤t_cib, cib_sync_call); } if (options.external_agent) { int format = 0; crm_element_value_int(diff, PCMK_XA_FORMAT, &format); if (format == 2) { xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL); xmlNode *diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL); pcmk__xe_foreach_child(diff, NULL, crm_diff_update_element, NULL); } else { crm_err("Unknown patch format: %d", format); } } if (current_cib == NULL) { if(!stale) { out->info(out, "--- Stale data ---"); } stale = TRUE; return; } stale = FALSE; refresh_after_event(cib_updated, FALSE); } static int mon_refresh_display(gpointer user_data) { int rc = pcmk_rc_ok; last_refresh = time(NULL); if (output_format == mon_output_none) { return G_SOURCE_REMOVE; } if (fence_history == pcmk__fence_history_full && !pcmk_all_flags_set(show, pcmk_section_fencing_all) && output_format != mon_output_xml) { fence_history = pcmk__fence_history_reduced; } // Get an up-to-date pacemakerd status for the cluster summary if (cib->variant == cib_native) { pcmk__pacemakerd_status(out, crm_system_name, options.reconnect_ms / 2, false, &pcmkd_state); } if (out->dest != stdout) { out->reset(out); } rc = pcmk__output_cluster_status(scheduler, st, cib, current_cib, pcmkd_state, fence_history, show, show_opts, options.only_node,options.only_rsc, options.neg_location_prefix); if (rc == pcmk_rc_schema_validation) { clean_up(CRM_EX_CONFIG); return G_SOURCE_REMOVE; } if (out->dest != stdout) { out->finish(out, CRM_EX_OK, true, NULL); } return G_SOURCE_CONTINUE; } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is used on the command line */ static void mon_st_callback_event(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else if (options.external_agent) { char *desc = stonith__event_description(e); send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc); free(desc); } } /* Cause the screen to be redrawn (via mainloop_set_trigger) when various conditions are met: * * - If the last update occurred more than reconnect_ms ago (defaults to 5s, but * can be changed via the -i command line option), or * - After every 10 CIB updates, or * - If it's been 2s since the last update * * This function sounds like it would be more broadly useful, but it is only called when a * fencing event is received or a CIB diff occurrs. */ static void refresh_after_event(gboolean data_updated, gboolean enforce) { static int updates = 0; time_t now = time(NULL); if (data_updated) { updates++; } if(refresh_timer == NULL) { refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); } if (reconnect_timer > 0) { /* we will receive a refresh request after successful reconnect */ mainloop_timer_stop(refresh_timer); return; } /* as we're not handling initial failure of fencer-connection as * fatal give it a retry here * not getting here if cib-reconnection is already on the way */ setup_fencer_connection(); if (enforce || ((now - last_refresh) > pcmk__timeout_ms2s(options.reconnect_ms)) || updates >= 10) { mainloop_set_trigger((crm_trigger_t *) refresh_trigger); mainloop_timer_stop(refresh_timer); updates = 0; } else { mainloop_timer_start(refresh_timer); } } /* This function is called for fencing events (see setup_fencer_connection() for * which ones) when --watch-fencing is NOT used on the command line */ static void mon_st_callback_display(stonith_t * st, stonith_event_t * e) { if (st->state == stonith_disconnected) { /* disconnect cib as well and have everything reconnect */ mon_cib_connection_destroy(NULL); } else { out->progress(out, false); refresh_after_event(TRUE, FALSE); } } /* * De-init ncurses, disconnect from the CIB manager, disconnect fencing, * deallocate memory and show usage-message if requested. * * We don't actually return, but nominally returning crm_exit_t allows a usage * like "return clean_up(exit_code);" which helps static analysis understand the * code flow. */ static crm_exit_t clean_up(crm_exit_t exit_code) { /* Quitting crm_mon is much more complicated than it ought to be. */ /* (1) Close connections, free things, etc. */ if (io_channel != NULL) { g_io_channel_shutdown(io_channel, TRUE, NULL); } cib__clean_up_connection(&cib); - stonith_api_delete(st); + stonith__api_free(st); free(options.neg_location_prefix); free(options.only_node); free(options.only_rsc); free(options.pid_file); g_slist_free_full(options.includes_excludes, free); g_strfreev(processed_args); pcmk_free_scheduler(scheduler); /* (2) If this is abnormal termination and we're in curses mode, shut down * curses first. Any messages displayed to the screen before curses is shut * down will be lost because doing the shut down will also restore the * screen to whatever it looked like before crm_mon was started. */ if (((error != NULL) || (exit_code == CRM_EX_USAGE)) && (output_format == mon_output_console) && (out != NULL)) { out->finish(out, exit_code, false, NULL); pcmk__output_free(out); out = NULL; } /* (3) If this is a command line usage related failure, print the usage * message. */ if (exit_code == CRM_EX_USAGE && (output_format == mon_output_console || output_format == mon_output_plain)) { char *help = g_option_context_get_help(context, TRUE, NULL); fprintf(stderr, "%s", help); g_free(help); } pcmk__free_arg_context(context); /* (4) If this is any kind of error, print the error out and exit. Make * sure to handle situations both before and after formatted output is * set up. We want errors to appear formatted if at all possible. */ if (error != NULL) { if (out != NULL) { out->err(out, "%s: %s", g_get_prgname(), error->message); out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } else { fprintf(stderr, "%s: %s\n", g_get_prgname(), error->message); } g_clear_error(&error); crm_exit(exit_code); } /* (5) Print formatted output to the screen if we made it far enough in * crm_mon to be able to do so. */ if (out != NULL) { if (options.exec_mode != mon_exec_daemonized) { out->finish(out, exit_code, true, NULL); } pcmk__output_free(out); pcmk__unregister_formats(); } crm_exit(exit_code); } diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c index 08b7932f36..5d7547aba1 100644 --- a/tools/stonith_admin.c +++ b/tools/stonith_admin.c @@ -1,726 +1,726 @@ /* * Copyright 2009-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include // gboolean, gchar, etc. #include #include #include #include #include #include #include // stonith__register_messages() #include #include #include #include #define SUMMARY "stonith_admin - Access the Pacemaker fencing API" char action = 0; struct { gboolean as_nodeid; gboolean broadcast; gboolean cleanup; gboolean installed; gboolean metadata; gboolean registered; gboolean validate_cfg; GList *devices; GHashTable *params; int fence_level; int timeout ; long long tolerance_ms; int delay; char *agent; char *confirm_host; char *fence_host; char *history; char *last_fenced; char *query; char *reboot_host; char *register_dev; char *register_level; char *targets; char *terminate; char *unfence_host; char *unregister_dev; char *unregister_level; } options = { .timeout = 120, .delay = 0 }; gboolean add_env_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean add_stonith_device(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean add_stonith_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean add_tolerance(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); gboolean set_tag(const gchar *option_name, const gchar *optarg, gpointer data, GError **error); #define INDENT " " /* *INDENT-OFF* */ static GOptionEntry defn_entries[] = { { "register", 'R', 0, G_OPTION_ARG_STRING, &options.register_dev, "Register the named stonith device. Requires: --agent.\n" INDENT "Optional: --option, --env-option.", "DEVICE" }, { "deregister", 'D', 0, G_OPTION_ARG_STRING, &options.unregister_dev, "De-register the named stonith device.", "DEVICE" }, { "register-level", 'r', 0, G_OPTION_ARG_STRING, &options.register_level, "Register a stonith level for the named target,\n" INDENT "specified as one of NAME, @PATTERN, or ATTR=VALUE.\n" INDENT "Requires: --index and one or more --device entries.", "TARGET" }, { "deregister-level", 'd', 0, G_OPTION_ARG_STRING, &options.unregister_level, "Unregister a stonith level for the named target,\n" INDENT "specified as for --register-level. Requires: --index", "TARGET" }, { NULL } }; static GOptionEntry query_entries[] = { { "list", 'l', 0, G_OPTION_ARG_STRING, &options.terminate, "List devices that can terminate the specified host.\n" INDENT "Optional: --timeout", "HOST" }, { "list-registered", 'L', 0, G_OPTION_ARG_NONE, &options.registered, "List all registered devices. Optional: --timeout.", NULL }, { "list-installed", 'I', 0, G_OPTION_ARG_NONE, &options.installed, "List all installed devices. Optional: --timeout.", NULL }, { "list-targets", 's', 0, G_OPTION_ARG_STRING, &options.targets, "List the targets that can be fenced by the\n" INDENT "named device. Optional: --timeout.", "DEVICE" }, { "metadata", 'M', 0, G_OPTION_ARG_NONE, &options.metadata, "Show agent metadata. Requires: --agent.\n" INDENT "Optional: --timeout.", NULL }, { "query", 'Q', 0, G_OPTION_ARG_STRING, &options.query, "Check the named device's status. Optional: --timeout.", "DEVICE" }, { "history", 'H', 0, G_OPTION_ARG_STRING, &options.history, "Show last successful fencing operation for named node\n" INDENT "(or '*' for all nodes). Optional: --timeout, --cleanup,\n" INDENT "--quiet (show only the operation's epoch timestamp),\n" INDENT "--verbose (show all recorded and pending operations),\n" INDENT "--broadcast (update history from all nodes available).", "NODE" }, { "last", 'h', 0, G_OPTION_ARG_STRING, &options.last_fenced, "Indicate when the named node was last fenced.\n" INDENT "Optional: --as-node-id.", "NODE" }, { "validate", 'K', 0, G_OPTION_ARG_NONE, &options.validate_cfg, "Validate a fence device configuration.\n" INDENT "Requires: --agent. Optional: --option, --env-option,\n" INDENT "--quiet (print no output, only return status).", NULL }, { NULL } }; static GOptionEntry fence_entries[] = { { "fence", 'F', 0, G_OPTION_ARG_STRING, &options.fence_host, "Fence named host. Optional: --timeout, --tolerance, --delay.", "HOST" }, { "unfence", 'U', 0, G_OPTION_ARG_STRING, &options.unfence_host, "Unfence named host. Optional: --timeout, --tolerance, --delay.", "HOST" }, { "reboot", 'B', 0, G_OPTION_ARG_STRING, &options.reboot_host, "Reboot named host. Optional: --timeout, --tolerance, --delay.", "HOST" }, { "confirm", 'C', 0, G_OPTION_ARG_STRING, &options.confirm_host, "Tell cluster that named host is now safely down.", "HOST", }, { NULL } }; static GOptionEntry addl_entries[] = { { "cleanup", 'c', 0, G_OPTION_ARG_NONE, &options.cleanup, "Cleanup wherever appropriate. Requires --history.", NULL }, { "broadcast", 'b', 0, G_OPTION_ARG_NONE, &options.broadcast, "Broadcast wherever appropriate.", NULL }, { "agent", 'a', 0, G_OPTION_ARG_STRING, &options.agent, "The agent to use (for example, fence_xvm;\n" INDENT "with --register, --metadata, --validate).", "AGENT" }, { "option", 'o', 0, G_OPTION_ARG_CALLBACK, add_stonith_params, "Specify a device configuration parameter as NAME=VALUE\n" INDENT "(may be specified multiple times; with --register,\n" INDENT "--validate).", "PARAM" }, { "env-option", 'e', 0, G_OPTION_ARG_CALLBACK, add_env_params, "Specify a device configuration parameter with the\n" INDENT "specified name, using the value of the\n" INDENT "environment variable of the same name prefixed with\n" INDENT "OCF_RESKEY_ (may be specified multiple times;\n" INDENT "with --register, --validate).", "PARAM" }, { "tag", 'T', 0, G_OPTION_ARG_CALLBACK, set_tag, "Identify fencing operations in logs with the specified\n" INDENT "tag; useful when multiple entities might invoke\n" INDENT "stonith_admin (used with most commands).", "TAG" }, { "device", 'v', 0, G_OPTION_ARG_CALLBACK, add_stonith_device, "Device ID (with --register-level, device to associate with\n" INDENT "a given host and level; may be specified multiple times)" #if PCMK__ENABLE_CIBSECRETS "\n" INDENT "(with --validate, name to use to load CIB secrets)" #endif ".", "DEVICE" }, { "index", 'i', 0, G_OPTION_ARG_INT, &options.fence_level, "The stonith level (1-9) (with --register-level,\n" INDENT "--deregister-level).", "LEVEL" }, { "timeout", 't', 0, G_OPTION_ARG_INT, &options.timeout, "Operation timeout in seconds (default 120;\n" INDENT "used with most commands).", "SECONDS" }, { "delay", 'y', 0, G_OPTION_ARG_INT, &options.delay, "Apply a fencing delay in seconds. Any static/random delays from\n" INDENT "pcmk_delay_base/max will be added, otherwise all\n" INDENT "disabled with the value -1\n" INDENT "(default 0; with --fence, --reboot, --unfence).", "SECONDS" }, { "as-node-id", 'n', 0, G_OPTION_ARG_NONE, &options.as_nodeid, "(Advanced) The supplied node is the corosync node ID\n" INDENT "(with --last).", NULL }, { "tolerance", 0, 0, G_OPTION_ARG_CALLBACK, add_tolerance, "(Advanced) Do nothing if an equivalent --fence request\n" INDENT "succeeded less than this many seconds earlier\n" INDENT "(with --fence, --unfence, --reboot).", "SECONDS" }, { NULL } }; /* *INDENT-ON* */ static pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_HTML, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; static const int st_opts = st_opt_sync_call|st_opt_allow_self_fencing; static char *name = NULL; gboolean add_env_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { char *key = crm_strdup_printf("OCF_RESKEY_%s", optarg); const char *env = getenv(key); gboolean retval = TRUE; if (env == NULL) { g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Invalid option: -e %s", optarg); retval = FALSE; } else { crm_info("Got: '%s'='%s'", optarg, env); if (options.params != NULL) { options.params = pcmk__strkey_table(free, free); } pcmk__insert_dup(options.params, optarg, env); } free(key); return retval; } gboolean add_stonith_device(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { options.devices = g_list_append(options.devices, pcmk__str_copy(optarg)); return TRUE; } gboolean add_tolerance(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { // pcmk__request_fencing() expects an unsigned int options.tolerance_ms = crm_get_msec(optarg); if (options.tolerance_ms < 0) { crm_warn("Ignoring invalid tolerance '%s'", optarg); options.tolerance_ms = 0; } else { options.tolerance_ms = QB_MIN(options.tolerance_ms, UINT_MAX); } return TRUE; } gboolean add_stonith_params(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { gchar *name = NULL; gchar *value = NULL; int rc = 0; gboolean retval = TRUE; crm_info("Scanning: -o %s", optarg); rc = pcmk__scan_nvpair(optarg, &name, &value); if (rc != pcmk_rc_ok) { g_set_error(error, PCMK__RC_ERROR, rc, "Invalid option: -o %s: %s", optarg, pcmk_rc_str(rc)); retval = FALSE; } else { crm_info("Got: '%s'='%s'", name, value); if (options.params == NULL) { options.params = pcmk__strkey_table(free, free); } pcmk__insert_dup(options.params, name, value); } g_free(name); g_free(value); return retval; } gboolean set_tag(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { free(name); name = crm_strdup_printf("%s.%s", crm_system_name, optarg); return TRUE; } static GOptionContext * build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { GOptionContext *context = NULL; GOptionEntry extra_prog_entries[] = { { "quiet", 'q', 0, G_OPTION_ARG_NONE, &(args->quiet), "Be less descriptive in output.", NULL }, { NULL } }; context = pcmk__build_arg_context(args, "text (default), html, xml", group, NULL); /* Add the -q option, which cannot be part of the globally supported options * because some tools use that flag for something else. */ pcmk__add_main_args(context, extra_prog_entries); pcmk__add_arg_group(context, "definition", "Device Definition Commands:", "Show device definition help", defn_entries); pcmk__add_arg_group(context, "queries", "Queries:", "Show query help", query_entries); pcmk__add_arg_group(context, "fence", "Fencing Commands:", "Show fence help", fence_entries); pcmk__add_arg_group(context, "additional", "Additional Options:", "Show additional options", addl_entries); return context; } // \return Standard Pacemaker return code static int request_fencing(stonith_t *st, const char *target, const char *command, GError **error) { char *reason = NULL; int rc = pcmk__request_fencing(st, target, command, name, options.timeout * 1000, options.tolerance_ms, options.delay, &reason); if (rc != pcmk_rc_ok) { const char *rc_str = pcmk_rc_str(rc); const char *what = "fence"; if (strcmp(command, PCMK_ACTION_ON) == 0) { what = "unfence"; } // If reason is identical to return code string, don't display it twice if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) { free(reason); reason = NULL; } g_set_error(error, PCMK__RC_ERROR, rc, "Couldn't %s %s: %s%s%s%s", what, target, rc_str, ((reason == NULL)? "" : " ("), ((reason == NULL)? "" : reason), ((reason == NULL)? "" : ")")); } free(reason); return rc; } int main(int argc, char **argv) { int rc = 0; crm_exit_t exit_code = CRM_EX_OK; bool no_connect = false; bool required_agent = false; char *target = NULL; const char *device = NULL; stonith_t *st = NULL; GError *error = NULL; pcmk__output_t *out = NULL; GOptionGroup *output_group = NULL; pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); gchar **processed_args = pcmk__cmdline_preproc(argv, "adehilorstvyBCDFHQRTU"); GOptionContext *context = build_arg_context(args, &output_group); pcmk__register_formats(output_group, formats); if (!g_option_context_parse_strv(context, &processed_args, &error)) { exit_code = CRM_EX_USAGE; goto done; } pcmk__cli_init_logging("stonith_admin", args->verbosity); if (name == NULL) { name = strdup(crm_system_name); } rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); if (rc != pcmk_rc_ok) { exit_code = CRM_EX_ERROR; g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", args->output_ty, pcmk_rc_str(rc)); goto done; } pcmk__output_enable_list_element(out); stonith__register_messages(out); if (args->version) { out->version(out, false); goto done; } if (options.validate_cfg) { required_agent = true; no_connect = true; action = 'K'; } if (options.installed) { no_connect = true; action = 'I'; } if (options.registered) { action = 'L'; } if (options.register_dev != NULL) { required_agent = true; action = 'R'; device = options.register_dev; } if (options.query != NULL) { action = 'Q'; device = options.query; } if (options.unregister_dev != NULL) { action = 'D'; device = options.unregister_dev; } if (options.targets != NULL) { action = 's'; device = options.targets; } if (options.terminate != NULL) { action = 'L'; target = options.terminate; } if (options.metadata) { no_connect = true; required_agent = true; action = 'M'; } if (options.reboot_host != NULL) { no_connect = true; action = 'B'; target = options.reboot_host; crm_log_args(argc, argv); } if (options.fence_host != NULL) { no_connect = true; action = 'F'; target = options.fence_host; crm_log_args(argc, argv); } if (options.unfence_host != NULL) { no_connect = true; action = 'U'; target = options.unfence_host; crm_log_args(argc, argv); } if (options.confirm_host != NULL) { action = 'C'; target = options.confirm_host; crm_log_args(argc, argv); } if (options.last_fenced != NULL) { action = 'h'; target = options.last_fenced; } if (options.history != NULL) { action = 'H'; target = options.history; } if (options.register_level != NULL) { action = 'r'; target = options.register_level; } if (options.unregister_level != NULL) { action = 'd'; target = options.unregister_level; } if ((options.timeout > (UINT_MAX / 1000)) || (options.timeout < 0)) { out->err(out, "Integer value \"%d\" for -t out of range", options.timeout); exit_code = CRM_EX_USAGE; goto done; } if (action == 0) { char *help = g_option_context_get_help(context, TRUE, NULL); out->err(out, "%s", help); g_free(help); exit_code = CRM_EX_USAGE; goto done; } if (required_agent && options.agent == NULL) { char *help = g_option_context_get_help(context, TRUE, NULL); out->err(out, "Please specify an agent to query using -a,--agent [value]"); out->err(out, "%s", help); g_free(help); exit_code = CRM_EX_USAGE; goto done; } out->quiet = args->quiet; st = stonith__api_new(); if (st == NULL) { rc = -ENOMEM; } else if (!no_connect) { rc = st->cmds->connect(st, name, NULL); } if (rc < 0) { out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc)); exit_code = CRM_EX_DISCONNECT; goto done; } switch (action) { case 'I': rc = pcmk__fence_installed(out, st, options.timeout*1000); if (rc != pcmk_rc_ok) { out->err(out, "Failed to list installed devices: %s", pcmk_rc_str(rc)); } break; case 'L': rc = pcmk__fence_registered(out, st, target, options.timeout*1000); if (rc != pcmk_rc_ok) { out->err(out, "Failed to list registered devices: %s", pcmk_rc_str(rc)); } break; case 'Q': rc = st->cmds->monitor(st, st_opts, device, options.timeout); if (rc != pcmk_rc_ok) { rc = st->cmds->list(st, st_opts, device, NULL, options.timeout); } rc = pcmk_legacy2rc(rc); break; case 's': rc = pcmk__fence_list_targets(out, st, device, options.timeout*1000); if (rc != pcmk_rc_ok) { out->err(out, "Couldn't list targets: %s", pcmk_rc_str(rc)); } break; case 'R': { /* register_device wants a stonith_key_value_t instead of a GHashTable */ stonith_key_value_t *params = NULL; GHashTableIter iter; gpointer key, val; if (options.params != NULL) { g_hash_table_iter_init(&iter, options.params); while (g_hash_table_iter_next(&iter, &key, &val)) { params = stonith_key_value_add(params, key, val); } } rc = st->cmds->register_device(st, st_opts, device, NULL, options.agent, params); stonith_key_value_freeall(params, 1, 1); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Can't register device %s using agent %s: %s", device, options.agent, pcmk_rc_str(rc)); } break; } case 'D': rc = st->cmds->remove_device(st, st_opts, device); rc = pcmk_legacy2rc(rc); if (rc != pcmk_rc_ok) { out->err(out, "Can't unregister device %s: %s", device, pcmk_rc_str(rc)); } break; case 'd': rc = pcmk__fence_unregister_level(st, target, options.fence_level); if (rc != pcmk_rc_ok) { out->err(out, "Can't unregister topology level %d for %s: %s", options.fence_level, target, pcmk_rc_str(rc)); } break; case 'r': rc = pcmk__fence_register_level(st, target, options.fence_level, options.devices); if (rc != pcmk_rc_ok) { out->err(out, "Can't register topology level %d for %s: %s", options.fence_level, target, pcmk_rc_str(rc)); } break; case 'M': rc = pcmk__fence_metadata(out, st, options.agent, options.timeout*1000); if (rc != pcmk_rc_ok) { out->err(out, "Can't get fence agent meta-data: %s", pcmk_rc_str(rc)); } break; case 'C': rc = st->cmds->confirm(st, st_opts, target); rc = pcmk_legacy2rc(rc); break; case 'B': rc = request_fencing(st, target, PCMK_ACTION_REBOOT, &error); break; case 'F': rc = request_fencing(st, target, PCMK_ACTION_OFF, &error); break; case 'U': rc = request_fencing(st, target, PCMK_ACTION_ON, &error); break; case 'h': rc = pcmk__fence_last(out, target, options.as_nodeid); break; case 'H': rc = pcmk__fence_history(out, st, target, options.timeout*1000, args->verbosity, options.broadcast, options.cleanup); break; case 'K': device = NULL; if (options.devices != NULL) { device = g_list_nth_data(options.devices, 0); } rc = pcmk__fence_validate(out, st, options.agent, device, options.params, options.timeout*1000); break; } crm_info("Command returned: %s (%d)", pcmk_rc_str(rc), rc); exit_code = pcmk_rc2exitc(rc); done: g_strfreev(processed_args); pcmk__free_arg_context(context); pcmk__output_and_clear_error(&error, out); if (out != NULL) { out->finish(out, exit_code, true, NULL); pcmk__output_free(out); } pcmk__unregister_formats(); free(name); g_list_free_full(options.devices, free); if (options.params != NULL) { g_hash_table_destroy(options.params); } if (st != NULL) { st->cmds->disconnect(st); - stonith_api_delete(st); + stonith__api_free(st); } return exit_code; }