diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 222fdbca5e..9b94ab6e79 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,870 +1,870 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include static qb_ipcs_service_t *ipcs = NULL; static crm_trigger_t *config_read_trigger = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(crm_cluster_t * cluster); #endif void crm_shutdown(int nsig); static gboolean crm_read_options(gpointer user_data); /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = pcmk_cluster_new(); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); controld_set_fsa_input_flags(R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); #if SUPPORT_COROSYNC if (is_corosync_cluster()) { registered = crm_connect_corosync(cluster); } #endif // SUPPORT_COROSYNC if (registered) { controld_election_init(cluster->uname); controld_globals.our_nodename = cluster->uname; controld_globals.our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (!registered) { controld_set_fsa_input_flags(R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __func__); controld_clear_fsa_input_flags(R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ controld_set_fsa_input_flags(R_SHUTDOWN); controld_disconnect_fencer(FALSE); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; controld_set_fsa_input_flags(R_SHUTDOWN); //controld_set_fsa_input_flags(R_STAYDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", pcmk__s(controld_globals.dc_name, "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } void crmd_fast_exit(crm_exit_t exit_code) { if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && pcmk_is_set(controld_globals.fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } if (controld_globals.logger_out != NULL) { controld_globals.logger_out->finish(controld_globals.logger_out, exit_code, true, NULL); pcmk__output_free(controld_globals.logger_out); controld_globals.logger_out = NULL; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GMainLoop *mloop = controld_globals.mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ controld_set_fsa_input_flags(R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); controld_shutdown_schedulerd_ipc(); controld_disconnect_fencer(TRUE); if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL; iter = iter->next) { fsa_data_t *fsa_data = (fsa_data_t *) iter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(controld_globals.fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } controld_clear_fsa_input_flags(R_MEMBERSHIP); g_list_free(controld_globals.fsa_message_queue); controld_globals.fsa_message_queue = NULL; controld_free_node_pending_timers(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. */ controld_disconnect_cib_manager(); verify_stopped(controld_globals.fsa_state, LOG_WARNING); controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_destroy_all(); mainloop_destroy_trigger(config_read_trigger); config_read_trigger = NULL; controld_destroy_fsa_trigger(); controld_destroy_transition_trigger(); pcmk__client_cleanup(); crm_peer_destroy(); controld_free_fsa_timers(); te_cleanup_stonith_history_sync(NULL, TRUE); controld_free_sched_timer(); free(controld_globals.our_nodename); controld_globals.our_nodename = NULL; free(controld_globals.our_uuid); controld_globals.our_uuid = NULL; free(controld_globals.dc_name); controld_globals.dc_name = NULL; free(controld_globals.dc_version); controld_globals.dc_version = NULL; free(controld_globals.cluster_name); controld_globals.cluster_name = NULL; free(controld_globals.te_uuid); controld_globals.te_uuid = NULL; free_max_generation(); controld_destroy_failed_sync_table(); controld_destroy_outside_events_table(); mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop); /* Don't re-enter this block */ controld_globals.mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(controld_globals.cib_conn); controld_globals.cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; if (pcmk_is_set(action, A_EXIT_1)) { exit_code = CRM_EX_ERROR; crm_err("Exiting now due to errors"); } verify_stopped(cur_state, LOG_ERR); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); controld_init_fsa_trigger(); controld_init_transition_trigger(); crm_debug("Creating CIB manager and executor objects"); controld_globals.cib_conn = cib_new(); lrm_state_init_local(); if (controld_init_fsa_timers() == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } // \return libqb error code (0 on success, -errno on error) static int32_t accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } // \return libqb error code (0 on success, -errno on error) static int32_t dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL); return 0; } pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE); CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(msg, F_CRM_USER, client->user); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from client %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); free_xml(msg); return 0; } static int32_t ipc_client_disconnected(qb_ipcs_connection_t *c) { pcmk__client_t *client = pcmk__find_client(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), pcmk__client_name(client), c, client); free(client->userdata); pcmk__free_client(client); controld_trigger_fsa(); } return 0; } static void ipc_connection_destroyed(qb_ipcs_connection_t *c) { crm_trace("Connection %p", c); ipc_client_disconnected(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = accept_controller_client, .connection_created = NULL, .msg_process = dispatch_controller_ipc, .connection_closed = ipc_client_disconnected, .connection_destroyed = ipc_connection_destroyed }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_MEMBERSHIP)) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_READ_CONFIG)) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } controld_set_fsa_input_flags(R_ST_REQUIRED); controld_timer_fencer_connect(GINT_TO_POINTER(TRUE)); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { controld_set_fsa_input_flags(R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } static pcmk__cluster_option_t controller_options[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { PCMK_OPT_DC_VERSION, NULL, "string", NULL, PCMK__VALUE_NONE, NULL, N_("Pacemaker version on cluster node elected Designated Controller (DC)"), N_("Includes a hash which identifies the exact changeset the code was " "built from. Used for diagnostic purposes.") }, { PCMK_OPT_CLUSTER_INFRASTRUCTURE, NULL, "string", NULL, "corosync", NULL, N_("The messaging stack on which Pacemaker is currently running"), N_("Used for informational and diagnostic purposes.") }, { PCMK_OPT_CLUSTER_NAME, NULL, "string", NULL, NULL, NULL, N_("An arbitrary name for the cluster"), N_("This optional value is mostly for users' convenience as desired " "in administration, but may also be used in Pacemaker " "configuration rules via the #cluster-name node attribute, and " "by higher-level tools and resource agents.") }, { PCMK_OPT_DC_DEADTIME, NULL, "time", NULL, "20s", pcmk__valid_interval_spec, N_("How long to wait for a response from other nodes during start-up"), N_("The optimal value will depend on the speed and load of your network " "and the type of switches used.") }, { PCMK_OPT_CLUSTER_RECHECK_INTERVAL, NULL, "time", N_("Zero disables polling, while positive values are an interval in seconds" "(unless other units are specified, for example \"5min\")"), "15min", pcmk__valid_interval_spec, N_("Polling interval to recheck cluster state and evaluate rules " "with date specifications"), N_("Pacemaker is primarily event-driven, and looks ahead to know when to " "recheck cluster state for failure timeouts and most time-based " "rules. However, it will also recheck the cluster after this " "amount of inactivity, to evaluate rules with date specifications " "and serve as a fail-safe for certain types of scheduler bugs.") }, { PCMK_OPT_LOAD_THRESHOLD, NULL, "percentage", NULL, "80%", pcmk__valid_percentage, N_("Maximum amount of system load that should be used by cluster nodes"), N_("The cluster will slow down its recovery process when the amount of " "system resources used (currently CPU) approaches this limit"), }, { PCMK_OPT_NODE_ACTION_LIMIT, NULL, "integer", NULL, "0", pcmk__valid_number, N_("Maximum number of jobs that can be scheduled per node " "(defaults to 2x cores)") }, { PCMK_OPT_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, N_("How a cluster node should react if notified of its own fencing"), N_("A cluster node may receive notification of its own fencing if fencing " "is misconfigured, or if fabric fencing is in use that doesn't cut " "cluster communication. Allowed values are \"stop\" to attempt to " "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt " "to immediately reboot the local node, falling back to stop on failure.") }, { PCMK_OPT_ELECTION_TIMEOUT, NULL, "time", NULL, "2min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("Declare an election failed if it is not decided within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_SHUTDOWN_ESCALATION, NULL, "time", NULL, "20min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("Exit immediately if shutdown does not complete within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_JOIN_INTEGRATION_TIMEOUT, "crmd-integration-timeout", "time", NULL, "3min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { PCMK_OPT_JOIN_FINALIZATION_TIMEOUT, "crmd-finalization-timeout", "time", NULL, "30min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", N_("If you need to adjust this value, it probably indicates " "the presence of a bug.") }, { "transition-delay", "crmd-transition-delay", "time", NULL, "0s", pcmk__valid_interval_spec, N_("*** Advanced Use Only *** Enabling this option will slow down " "cluster recovery under all conditions"), N_("Delay cluster recovery for this much time to allow for additional " "events to occur. Useful if your configuration is sensitive to " "the order in which ping updates arrive.") }, { - "stonith-watchdog-timeout", NULL, "time", NULL, + PCMK_OPT_STONITH_WATCHDOG_TIMEOUT, NULL, "time", NULL, "0", controld_verify_stonith_watchdog_timeout, N_("How long before nodes can be assumed to be safely down when " "watchdog-based self-fencing via SBD is in use"), N_("If this is set to a positive value, lost nodes are assumed to " "self-fence using watchdog-based SBD within this much time. This " "does not require a fencing resource to be explicitly configured, " "though a fence_watchdog resource can be configured, to limit use " "to specific nodes. If this is set to 0 (the default), the cluster " "will never assume watchdog-based self-fencing. If this is set to a " "negative value, the cluster will use twice the local value of the " "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " "or otherwise treat this as 0. WARNING: When used, this timeout " "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " "watchdog-based SBD, and Pacemaker will refuse to start on any of " "those nodes where this is not true for the local value or SBD is " "not active. When this is set to a negative value, " "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " "that use SBD, otherwise data corruption or loss could occur.") }, { PCMK_OPT_STONITH_MAX_ATTEMPTS, NULL, "integer", NULL, "10", pcmk__valid_positive_number, N_("How many times fencing can fail before it will no longer be " "immediately re-attempted on a target") }, // Already documented in libpe_status (other values must be kept identical) { PCMK_OPT_NO_QUORUM_POLICY, NULL, "select", "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, N_("What to do when the cluster does not have quorum"), NULL }, { PCMK_OPT_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", pcmk__valid_boolean, N_("Whether to lock resources to a cleanly shut down node"), N_("When true, resources active on a node when it is cleanly shut down " "are kept \"locked\" to that node (not allowed to run elsewhere) " "until they start again on that node after it rejoins (or for at " "most shutdown-lock-limit, if set). Stonith resources and " "Pacemaker Remote connections are never locked. Clone and bundle " "instances and the promoted role of promotable clones are " "currently never locked, though support could be added in a future " "release.") }, { PCMK_OPT_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("Do not lock resources to a cleanly shut down node longer than " "this"), N_("If shutdown-lock is true and this is set to a nonzero time " "duration, shutdown locks will expire after this much time has " "passed since the shutdown was initiated, even if the node has not " "rejoined.") }, { PCMK_OPT_NODE_PENDING_TIMEOUT, NULL, "time", NULL, "0", pcmk__valid_interval_spec, N_("How long to wait for a node that has joined the cluster to join " "the controller process group"), N_("Fence nodes that do not join the controller process group within " "this much time after joining the cluster, to allow the cluster " "to continue managing resources. A value of 0 means never fence " "pending nodes. Setting the value to 2h means fence nodes after " "2 hours.") }, }; void crmd_metadata(void) { const char *desc_short = "Pacemaker controller options"; const char *desc_long = "Cluster options used by Pacemaker's controller"; gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short, desc_long, controller_options, PCMK__NELEM(controller_options)); printf("%s", s); g_free(s); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); controld_set_fsa_input_flags(R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, XML_CIB_TAG_CRMCONFIG)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = pcmk__strkey_table(free, free); pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); // Validate all options, and use defaults if not already present in hash pcmk__validate_cluster_options(config_hash, controller_options, PCMK__NELEM(controller_options)); value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { controld_set_global_flags(controld_no_quorum_suicide); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK); if (crm_is_true(value)) { controld_set_global_flags(controld_shutdown_lock_enabled); } else { controld_clear_global_flags(controld_shutdown_lock_enabled); } value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); controld_globals.shutdown_lock_limit = crm_parse_interval_spec(value) / 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); controld_globals.node_pending_timeout = crm_parse_interval_spec(value) / 1000; value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME); pcmk__str_update(&(controld_globals.cluster_name), value); // Let subcomponents initialize their own static variables controld_configure_election(config_hash); controld_configure_fencing(config_hash); controld_configure_fsa_timers(config_hash); controld_configure_throttle(config_hash); alerts = first_named_child(output, XML_CIB_TAG_ALERTS); crmd_unpack_alerts(alerts); controld_set_fsa_input_flags(R_READ_CONFIG); controld_trigger_fsa(); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } /*! * \internal * \brief Trigger read and processing of the configuration * * \param[in] fn Calling function name * \param[in] line Line number where call occurred */ void controld_trigger_config_as(const char *fn, int line) { if (config_read_trigger != NULL) { crm_trace("%s:%d - Triggered config processing", fn, line); mainloop_set_trigger(config_read_trigger); } } gboolean crm_read_options(gpointer user_data) { cib_t *cib_conn = controld_globals.cib_conn; int call_id = cib_conn->cmds->query(cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath|cib_scope_local); fsa_register_cib_callback(call_id, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); controld_trigger_config(); } void crm_shutdown(int nsig) { const char *value = NULL; guint default_period_ms = 0; if ((controld_globals.mainloop == NULL) || !g_main_loop_is_running(controld_globals.mainloop)) { crmd_exit(CRM_EX_OK); return; } if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); return; } controld_set_fsa_input_flags(R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); /* If shutdown timer doesn't have a period set, use the default * * @TODO: Evaluate whether this is still necessary. As long as * config_query_callback() has been run at least once, it doesn't look like * anything could have changed the timer period since then. */ value = pcmk__cluster_option(NULL, controller_options, PCMK__NELEM(controller_options), PCMK_OPT_SHUTDOWN_ESCALATION); default_period_ms = crm_parse_interval_spec(value); controld_shutdown_start_countdown(default_period_ms); } diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index b90cc5e635..cf2baad845 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -1,813 +1,815 @@ /* - * Copyright 2012-2023 the Pacemaker project contributors + * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include static GHashTable *lrm_state_table = NULL; extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); static void free_rsc_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { active_op_t *op = value; free(op->user_data); free(op->rsc_id); free(op->op_type); free(op->op_key); if (op->params) { g_hash_table_destroy(op->params); } free(op); } static gboolean fail_pending_op(gpointer key, gpointer value, gpointer user_data) { lrmd_event_data_t event = { 0, }; lrm_state_t *lrm_state = user_data; active_op_t *op = value; crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)", op->rsc_id, op->op_type, op->interval_ms, lrm_state->node_name, (char*)key, op->user_data); event.type = lrmd_event_exec_complete; event.rsc_id = op->rsc_id; event.op_type = op->op_type; event.user_data = op->user_data; event.timeout = 0; event.interval_ms = op->interval_ms; lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "Action was pending when executor connection was dropped"); event.t_run = (unsigned int) op->start_time; event.t_rcchange = (unsigned int) op->start_time; event.call_id = op->call_id; event.remote_nodename = lrm_state->node_name; event.params = op->params; process_lrm_event(lrm_state, &event, op, NULL); lrmd__reset_result(&event); return TRUE; } gboolean lrm_state_is_local(lrm_state_t *lrm_state) { return (lrm_state != NULL) && pcmk__str_eq(lrm_state->node_name, controld_globals.our_nodename, pcmk__str_casei); } /*! * \internal * \brief Create executor state entry for a node and add it to the state table * * \param[in] node_name Node to create entry for * * \return Newly allocated executor state object initialized for \p node_name */ static lrm_state_t * lrm_state_create(const char *node_name) { lrm_state_t *state = NULL; if (!node_name) { crm_err("No node name given for lrm state object"); return NULL; } state = calloc(1, sizeof(lrm_state_t)); if (!state) { return NULL; } state->node_name = strdup(node_name); state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info); state->deletion_ops = pcmk__strkey_table(free, free_deletion_op); state->active_ops = pcmk__strkey_table(free, free_recurring_op); state->resource_history = pcmk__strkey_table(NULL, history_free); state->metadata_cache = metadata_cache_new(); g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); return state; } static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { remote_proxy_t *proxy = value; const char *node_name = user_data; if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return TRUE; } return FALSE; } static remote_proxy_t * find_connected_proxy_by_node(const char * node_name) { GHashTableIter gIter; remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return NULL); g_hash_table_iter_init(&gIter, proxy_table); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) { if (proxy->source && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return proxy; } } return NULL; } static void remote_proxy_disconnect_by_node(const char * node_name) { remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return); while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) { /* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected() * , which removes the entry from proxy_table. * Do not do this in a g_hash_table_iter_next() loop. */ if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } return; } static void internal_lrm_state_destroy(gpointer data) { lrm_state_t *lrm_state = data; if (!lrm_state) { return; } /* Rather than directly remove the recorded proxy entries from proxy_table, * make sure any connected proxies get disconnected. So that * remote_proxy_disconnected() will be called and as well remove the * entries from proxy_table. */ remote_proxy_disconnect_by_node(lrm_state->node_name); crm_trace("Destroying proxy table %s with %u members", lrm_state->node_name, g_hash_table_size(proxy_table)); // Just in case there's still any leftovers in proxy_table g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); remote_ra_cleanup(lrm_state); lrmd_api_delete(lrm_state->conn); if (lrm_state->rsc_info_cache) { crm_trace("Destroying rsc info cache with %u members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_destroy(lrm_state->rsc_info_cache); } if (lrm_state->resource_history) { crm_trace("Destroying history op cache with %u members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_destroy(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Destroying deletion op cache with %u members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_destroy(lrm_state->deletion_ops); } if (lrm_state->active_ops != NULL) { crm_trace("Destroying pending op cache with %u members", g_hash_table_size(lrm_state->active_ops)); g_hash_table_destroy(lrm_state->active_ops); } metadata_cache_free(lrm_state->metadata_cache); free((char *)lrm_state->node_name); free(lrm_state); } void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) { if (lrm_state->resource_history) { crm_trace("Resetting resource history cache with %u members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_remove_all(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Resetting deletion operations cache with %u members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_remove_all(lrm_state->deletion_ops); } if (lrm_state->active_ops != NULL) { crm_trace("Resetting active operations cache with %u members", g_hash_table_size(lrm_state->active_ops)); g_hash_table_remove_all(lrm_state->active_ops); } if (lrm_state->rsc_info_cache) { crm_trace("Resetting resource information cache with %u members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_remove_all(lrm_state->rsc_info_cache); } if (reset_metadata) { metadata_cache_reset(lrm_state->metadata_cache); } } gboolean lrm_state_init_local(void) { if (lrm_state_table) { return TRUE; } lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy); if (!lrm_state_table) { return FALSE; } proxy_table = pcmk__strikey_table(NULL, remote_proxy_free); if (!proxy_table) { g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; return FALSE; } return TRUE; } void lrm_state_destroy_all(void) { if (lrm_state_table) { crm_trace("Destroying state table with %u members", g_hash_table_size(lrm_state_table)); g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; } if(proxy_table) { crm_trace("Destroying proxy table with %u members", g_hash_table_size(proxy_table)); g_hash_table_destroy(proxy_table); proxy_table = NULL; } } lrm_state_t * lrm_state_find(const char *node_name) { if ((node_name == NULL) || (lrm_state_table == NULL)) { return NULL; } return g_hash_table_lookup(lrm_state_table, node_name); } lrm_state_t * lrm_state_find_or_create(const char *node_name) { lrm_state_t *lrm_state; CRM_CHECK(lrm_state_table != NULL, return NULL); lrm_state = g_hash_table_lookup(lrm_state_table, node_name); if (!lrm_state) { lrm_state = lrm_state_create(node_name); } return lrm_state; } GList * lrm_state_get_list(void) { if (lrm_state_table == NULL) { return NULL; } return g_hash_table_get_values(lrm_state_table); } void lrm_state_disconnect_only(lrm_state_t * lrm_state) { int removed = 0; if (!lrm_state->conn) { return; } crm_trace("Disconnecting %s", lrm_state->node_name); remote_proxy_disconnect_by_node(lrm_state->node_name); ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); if (!pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { removed = g_hash_table_foreach_remove(lrm_state->active_ops, fail_pending_op, lrm_state); crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); } } void lrm_state_disconnect(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return; } lrm_state_disconnect_only(lrm_state); lrmd_api_delete(lrm_state->conn); lrm_state->conn = NULL; } int lrm_state_is_connected(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return FALSE; } return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); } int lrm_state_poke_connection(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return -ENOTCONN; } return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); } // \return Standard Pacemaker return code int controld_connect_local_executor(lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, NULL, NULL, 0); if (rc != pcmk_rc_ok) { return rc; } api->cmds->set_callback(api, lrm_op_callback); lrm_state->conn = api; } rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; } return rc; } static remote_proxy_t * crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) { struct ipc_client_callbacks proxy_callbacks = { .dispatch = remote_proxy_dispatch, .destroy = remote_proxy_disconnected }; remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, session_id, channel); return proxy; } gboolean crmd_is_proxy_session(const char *session) { return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE; } void crmd_proxy_send(const char *session, xmlNode *msg) { remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); lrm_state_t *lrm_state = NULL; if (!proxy) { return; } crm_log_xml_trace(msg, "to-proxy"); lrm_state = lrm_state_find(proxy->node_name); if (lrm_state) { crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); remote_proxy_relay_event(proxy, msg); } } static void crmd_proxy_dispatch(const char *session, xmlNode *msg) { crm_trace("Processing proxied IPC message from session %s", session); crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, session); if (controld_authorize_ipc_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } controld_trigger_fsa(); } static void remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); } } else { lrmd_t * lrmd = (lrmd_t *)user_data; crm_time_t *now = crm_time_new(NULL); GHashTable *config_hash = pcmk__strkey_table(free, free); crm_debug("Call %d : Parsing CIB options", call_id); pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); /* Now send it to the remote peer */ lrmd__validate_remote_settings(lrmd, config_hash); g_hash_table_destroy(config_hash); crm_time_free(now); } } static void crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) { lrm_state_t *lrm_state = userdata; const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); const char *op = crm_element_value(msg, F_LRMD_IPC_OP); if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) { const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); if (!remote_ra_controlling_guest(lrm_state)) { if (proxy != NULL) { cib_t *cib_conn = controld_globals.cib_conn; - /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ + /* Look up PCMK_OPT_STONITH_WATCHDOG_TIMEOUT and send to the + * remote peer for validation + */ int rc = cib_conn->cmds->query(cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); cib_conn->cmds->register_callback_full(cib_conn, rc, 10, FALSE, lrmd, "remote_config_check", remote_config_check, NULL); } } else { crm_debug("Skipping remote_config_check for guest-nodes"); } } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) { char *now_s = NULL; crm_notice("%s requested shutdown of its remote connection", lrm_state->node_name); if (!remote_ra_is_in_maintenance(lrm_state)) { now_s = pcmk__ttoa(time(NULL)); update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE); free(now_s); remote_proxy_ack_shutdown(lrmd); crm_warn("Reconnection attempts to %s may result in failures that must be cleared", lrm_state->node_name); } else { remote_proxy_nack_shutdown(lrmd); crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", lrm_state->node_name); } return; } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) { /* This is for the controller, which we are, so don't try * to send to ourselves over IPC -- do it directly. */ int flags = 0; xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); CRM_CHECK(request != NULL, return); CRM_CHECK(lrm_state->node_name, return); crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name); /* Pacemaker Remote nodes don't know their own names (as known to the * cluster). When getting a node info request with no name or ID, add * the name, so we don't return info for ourselves instead of the * Pacemaker Remote node. */ if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_casei)) { int node_id = 0; crm_element_value_int(request, XML_ATTR_ID, &node_id); if ((node_id <= 0) && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) { crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name); } } crmd_proxy_dispatch(session, request); crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); if (flags & crm_ipc_client_response) { int msg_id = 0; xmlNode *op_reply = create_xml_node(NULL, "ack"); crm_xml_add(op_reply, "function", __func__); crm_xml_add_int(op_reply, "line", __LINE__); crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); remote_proxy_relay_response(proxy, op_reply, msg_id); free_xml(op_reply); } } else { remote_proxy_cb(lrmd, lrm_state->node_name, msg); } } // \return Standard Pacemaker return code int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, int port, int timeout_ms) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, lrm_state->node_name, server, port); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%s failed: %s " CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc); return rc; } lrm_state->conn = api; api->cmds->set_callback(api, remote_lrm_op_callback); lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb); } crm_trace("Initiating remote connection to %s:%d with timeout %dms", server, port, timeout_ms); rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms); if (rc == pcmk_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; // Ignored for remote connections } return pcmk_legacy2rc(rc); } int lrm_state_get_metadata(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { lrmd_key_value_t *params = NULL; if (!lrm_state->conn) { return -ENOTCONN; } /* Add the node name to the environment, as is done with normal resource * action calls. Meta-data calls shouldn't need it, but some agents are * written with an ocf_local_nodename call at the beginning regardless of * action. Without the environment variable, the agent would try to contact * the controller to get the node name -- but the controller would be * blocking on the synchronous meta-data call. * * At this point, we have to assume that agents are unlikely to make other * calls that require the controller, such as crm_node --quorum or * --cluster-id. * * @TODO Make meta-data calls asynchronous. (This will be part of a larger * project to make meta-data calls via the executor rather than directly.) */ params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, lrm_state->node_name); return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, class, provider, agent, output, options, params); } int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { if (!lrm_state->conn) { return -ENOTCONN; } /* Figure out a way to make this async? * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); } return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval_ms); } lrmd_rsc_info_t * lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc = NULL; if (!lrm_state->conn) { return NULL; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_get_rsc_info(lrm_state, rsc_id); } rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); if (rsc == NULL) { /* only contact the lrmd if we don't already have a cached rsc info */ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); if (rsc == NULL) { return NULL; } /* cache the result */ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); } return lrmd_copy_rsc_info(rsc); } /*! * \internal * \brief Initiate a resource agent action * * \param[in,out] lrm_state Executor state object * \param[in] rsc_id ID of resource for action * \param[in] action Action to execute * \param[in] userdata String to copy and pass to execution callback * \param[in] interval_ms Action interval (in milliseconds) * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] start_delay_ms Delay (in ms) before initiating action * \param[in] parameters Hash table of resource parameters * \param[out] call_id Where to store call ID on success * * \return Standard Pacemaker return code */ int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout_ms, int start_delay_ms, GHashTable *parameters, int *call_id) { int rc = pcmk_rc_ok; lrmd_key_value_t *params = NULL; if (lrm_state->conn == NULL) { return ENOTCONN; } // Convert parameters from hash table to list if (parameters != NULL) { const char *key = NULL; const char *value = NULL; GHashTableIter iter; g_hash_table_iter_init(&iter, parameters); while (g_hash_table_iter_next(&iter, (gpointer *) &key, (gpointer *) &value)) { params = lrmd_key_value_add(params, key, value); } } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { rc = controld_execute_remote_agent(lrm_state, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, params, call_id); } else { rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, lrmd_opt_notify_changes_only, params); if (rc < 0) { rc = pcmk_legacy2rc(rc); } else { *call_id = rc; rc = pcmk_rc_ok; } } return rc; } int lrm_state_register_rsc(lrm_state_t * lrm_state, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { lrmd_t *conn = (lrmd_t *) lrm_state->conn; if (conn == NULL) { return -ENOTCONN; } if (is_remote_lrmd_ra(agent, provider, NULL)) { return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL; } /* @TODO Implement an asynchronous version of this (currently a blocking * call to the lrmd). */ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options); } int lrm_state_unregister_rsc(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { g_hash_table_remove(lrm_state_table, rsc_id); return pcmk_ok; } g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id); /* @TODO Optimize this ... this function is a blocking round trip from * client to daemon. The controld_execd_state.c code path that uses this * function should always treat it as an async operation. The executor API * should make an async version available. */ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); } diff --git a/daemons/fenced/fenced_cib.c b/daemons/fenced/fenced_cib.c index e11bf68355..e662493a40 100644 --- a/daemons/fenced/fenced_cib.c +++ b/daemons/fenced/fenced_cib.c @@ -1,734 +1,738 @@ /* - * Copyright 2009-2023 the Pacemaker project contributors + * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include static xmlNode *local_cib = NULL; static cib_t *cib_api = NULL; static bool have_cib_devices = FALSE; /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { GString *xpath = NULL; xmlNode *match; CRM_CHECK((local_cib != NULL) && (node != NULL) && (name != NULL) && (value != NULL), return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@" XML_ATTR_UNAME "='", node, "']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@" XML_NVPAIR_ATTR_NAME "='", name, "' " "and @" XML_NVPAIR_ATTR_VALUE "='", value, "']", NULL); match = get_xpath_object((const char *) xpath->str, local_cib, LOG_NEVER); g_string_free(xpath, TRUE); return (match != NULL); } static void add_topology_level(xmlNode *match) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(match != NULL, return); fenced_register_level(match, &desc, &result); fenced_send_level_notification(STONITH_OP_LEVEL_ADD, &result, desc); pcmk__reset_result(&result); free(desc); } static void topology_remove_helper(const char *node, int level) { char *desc = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); fenced_unregister_level(data, &desc, &result); fenced_send_level_notification(STONITH_OP_LEVEL_DEL, &result, desc); pcmk__reset_result(&result); free_xml(data); free(desc); } static void remove_topology_level(xmlNode *match) { int index = 0; char *key = NULL; CRM_CHECK(match != NULL, return); key = stonith_level_key(match, fenced_target_by_unknown); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); remove_topology_level(match); add_topology_level(match); } } /* Fencing */ void fencing_topology_init(void) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, true); } } +#define XPATH_WATCHDOG_TIMEOUT "//" XML_CIB_TAG_NVPAIR \ + "[@" XML_NVPAIR_ATTR_NAME "='" \ + PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "']" + static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { long timeout_ms = 0; xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; - stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", - cib, LOG_NEVER); + stonith_watchdog_xml = get_xpath_object(XPATH_WATCHDOG_TIMEOUT, cib, + LOG_NEVER); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if (value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_watchdog_timeout(); } stonith_watchdog_timeout_ms = timeout_ms; } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; fenced_scheduler_run(local_cib); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; pcmk__str_update(&reason, "(meta) attribute deleted from resource"); break; } pcmk__str_update(&mutable, xpath); rsc_id = strstr(mutable, "primitive[@" XML_ATTR_ID "=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@" XML_ATTR_ID "=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, true); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in CIB"); } free(reason); } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, TRUE); free_xml(xml); if (rc != pcmk_ok) { rc = pcmk_legacy2rc(rc); exit_code = CRM_EX_FATAL; crm_crit("Cannot register watchdog pseudo fence agent: %s", pcmk_rc_str(rc)); stonith_shutdown(0); } } } else if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) != NULL) { /* be silent if no device - todo parameter to stonith_device_remove */ stonith_device_remove(STONITH_WATCHDOG_ID, true); } } /*! * \internal * \brief Query the full CIB * * \return Standard Pacemaker return code */ static int fenced_query_cib(void) { int rc = pcmk_ok; crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local|cib_sync_call); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { CRM_ASSERT(local_cib != NULL); } else { crm_err("Couldn't retrieve the CIB: %s " CRM_XS " rc=%d", pcmk_rc_str(rc), rc); } return rc; } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, fenced_target_by_unknown); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, PCMK_XA_FORMAT, &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { add_topology_level(change->children); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { remove_topology_level(match->children); add_topology_level(match->children); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static void update_cib_cache_cb(const char *event, xmlNode * msg) { long timeout_ms_saved = stonith_watchdog_timeout_ms; bool need_full_refresh = false; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { if (fenced_query_cib() != pcmk_rc_ok) { return; } need_full_refresh = true; } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = true; } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); } else { // Partial refresh update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); } watchdog_device_update(); } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = copy_xml(output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } /*! * \internal * \brief Disconnect from CIB manager */ void fenced_cib_cleanup(void) { if (cib_api != NULL) { cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib__clean_up_connection(&cib_api); } free_xml(local_cib); local_cib = NULL; } void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } } diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 54bb792263..49c81d6fd8 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -1,3681 +1,3681 @@ /* * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include GHashTable *device_list = NULL; GHashTable *topology = NULL; static GList *cmd_list = NULL; static GHashTable *fenced_handlers = NULL; struct device_search_s { /* target of fence action */ char *host; /* requested fence action */ char *action; /* timeout to use if a device is queried dynamically for possible targets */ int per_device_timeout; /* number of registered fencing devices at time of request */ int replies_needed; /* number of device replies received so far */ int replies_received; /* whether the target is eligible to perform requested action (or off) */ bool allow_suicide; /* private data to pass to search callback function */ void *user_data; /* function to call when all replies have been received */ void (*callback) (GList * devices, void *user_data); /* devices capable of performing requested action (or off if remapping) */ GList *capable; /* Whether to perform searches that support the action */ uint32_t support_action_only; }; static gboolean stonith_device_dispatch(gpointer user_data); static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data); static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence); static int get_agent_metadata(const char *agent, xmlNode **metadata); static void read_action_metadata(stonith_device_t *device); static enum fenced_target_by unpack_level_kind(const xmlNode *level); typedef struct async_command_s { int id; int pid; int fd_stdout; int options; int default_timeout; /* seconds */ int timeout; /* seconds */ int start_delay; // seconds (-1 means disable static/random fencing delays) int delay_id; char *op; char *origin; char *client; char *client_name; char *remote_op_id; char *target; uint32_t target_nodeid; char *action; char *device; GList *device_list; GList *next_device_iter; // device_list entry for next device to execute void *internal_user_data; void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data); guint timer_sigterm; guint timer_sigkill; /*! If the operation timed out, this is the last signal * we sent to the process to get it to terminate */ int last_timeout_signo; stonith_device_t *active_on; stonith_device_t *activating_on; } async_command_t; static xmlNode *construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result); static gboolean is_action_required(const char *action, const stonith_device_t *device) { return (device != NULL) && device->automatic_unfencing && pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none); } static int get_action_delay_max(const stonith_device_t *device, const char *action) { const char *value = NULL; int delay_max = 0; if (!pcmk__is_fencing_action(action)) { return 0; } value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_MAX); if (value) { delay_max = crm_parse_interval_spec(value) / 1000; } return delay_max; } static int get_action_delay_base(const stonith_device_t *device, const char *action, const char *target) { char *hash_value = NULL; int delay_base = 0; if (!pcmk__is_fencing_action(action)) { return 0; } hash_value = g_hash_table_lookup(device->params, PCMK_STONITH_DELAY_BASE); if (hash_value) { char *value = strdup(hash_value); char *valptr = value; CRM_ASSERT(value != NULL); if (target != NULL) { for (char *val = strtok(value, "; \t"); val != NULL; val = strtok(NULL, "; \t")) { char *mapval = strchr(val, ':'); if (mapval == NULL || mapval[1] == 0) { crm_err("pcmk_delay_base: empty value in mapping", val); continue; } if (mapval != val && strncasecmp(target, val, (size_t)(mapval - val)) == 0) { value = mapval + 1; crm_debug("pcmk_delay_base mapped to %s for %s", value, target); break; } } } if (strchr(value, ':') == 0) { delay_base = crm_parse_interval_spec(value) / 1000; } free(valptr); } return delay_base; } /*! * \internal * \brief Override STONITH timeout with pcmk_*_timeout if available * * \param[in] device STONITH device to use * \param[in] action STONITH action name * \param[in] default_timeout Timeout to use if device does not have * a pcmk_*_timeout parameter for action * * \return Value of pcmk_(action)_timeout if available, otherwise default_timeout * \note For consistency, it would be nice if reboot/off/on timeouts could be * set the same way as start/stop/monitor timeouts, i.e. with an * entry in the fencing resource configuration. However that * is insufficient because fencing devices may be registered directly via * the fencer's register_device() API instead of going through the CIB * (e.g. stonith_admin uses it for its -R option, and the executor uses it * to ensure a device is registered when a command is issued). As device * properties, pcmk_*_timeout parameters can be grabbed by the fencer when * the device is registered, whether by CIB change or API call. */ static int get_action_timeout(const stonith_device_t *device, const char *action, int default_timeout) { if (action && device && device->params) { char buffer[64] = { 0, }; const char *value = NULL; /* If "reboot" was requested but the device does not support it, * we will remap to "off", so check timeout for "off" instead */ if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_trace("%s doesn't support reboot, using timeout for off instead", device->id); action = PCMK_ACTION_OFF; } /* If the device config specified an action-specific timeout, use it */ snprintf(buffer, sizeof(buffer), "pcmk_%s_timeout", action); value = g_hash_table_lookup(device->params, buffer); if (value) { return atoi(value); } } return default_timeout; } /*! * \internal * \brief Get the currently executing device for a fencing operation * * \param[in] cmd Fencing operation to check * * \return Currently executing device for \p cmd if any, otherwise NULL */ static stonith_device_t * cmd_device(const async_command_t *cmd) { if ((cmd == NULL) || (cmd->device == NULL) || (device_list == NULL)) { return NULL; } return g_hash_table_lookup(device_list, cmd->device); } /*! * \internal * \brief Return the configured reboot action for a given device * * \param[in] device_id Device ID * * \return Configured reboot action for \p device_id */ const char * fenced_device_reboot_action(const char *device_id) { const char *action = NULL; if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if ((device != NULL) && (device->params != NULL)) { action = g_hash_table_lookup(device->params, "pcmk_reboot_action"); } } return pcmk__s(action, PCMK_ACTION_REBOOT); } /*! * \internal * \brief Check whether a given device supports the "on" action * * \param[in] device_id Device ID * * \return true if \p device_id supports "on", otherwise false */ bool fenced_device_supports_on(const char *device_id) { if ((device_list != NULL) && (device_id != NULL)) { stonith_device_t *device = g_hash_table_lookup(device_list, device_id); if (device != NULL) { return pcmk_is_set(device->flags, st_device_supports_on); } } return false; } static void free_async_command(async_command_t * cmd) { if (!cmd) { return; } if (cmd->delay_id) { g_source_remove(cmd->delay_id); } cmd_list = g_list_remove(cmd_list, cmd); g_list_free_full(cmd->device_list, free); free(cmd->device); free(cmd->action); free(cmd->target); free(cmd->remote_op_id); free(cmd->client); free(cmd->client_name); free(cmd->origin); free(cmd->op); free(cmd); } /*! * \internal * \brief Create a new asynchronous fencing operation from request XML * * \param[in] msg Fencing request XML (from IPC or CPG) * * \return Newly allocated fencing operation on success, otherwise NULL * * \note This asserts on memory errors, so a NULL return indicates an * unparseable message. */ static async_command_t * create_async_command(xmlNode *msg) { xmlNode *op = NULL; async_command_t *cmd = NULL; if (msg == NULL) { return NULL; } op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); if (op == NULL) { return NULL; } cmd = calloc(1, sizeof(async_command_t)); CRM_ASSERT(cmd != NULL); // All messages must include these cmd->action = crm_element_value_copy(op, F_STONITH_ACTION); cmd->op = crm_element_value_copy(msg, F_STONITH_OPERATION); cmd->client = crm_element_value_copy(msg, F_STONITH_CLIENTID); if ((cmd->action == NULL) || (cmd->op == NULL) || (cmd->client == NULL)) { free_async_command(cmd); return NULL; } crm_element_value_int(msg, F_STONITH_CALLID, &(cmd->id)); crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options)); crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay)); crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout)); cmd->timeout = cmd->default_timeout; cmd->origin = crm_element_value_copy(msg, F_ORIG); cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID); cmd->client_name = crm_element_value_copy(msg, F_STONITH_CLIENTNAME); cmd->target = crm_element_value_copy(op, F_STONITH_TARGET); cmd->device = crm_element_value_copy(op, F_STONITH_DEVICE); cmd->done_cb = st_child_done; // Track in global command list cmd_list = g_list_append(cmd_list, cmd); return cmd; } static int get_action_limit(stonith_device_t * device) { const char *value = NULL; int action_limit = 1; value = g_hash_table_lookup(device->params, PCMK_STONITH_ACTION_LIMIT); if ((value == NULL) || (pcmk__scan_min_int(value, &action_limit, INT_MIN) != pcmk_rc_ok) || (action_limit == 0)) { action_limit = 1; } return action_limit; } static int get_active_cmds(stonith_device_t * device) { int counter = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return 0); for (gIter = cmd_list; gIter != NULL; gIter = gIterNext) { async_command_t *cmd = gIter->data; gIterNext = gIter->next; if (cmd->active_on == device) { counter++; } } return counter; } static void fork_cb(int pid, void *user_data) { async_command_t *cmd = (async_command_t *) user_data; stonith_device_t * device = /* in case of a retry we've done the move from activating_on to active_on already */ cmd->activating_on?cmd->activating_on:cmd->active_on; CRM_ASSERT(device); crm_debug("Operation '%s' [%d]%s%s using %s now running with %ds timeout", cmd->action, pid, ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, cmd->timeout); cmd->active_on = device; cmd->activating_on = NULL; } static int get_agent_metadata_cb(gpointer data) { stonith_device_t *device = data; guint period_ms; switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } return G_SOURCE_REMOVE; case EAGAIN: period_ms = pcmk__mainloop_timer_get_period(device->timer); if (period_ms < 160 * 1000) { mainloop_timer_set_period(device->timer, 2 * period_ms); } return G_SOURCE_CONTINUE; default: return G_SOURCE_REMOVE; } } /*! * \internal * \brief Call a command's action callback for an internal (not library) result * * \param[in,out] cmd Command to report result for * \param[in] execution_status Execution status to use for result * \param[in] exit_status Exit status to use for result * \param[in] exit_reason Exit reason to use for result */ static void report_internal_result(async_command_t *cmd, int exit_status, int execution_status, const char *exit_reason) { pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, exit_status, execution_status, exit_reason); cmd->done_cb(0, &result, cmd); pcmk__reset_result(&result); } static gboolean stonith_device_execute(stonith_device_t * device) { int exec_rc = 0; const char *action_str = NULL; const char *host_arg = NULL; async_command_t *cmd = NULL; stonith_action_t *action = NULL; int active_cmds = 0; int action_limit = 0; GList *gIter = NULL; GList *gIterNext = NULL; CRM_CHECK(device != NULL, return FALSE); active_cmds = get_active_cmds(device); action_limit = get_action_limit(device); if (action_limit > -1 && active_cmds >= action_limit) { crm_trace("%s is over its action limit of %d (%u active action%s)", device->id, action_limit, active_cmds, pcmk__plural_s(active_cmds)); return TRUE; } for (gIter = device->pending_ops; gIter != NULL; gIter = gIterNext) { async_command_t *pending_op = gIter->data; gIterNext = gIter->next; if (pending_op && pending_op->delay_id) { crm_trace("Operation '%s'%s%s using %s was asked to run too early, " "waiting for start delay of %ds", pending_op->action, ((pending_op->target == NULL)? "" : " targeting "), pcmk__s(pending_op->target, ""), device->id, pending_op->start_delay); continue; } device->pending_ops = g_list_remove_link(device->pending_ops, gIter); g_list_free_1(gIter); cmd = pending_op; break; } if (cmd == NULL) { crm_trace("No actions using %s are needed", device->id); return TRUE; } if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { if (pcmk__is_fencing_action(cmd->action)) { if (node_does_watchdog_fencing(stonith_our_uname)) { pcmk__panic(__func__); goto done; } } else { crm_info("Faking success for %s watchdog operation", cmd->action); report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); goto done; } } #if SUPPORT_CIBSECRETS exec_rc = pcmk__substitute_secrets(device->id, device->params); if (exec_rc != pcmk_rc_ok) { if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_none)) { crm_info("Proceeding with stop operation for %s " "despite being unable to load CIB secrets (%s)", device->id, pcmk_rc_str(exec_rc)); } else { crm_err("Considering %s unconfigured " "because unable to load CIB secrets: %s", device->id, pcmk_rc_str(exec_rc)); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, "Failed to get CIB secrets"); goto done; } } #endif action_str = cmd->action; if (pcmk__str_eq(cmd->action, PCMK_ACTION_REBOOT, pcmk__str_none) && !pcmk_is_set(device->flags, st_device_supports_reboot)) { crm_notice("Remapping 'reboot' action%s%s using %s to 'off' " "because agent '%s' does not support reboot", ((cmd->target == NULL)? "" : " targeting "), pcmk__s(cmd->target, ""), device->id, device->agent); action_str = PCMK_ACTION_OFF; } if (pcmk_is_set(device->flags, st_device_supports_parameter_port)) { host_arg = "port"; } else if (pcmk_is_set(device->flags, st_device_supports_parameter_plug)) { host_arg = "plug"; } action = stonith__action_create(device->agent, action_str, cmd->target, cmd->target_nodeid, cmd->timeout, device->params, device->aliases, host_arg); /* for async exec, exec_rc is negative for early error exit otherwise handling of success/errors is done via callbacks */ cmd->activating_on = device; exec_rc = stonith__execute_async(action, (void *)cmd, cmd->done_cb, fork_cb); if (exec_rc < 0) { cmd->activating_on = NULL; cmd->done_cb(0, stonith__action_result(action), cmd); stonith__destroy_action(action); } done: /* Device might get triggered to work by multiple fencing commands * simultaneously. Trigger the device again to make sure any * remaining concurrent commands get executed. */ if (device->pending_ops) { mainloop_set_trigger(device->work); } return TRUE; } static gboolean stonith_device_dispatch(gpointer user_data) { return stonith_device_execute(user_data); } static gboolean start_delay_helper(gpointer data) { async_command_t *cmd = data; stonith_device_t *device = cmd_device(cmd); cmd->delay_id = 0; if (device) { mainloop_set_trigger(device->work); } return FALSE; } static void schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) { int delay_max = 0; int delay_base = 0; int requested_delay = cmd->start_delay; CRM_CHECK(cmd != NULL, return); CRM_CHECK(device != NULL, return); if (cmd->device) { free(cmd->device); } if (device->include_nodeid && (cmd->target != NULL)) { crm_node_t *node = crm_get_peer(0, cmd->target); cmd->target_nodeid = node->id; } cmd->device = strdup(device->id); cmd->timeout = get_action_timeout(device, cmd->action, cmd->default_timeout); if (cmd->remote_op_id) { crm_debug("Scheduling '%s' action%s%s using %s for remote peer %s " "with op id %.8s and timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->origin, cmd->remote_op_id, cmd->timeout); } else { crm_debug("Scheduling '%s' action%s%s using %s for %s with timeout %ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->client, cmd->timeout); } device->pending_ops = g_list_append(device->pending_ops, cmd); mainloop_set_trigger(device->work); // Value -1 means disable any static/random fencing delays if (requested_delay < 0) { return; } delay_max = get_action_delay_max(device, cmd->action); delay_base = get_action_delay_base(device, cmd->action, cmd->target); if (delay_max == 0) { delay_max = delay_base; } if (delay_max < delay_base) { crm_warn(PCMK_STONITH_DELAY_BASE " (%ds) is larger than " PCMK_STONITH_DELAY_MAX " (%ds) for %s using %s " "(limiting to maximum delay)", delay_base, delay_max, cmd->action, device->id); delay_base = delay_max; } if (delay_max > 0) { // coverity[dontcall] It doesn't matter here if rand() is predictable cmd->start_delay += ((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0) + delay_base; } if (cmd->start_delay > 0) { crm_notice("Delaying '%s' action%s%s using %s for %ds " CRM_XS " timeout=%ds requested_delay=%ds base=%ds max=%ds", cmd->action, (cmd->target == NULL)? "" : " targeting ", pcmk__s(cmd->target, ""), device->id, cmd->start_delay, cmd->timeout, requested_delay, delay_base, delay_max); cmd->delay_id = g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd); } } static void free_device(gpointer data) { GList *gIter = NULL; stonith_device_t *device = data; g_hash_table_destroy(device->params); g_hash_table_destroy(device->aliases); for (gIter = device->pending_ops; gIter != NULL; gIter = gIter->next) { async_command_t *cmd = gIter->data; crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Device was removed before action could be executed"); } g_list_free(device->pending_ops); g_list_free_full(device->targets, free); if (device->timer) { mainloop_timer_stop(device->timer); mainloop_timer_del(device->timer); } mainloop_destroy_trigger(device->work); free_xml(device->agent_metadata); free(device->namespace); if (device->on_target_actions != NULL) { g_string_free(device->on_target_actions, TRUE); } free(device->agent); free(device->id); free(device); } void free_device_list(void) { if (device_list != NULL) { g_hash_table_destroy(device_list); device_list = NULL; } } void init_device_list(void) { if (device_list == NULL) { device_list = pcmk__strkey_table(NULL, free_device); } } static GHashTable * build_port_aliases(const char *hostmap, GList ** targets) { char *name = NULL; int last = 0, lpc = 0, max = 0, added = 0; GHashTable *aliases = pcmk__strikey_table(free, free); if (hostmap == NULL) { return aliases; } max = strlen(hostmap); for (; lpc <= max; lpc++) { switch (hostmap[lpc]) { /* Skip escaped chars */ case '\\': lpc++; break; /* Assignment chars */ case '=': case ':': if (lpc > last) { free(name); name = calloc(1, 1 + lpc - last); memcpy(name, hostmap + last, lpc - last); } last = lpc + 1; break; /* Delimeter chars */ /* case ',': Potentially used to specify multiple ports */ case 0: case ';': case ' ': case '\t': if (name) { char *value = NULL; int k = 0; value = calloc(1, 1 + lpc - last); memcpy(value, hostmap + last, lpc - last); for (int i = 0; value[i] != '\0'; i++) { if (value[i] != '\\') { value[k++] = value[i]; } } value[k] = '\0'; crm_debug("Adding alias '%s'='%s'", name, value); g_hash_table_replace(aliases, name, value); if (targets) { *targets = g_list_append(*targets, strdup(value)); } value = NULL; name = NULL; added++; } else if (lpc > last) { crm_debug("Parse error at offset %d near '%s'", lpc - last, hostmap + last); } last = lpc + 1; break; } if (hostmap[lpc] == 0) { break; } } if (added == 0) { crm_info("No host mappings detected in '%s'", hostmap); } free(name); return aliases; } GHashTable *metadata_cache = NULL; void free_metadata_cache(void) { if (metadata_cache != NULL) { g_hash_table_destroy(metadata_cache); metadata_cache = NULL; } } static void init_metadata_cache(void) { if (metadata_cache == NULL) { metadata_cache = pcmk__strkey_table(free, free); } } int get_agent_metadata(const char *agent, xmlNode ** metadata) { char *buffer = NULL; if (metadata == NULL) { return EINVAL; } *metadata = NULL; if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { return pcmk_rc_ok; } init_metadata_cache(); buffer = g_hash_table_lookup(metadata_cache, agent); if (buffer == NULL) { stonith_t *st = stonith_api_new(); int rc; if (st == NULL) { crm_warn("Could not get agent meta-data: " "API memory allocation failed"); return EAGAIN; } rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); stonith_api_delete(st); if (rc || !buffer) { crm_err("Could not retrieve metadata for fencing agent %s", agent); return EAGAIN; } g_hash_table_replace(metadata_cache, strdup(agent), buffer); } *metadata = string2xml(buffer); return pcmk_rc_ok; } static gboolean is_nodeid_required(xmlNode * xml) { xmlXPathObjectPtr xpath = NULL; if (stand_alone) { return FALSE; } if (!xml) { return FALSE; } xpath = xpath_search(xml, "//parameter[@name='nodeid']"); if (numXpathResults(xpath) <= 0) { freeXpathObject(xpath); return FALSE; } freeXpathObject(xpath); return TRUE; } static void read_action_metadata(stonith_device_t *device) { xmlXPathObjectPtr xpath = NULL; int max = 0; int lpc = 0; if (device->agent_metadata == NULL) { return; } xpath = xpath_search(device->agent_metadata, "//action"); max = numXpathResults(xpath); if (max <= 0) { freeXpathObject(xpath); return; } for (lpc = 0; lpc < max; lpc++) { const char *action = NULL; xmlNode *match = getXpathResult(xpath, lpc); CRM_LOG_ASSERT(match != NULL); if(match == NULL) { continue; }; action = crm_element_value(match, "name"); if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_list); } else if (pcmk__str_eq(action, PCMK_ACTION_STATUS, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_status); } else if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { stonith__set_device_flags(device->flags, device->id, st_device_supports_reboot); } else if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { /* "automatic" means the cluster will unfence node when it joins */ /* "required" is a deprecated synonym for "automatic" */ if (pcmk__xe_attr_is_true(match, "automatic") || pcmk__xe_attr_is_true(match, "required")) { device->automatic_unfencing = TRUE; } stonith__set_device_flags(device->flags, device->id, st_device_supports_on); } if ((action != NULL) && pcmk__xe_attr_is_true(match, "on_target")) { pcmk__add_word(&(device->on_target_actions), 64, action); } } freeXpathObject(xpath); } /*! * \internal * \brief Set a pcmk_*_action parameter if not already set * * \param[in,out] params Device parameters * \param[in] action Name of action * \param[in] value Value to use if action is not already set */ static void map_action(GHashTable *params, const char *action, const char *value) { char *key = crm_strdup_printf("pcmk_%s_action", action); if (g_hash_table_lookup(params, key)) { crm_warn("Ignoring %s='%s', see %s instead", STONITH_ATTR_ACTION_OP, value, key); free(key); } else { crm_warn("Mapping %s='%s' to %s='%s'", STONITH_ATTR_ACTION_OP, value, key, value); g_hash_table_insert(params, key, strdup(value)); } } /*! * \internal * \brief Create device parameter table from XML * * \param[in] name Device name (used for logging only) * \param[in] dev XML containing device parameters */ static GHashTable * xml2device_params(const char *name, const xmlNode *dev) { GHashTable *params = xml2list(dev); const char *value; /* Action should never be specified in the device configuration, * but we support it for users who are familiar with other software * that worked that way. */ value = g_hash_table_lookup(params, STONITH_ATTR_ACTION_OP); if (value != NULL) { crm_warn("%s has '%s' parameter, which should never be specified in configuration", name, STONITH_ATTR_ACTION_OP); if (*value == '\0') { crm_warn("Ignoring empty '%s' parameter", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_REBOOT) == 0) { crm_warn("Ignoring %s='reboot' (see " PCMK_OPT_STONITH_ACTION " cluster property instead)", STONITH_ATTR_ACTION_OP); } else if (strcmp(value, PCMK_ACTION_OFF) == 0) { map_action(params, PCMK_ACTION_REBOOT, value); } else { map_action(params, PCMK_ACTION_OFF, value); map_action(params, PCMK_ACTION_REBOOT, value); } g_hash_table_remove(params, STONITH_ATTR_ACTION_OP); } return params; } static const char * target_list_type(stonith_device_t * dev) { const char *check_type = NULL; check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); if (check_type == NULL) { if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { check_type = "static-list"; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { check_type = "static-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { check_type = "dynamic-list"; } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { check_type = "status"; } else { check_type = PCMK__VALUE_NONE; } } return check_type; } static stonith_device_t * build_device_from_xml(xmlNode *dev) { const char *value; stonith_device_t *device = NULL; char *agent = crm_element_value_copy(dev, "agent"); CRM_CHECK(agent != NULL, return device); device = calloc(1, sizeof(stonith_device_t)); CRM_CHECK(device != NULL, {free(agent); return device;}); device->id = crm_element_value_copy(dev, XML_ATTR_ID); device->agent = agent; device->namespace = crm_element_value_copy(dev, "namespace"); device->params = xml2device_params(device->id, dev); value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_LIST); if (value) { device->targets = stonith__parse_targets(value); } value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); device->aliases = build_port_aliases(value, &(device->targets)); value = target_list_type(device); if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { /* Other than "static-list", dev-> targets is unnecessary. */ g_list_free_full(device->targets, free); device->targets = NULL; } switch (get_agent_metadata(device->agent, &device->agent_metadata)) { case pcmk_rc_ok: if (device->agent_metadata) { read_action_metadata(device); stonith__device_parameter_flags(&(device->flags), device->id, device->agent_metadata); } break; case EAGAIN: if (device->timer == NULL) { device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, TRUE, get_agent_metadata_cb, device); } if (!mainloop_timer_running(device->timer)) { mainloop_timer_start(device->timer); } break; default: break; } value = g_hash_table_lookup(device->params, "nodeid"); if (!value) { device->include_nodeid = is_nodeid_required(device->agent_metadata); } value = crm_element_value(dev, "rsc_provides"); if (pcmk__str_eq(value, PCMK__VALUE_UNFENCING, pcmk__str_casei)) { device->automatic_unfencing = TRUE; } if (is_action_required(PCMK_ACTION_ON, device)) { crm_info("Fencing device '%s' requires unfencing", device->id); } if (device->on_target_actions != NULL) { crm_info("Fencing device '%s' requires actions (%s) to be executed " "on target", device->id, (const char *) device->on_target_actions->str); } device->work = mainloop_add_trigger(G_PRIORITY_HIGH, stonith_device_dispatch, device); /* TODO: Hook up priority */ return device; } static void schedule_internal_command(const char *origin, stonith_device_t * device, const char *action, const char *target, int timeout, void *internal_user_data, void (*done_cb) (int pid, const pcmk__action_result_t *result, void *user_data)) { async_command_t *cmd = NULL; cmd = calloc(1, sizeof(async_command_t)); cmd->id = -1; cmd->default_timeout = timeout ? timeout : 60; cmd->timeout = cmd->default_timeout; cmd->action = strdup(action); pcmk__str_update(&cmd->target, target); cmd->device = strdup(device->id); cmd->origin = strdup(origin); cmd->client = strdup(crm_system_name); cmd->client_name = strdup(crm_system_name); cmd->internal_user_data = internal_user_data; cmd->done_cb = done_cb; /* cmd, not internal_user_data, is passed to 'done_cb' as the userdata */ schedule_stonith_command(cmd, device); } // Fence agent status commands use custom exit status codes enum fence_status_code { fence_status_invalid = -1, fence_status_active = 0, fence_status_unknown = 1, fence_status_inactive = 2, }; static void status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can = FALSE; free_async_command(cmd); if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (result->execution_status != PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because status could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); search_devices_record_result(search, dev->id, FALSE); return; } switch (result->exit_status) { case fence_status_unknown: crm_trace("%s reported it cannot fence %s", dev->id, search->host); break; case fence_status_active: case fence_status_inactive: crm_trace("%s reported it can fence %s", dev->id, search->host); can = TRUE; break; default: crm_warn("Assuming %s cannot fence %s " "(status returned unknown code %d)", dev->id, search->host, result->exit_status); break; } search_devices_record_result(search, dev->id, can); } static void dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; struct device_search_s *search = cmd->internal_user_data; stonith_device_t *dev = cmd_device(cmd); gboolean can_fence = FALSE; free_async_command(cmd); /* Host/alias must be in the list output to be eligible to be fenced * * Will cause problems if down'd nodes aren't listed or (for virtual nodes) * if the guest is still listed despite being moved to another machine */ if (!dev) { search_devices_record_result(search, NULL, FALSE); return; } mainloop_set_trigger(dev->work); if (pcmk__result_ok(result)) { crm_info("Refreshing target list for %s", dev->id); g_list_free_full(dev->targets, free); dev->targets = stonith__parse_targets(result->action_stdout); dev->targets_age = time(NULL); } else if (dev->targets != NULL) { if (result->execution_status == PCMK_EXEC_DONE) { crm_info("Reusing most recent target list for %s " "because list returned error code %d", dev->id, result->exit_status); } else { crm_info("Reusing most recent target list for %s " "because list could not be executed: %s%s%s%s", dev->id, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } } else { // We have never successfully executed list if (result->execution_status == PCMK_EXEC_DONE) { crm_warn("Assuming %s cannot fence %s " "because list returned error code %d", dev->id, search->host, result->exit_status); } else { crm_warn("Assuming %s cannot fence %s " "because list could not be executed: %s%s%s%s", dev->id, search->host, pcmk_exec_status_str(result->execution_status), ((result->exit_reason == NULL)? "" : " ("), ((result->exit_reason == NULL)? "" : result->exit_reason), ((result->exit_reason == NULL)? "" : ")")); } /* Fall back to pcmk_host_check="status" if the user didn't explicitly * specify "dynamic-list". */ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { crm_notice("Switching to pcmk_host_check='status' for %s", dev->id); g_hash_table_replace(dev->params, strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); } } if (dev->targets) { const char *alias = g_hash_table_lookup(dev->aliases, search->host); if (!alias) { alias = search->host; } if (pcmk__str_in_list(alias, dev->targets, pcmk__str_casei)) { can_fence = TRUE; } } search_devices_record_result(search, dev->id, can_fence); } /*! * \internal * \brief Returns true if any key in first is not in second or second has a different value for key */ static int device_params_diff(GHashTable *first, GHashTable *second) { char *key = NULL; char *value = NULL; GHashTableIter gIter; g_hash_table_iter_init(&gIter, first); while (g_hash_table_iter_next(&gIter, (void **)&key, (void **)&value)) { if(strstr(key, "CRM_meta") == key) { continue; } else if(strcmp(key, "crm_feature_set") == 0) { continue; } else { char *other_value = g_hash_table_lookup(second, key); if (!other_value || !pcmk__str_eq(other_value, value, pcmk__str_casei)) { crm_trace("Different value for %s: %s != %s", key, other_value, value); return 1; } } } return 0; } /*! * \internal * \brief Checks to see if an identical device already exists in the device_list */ static stonith_device_t * device_has_duplicate(const stonith_device_t *device) { stonith_device_t *dup = g_hash_table_lookup(device_list, device->id); if (!dup) { crm_trace("No match for %s", device->id); return NULL; } else if (!pcmk__str_eq(dup->agent, device->agent, pcmk__str_casei)) { crm_trace("Different agent: %s != %s", dup->agent, device->agent); return NULL; } /* Use calculate_operation_digest() here? */ if (device_params_diff(device->params, dup->params) || device_params_diff(dup->params, device->params)) { return NULL; } crm_trace("Match"); return dup; } int stonith_device_register(xmlNode *dev, gboolean from_cib) { stonith_device_t *dup = NULL; stonith_device_t *device = build_device_from_xml(dev); guint ndevices = 0; int rv = pcmk_ok; CRM_CHECK(device != NULL, return -ENOMEM); /* do we have a watchdog-device? */ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { if (stonith_watchdog_timeout_ms <= 0) { crm_err("Ignoring watchdog fence device without " - "stonith-watchdog-timeout set."); + PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " set."); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { crm_err("Ignoring watchdog fence device with unknown " "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", device->agent?device->agent:""); rv = -ENODEV; /* fall through to cleanup & return */ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_err("Ignoring watchdog fence device " "named %s !='"STONITH_WATCHDOG_ID"'.", device->id?device->id:""); rv = -ENODEV; /* fall through to cleanup & return */ } else { if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { /* this either has an empty list or the targets configured for watchdog-fencing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = device->targets; device->targets = NULL; } if (node_does_watchdog_fencing(stonith_our_uname)) { g_list_free_full(device->targets, free); device->targets = stonith__parse_targets(stonith_our_uname); g_hash_table_replace(device->params, strdup(PCMK_STONITH_HOST_LIST), strdup(stonith_our_uname)); /* proceed as with any other stonith-device */ break; } crm_debug("Skip registration of watchdog fence device on node not in host-list."); /* cleanup and fall through to more cleanup and return */ device->targets = NULL; stonith_device_remove(device->id, from_cib); } free_device(device); return rv; } while (0); dup = device_has_duplicate(device); if (dup) { ndevices = g_hash_table_size(device_list); crm_debug("Device '%s' already in device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); free_device(device); device = dup; dup = g_hash_table_lookup(device_list, device->id); dup->dirty = FALSE; } else { stonith_device_t *old = g_hash_table_lookup(device_list, device->id); if (from_cib && old && old->api_registered) { /* If the cib is writing over an entry that is shared with a stonith client, * copy any pending ops that currently exist on the old entry to the new one. * Otherwise the pending ops will be reported as failures */ crm_info("Overwriting existing entry for %s from CIB", device->id); device->pending_ops = old->pending_ops; device->api_registered = TRUE; old->pending_ops = NULL; if (device->pending_ops) { mainloop_set_trigger(device->work); } } g_hash_table_replace(device_list, device->id, device); ndevices = g_hash_table_size(device_list); crm_notice("Added '%s' to device list (%d active device%s)", device->id, ndevices, pcmk__plural_s(ndevices)); } if (from_cib) { device->cib_registered = TRUE; } else { device->api_registered = TRUE; } return pcmk_ok; } void stonith_device_remove(const char *id, bool from_cib) { stonith_device_t *device = g_hash_table_lookup(device_list, id); guint ndevices = 0; if (!device) { ndevices = g_hash_table_size(device_list); crm_info("Device '%s' not found (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); return; } if (from_cib) { device->cib_registered = FALSE; } else { device->verified = FALSE; device->api_registered = FALSE; } if (!device->cib_registered && !device->api_registered) { g_hash_table_remove(device_list, id); ndevices = g_hash_table_size(device_list); crm_info("Removed '%s' from device list (%d active device%s)", id, ndevices, pcmk__plural_s(ndevices)); } else { crm_trace("Not removing '%s' from device list (%d active) because " "still registered via:%s%s", id, g_hash_table_size(device_list), (device->cib_registered? " cib" : ""), (device->api_registered? " api" : "")); } } /*! * \internal * \brief Return the number of stonith levels registered for a node * * \param[in] tp Node's topology table entry * * \return Number of non-NULL levels in topology entry * \note This function is used only for log messages. */ static int count_active_levels(const stonith_topology_t *tp) { int lpc = 0; int count = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { count++; } } return count; } static void free_topology_entry(gpointer data) { stonith_topology_t *tp = data; int lpc = 0; for (lpc = 0; lpc < ST_LEVEL_MAX; lpc++) { if (tp->levels[lpc] != NULL) { g_list_free_full(tp->levels[lpc], free); } } free(tp->target); free(tp->target_value); free(tp->target_pattern); free(tp->target_attribute); free(tp); } void free_topology_list(void) { if (topology != NULL) { g_hash_table_destroy(topology); topology = NULL; } } void init_topology_list(void) { if (topology == NULL) { topology = pcmk__strkey_table(NULL, free_topology_entry); } } char * stonith_level_key(const xmlNode *level, enum fenced_target_by mode) { if (mode == fenced_target_by_unknown) { mode = unpack_level_kind(level); } switch (mode) { case fenced_target_by_name: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET); case fenced_target_by_pattern: return crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); case fenced_target_by_attribute: return crm_strdup_printf("%s=%s", crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE), crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE)); default: return crm_strdup_printf("unknown-%s", ID(level)); } } /*! * \internal * \brief Parse target identification from topology level XML * * \param[in] level Topology level XML to parse * * \return How to identify target of \p level */ static enum fenced_target_by unpack_level_kind(const xmlNode *level) { if (crm_element_value(level, XML_ATTR_STONITH_TARGET) != NULL) { return fenced_target_by_name; } if (crm_element_value(level, XML_ATTR_STONITH_TARGET_PATTERN) != NULL) { return fenced_target_by_pattern; } if (!stand_alone /* if standalone, there's no attribute manager */ && (crm_element_value(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE) != NULL) && (crm_element_value(level, XML_ATTR_STONITH_TARGET_VALUE) != NULL)) { return fenced_target_by_attribute; } return fenced_target_by_unknown; } static stonith_key_value_t * parse_device_list(const char *devices) { int lpc = 0; int max = 0; int last = 0; stonith_key_value_t *output = NULL; if (devices == NULL) { return output; } max = strlen(devices); for (lpc = 0; lpc <= max; lpc++) { if (devices[lpc] == ',' || devices[lpc] == 0) { char *line = strndup(devices + last, lpc - last); output = stonith_key_value_add(output, NULL, line); free(line); last = lpc + 1; } } return output; } /*! * \internal * \brief Unpack essential information from topology request XML * * \param[in] xml Request XML to search * \param[out] mode If not NULL, where to store level kind * \param[out] target If not NULL, where to store representation of target * \param[out] id If not NULL, where to store level number * \param[out] desc If not NULL, where to store log-friendly level description * * \return Topology level XML from within \p xml, or NULL if not found * \note The caller is responsible for freeing \p *target and \p *desc if set. */ static xmlNode * unpack_level_request(xmlNode *xml, enum fenced_target_by *mode, char **target, int *id, char **desc) { enum fenced_target_by local_mode = fenced_target_by_unknown; char *local_target = NULL; int local_id = 0; /* The level element can be the top element or lower. If top level, don't * search by xpath, because it might give multiple hits if the XML is the * CIB. */ if ((xml != NULL) && !pcmk__xe_is(xml, XML_TAG_FENCING_LEVEL)) { xml = get_xpath_object("//" XML_TAG_FENCING_LEVEL, xml, LOG_WARNING); } if (xml == NULL) { if (desc != NULL) { *desc = crm_strdup_printf("missing"); } } else { local_mode = unpack_level_kind(xml); local_target = stonith_level_key(xml, local_mode); crm_element_value_int(xml, XML_ATTR_STONITH_INDEX, &local_id); if (desc != NULL) { *desc = crm_strdup_printf("%s[%d]", local_target, local_id); } } if (mode != NULL) { *mode = local_mode; } if (id != NULL) { *id = local_id; } if (target != NULL) { *target = local_target; } else { free(local_target); } return xml; } /*! * \internal * \brief Register a fencing topology level for a target * * Given an XML request specifying the target name, level index, and device IDs * for the level, this will create an entry for the target in the global topology * table if one does not already exist, then append the specified device IDs to * the entry's device list for the specified level. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of registration */ void fenced_register_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = 0; xmlNode *level; enum fenced_target_by mode; char *target; stonith_topology_t *tp; stonith_key_value_t *dIter = NULL; stonith_key_value_t *devices = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); level = unpack_level_request(msg, &mode, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure an ID was given (even the client API adds an ID) if (pcmk__str_empty(ID(level))) { crm_warn("Ignoring registration for topology level without ID"); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Topology level is invalid without ID"); return; } // Ensure a valid target was specified if (mode == fenced_target_by_unknown) { crm_warn("Ignoring registration for topology level '%s' " "without valid target", ID(level)); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid target for topology level '%s'", ID(level)); return; } // Ensure level ID is in allowed range if ((id <= 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology registration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level '%s'", pcmk__s(crm_element_value(level, XML_ATTR_STONITH_INDEX), ""), ID(level)); return; } /* Find or create topology table entry */ tp = g_hash_table_lookup(topology, target); if (tp == NULL) { tp = calloc(1, sizeof(stonith_topology_t)); if (tp == NULL) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_ERROR, strerror(ENOMEM)); free(target); return; } tp->kind = mode; tp->target = target; tp->target_value = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_VALUE); tp->target_pattern = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_PATTERN); tp->target_attribute = crm_element_value_copy(level, XML_ATTR_STONITH_TARGET_ATTRIBUTE); g_hash_table_replace(topology, tp->target, tp); crm_trace("Added %s (%d) to the topology (%d active entries)", target, (int) mode, g_hash_table_size(topology)); } else { free(target); } if (tp->levels[id] != NULL) { crm_info("Adding to the existing %s[%d] topology entry", tp->target, id); } devices = parse_device_list(crm_element_value(level, XML_ATTR_STONITH_DEVICES)); for (dIter = devices; dIter; dIter = dIter->next) { const char *device = dIter->value; crm_trace("Adding device '%s' for %s[%d]", device, tp->target, id); tp->levels[id] = g_list_append(tp->levels[id], strdup(device)); } stonith_key_value_freeall(devices, 1, 1); { int nlevels = count_active_levels(tp); crm_info("Target %s has %d active fencing level%s", tp->target, nlevels, pcmk__plural_s(nlevels)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } /*! * \internal * \brief Unregister a fencing topology level for a target * * Given an XML request specifying the target name and level index (or 0 for all * levels), this will remove any corresponding entry for the target from the * global topology table. * * \param[in] msg XML request for STONITH level registration * \param[out] desc If not NULL, set to string representation "TARGET[LEVEL]" * \param[out] result Where to set result of unregistration */ void fenced_unregister_level(xmlNode *msg, char **desc, pcmk__action_result_t *result) { int id = -1; stonith_topology_t *tp; char *target; xmlNode *level = NULL; CRM_CHECK(result != NULL, return); level = unpack_level_request(msg, NULL, &target, &id, desc); if (level == NULL) { fenced_set_protocol_error(result); return; } // Ensure level ID is in allowed range if ((id < 0) || (id >= ST_LEVEL_MAX)) { crm_warn("Ignoring topology unregistration for %s with invalid level %d", target, id); free(target); crm_log_xml_trace(level, "Bad level"); pcmk__format_result(result, CRM_EX_INVALID_PARAM, PCMK_EXEC_INVALID, "Invalid level number '%s' for topology level %s", pcmk__s(crm_element_value(level, XML_ATTR_STONITH_INDEX), ""), // Client API doesn't add ID to unregistration XML pcmk__s(ID(level), "")); return; } tp = g_hash_table_lookup(topology, target); if (tp == NULL) { guint nentries = g_hash_table_size(topology); crm_info("No fencing topology found for %s (%d active %s)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (id == 0 && g_hash_table_remove(topology, target)) { guint nentries = g_hash_table_size(topology); crm_info("Removed all fencing topology entries related to %s " "(%d active %s remaining)", target, nentries, pcmk__plural_alt(nentries, "entry", "entries")); } else if (tp->levels[id] != NULL) { guint nlevels; g_list_free_full(tp->levels[id], free); tp->levels[id] = NULL; nlevels = count_active_levels(tp); crm_info("Removed level %d from fencing topology for %s " "(%d active level%s remaining)", id, target, nlevels, pcmk__plural_s(nlevels)); } free(target); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } static char * list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) { int max = g_list_length(list); size_t delim_len = delim?strlen(delim):0; size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); char *rv; GList *gIter; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; alloc_size += strlen(value); } rv = calloc(alloc_size, sizeof(char)); if (rv) { char *pos = rv; const char *lead_delim = ""; for (gIter = list; gIter != NULL; gIter = gIter->next) { const char *value = (const char *) gIter->data; pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; lead_delim = delim; } if (max && terminate_with_delim) { sprintf(pos, "%s", delim); } } return rv; } /*! * \internal * \brief Execute a fence agent action directly (and asynchronously) * * Handle a STONITH_OP_EXEC API message by scheduling a requested agent action * directly on a specified device. Only list, monitor, and status actions are * expected to use this call, though it should work with any agent command. * * \param[in] msg Request XML specifying action * \param[out] result Where to store result of action * * \note If the action is monitor, the device must be registered via the API * (CIB registration is not sufficient), because monitor should not be * possible unless the device is "started" (API registered). */ static void execute_agent_action(xmlNode *msg, pcmk__action_result_t *result) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, msg, LOG_ERR); xmlNode *op = get_xpath_object("//@" F_STONITH_ACTION, msg, LOG_ERR); const char *id = crm_element_value(dev, F_STONITH_DEVICE); const char *action = crm_element_value(op, F_STONITH_ACTION); async_command_t *cmd = NULL; stonith_device_t *device = NULL; if ((id == NULL) || (action == NULL)) { crm_info("Malformed API action request: device %s, action %s", (id? id : "not specified"), (action? action : "not specified")); fenced_set_protocol_error(result); return; } if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { // Watchdog agent actions are implemented internally if (stonith_watchdog_timeout_ms <= 0) { pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Watchdog fence device not configured"); return; } else if (pcmk__str_eq(action, PCMK_ACTION_LIST, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_result_output(result, list_to_string(stonith_watchdog_targets, "\n", TRUE), NULL); return; } else if (pcmk__str_eq(action, PCMK_ACTION_MONITOR, pcmk__str_none)) { pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return; } } device = g_hash_table_lookup(device_list, id); if (device == NULL) { crm_info("Ignoring API '%s' action request because device %s not found", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not found", id); return; } else if (!device->api_registered && (strcmp(action, PCMK_ACTION_MONITOR) == 0)) { // Monitors may run only on "started" (API-registered) devices crm_info("Ignoring API '%s' action request because device %s not active", action, id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "'%s' not active", id); return; } cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } schedule_stonith_command(cmd, device); pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } static void search_devices_record_result(struct device_search_s *search, const char *device, gboolean can_fence) { search->replies_received++; if (can_fence && device) { if (search->support_action_only != st_device_supports_none) { stonith_device_t *dev = g_hash_table_lookup(device_list, device); if (dev && !pcmk_is_set(dev->flags, search->support_action_only)) { return; } } search->capable = g_list_append(search->capable, strdup(device)); } if (search->replies_needed == search->replies_received) { guint ndevices = g_list_length(search->capable); crm_debug("Search found %d device%s that can perform '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); search->callback(search->capable, search->user_data); free(search->host); free(search->action); free(search); } } /*! * \internal * \brief Check whether the local host is allowed to execute a fencing action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Hostname of fence target * \param[in] allow_suicide Whether self-fencing is allowed for this operation * * \return TRUE if local host is allowed to execute action, FALSE otherwise */ static gboolean localhost_is_eligible(const stonith_device_t *device, const char *action, const char *target, gboolean allow_suicide) { gboolean localhost_is_target = pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei); if ((device != NULL) && (action != NULL) && (device->on_target_actions != NULL) && (strstr((const char*) device->on_target_actions->str, action) != NULL)) { if (!localhost_is_target) { crm_trace("Operation '%s' using %s can only be executed for local " "host, not %s", action, device->id, target); return FALSE; } } else if (localhost_is_target && !allow_suicide) { crm_trace("'%s' operation does not support self-fencing", action); return FALSE; } return TRUE; } /*! * \internal * \brief Check if local node is allowed to execute (possibly remapped) action * * \param[in] device Fence device to check * \param[in] action Fence action to check * \param[in] target Node name of fence target * \param[in] allow_self Whether self-fencing is allowed for this operation * * \return true if local node is allowed to execute \p action or any actions it * might be remapped to, otherwise false */ static bool localhost_is_eligible_with_remap(const stonith_device_t *device, const char *action, const char *target, gboolean allow_self) { // Check exact action if (localhost_is_eligible(device, action, target, allow_self)) { return true; } // Check potential remaps if (pcmk__str_eq(action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* "reboot" might get remapped to "off" then "on", so even if reboot is * disallowed, return true if either of those is allowed. We'll report * the disallowed actions with the results. We never allow self-fencing * for remapped "on" actions because the target is off at that point. */ if (localhost_is_eligible(device, PCMK_ACTION_OFF, target, allow_self) || localhost_is_eligible(device, PCMK_ACTION_ON, target, FALSE)) { return true; } } return false; } static void can_fence_host_with_device(stonith_device_t *dev, struct device_search_s *search) { gboolean can = FALSE; const char *check_type = "Internal bug"; const char *target = NULL; const char *alias = NULL; const char *dev_id = "Unspecified device"; const char *action = (search == NULL)? NULL : search->action; CRM_CHECK((dev != NULL) && (action != NULL), goto search_report_results); if (dev->id != NULL) { dev_id = dev->id; } target = search->host; if (target == NULL) { can = TRUE; check_type = "No target"; goto search_report_results; } /* Answer immediately if the device does not support the action * or the local node is not allowed to perform it */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none) && !pcmk_is_set(dev->flags, st_device_supports_on)) { check_type = "Agent does not support 'on'"; goto search_report_results; } else if (!localhost_is_eligible_with_remap(dev, action, target, search->allow_suicide)) { check_type = "This node is not allowed to execute action"; goto search_report_results; } // Check eligibility as specified by pcmk_host_check check_type = target_list_type(dev); alias = g_hash_table_lookup(dev->aliases, target); if (pcmk__str_eq(check_type, PCMK__VALUE_NONE, pcmk__str_casei)) { can = TRUE; } else if (pcmk__str_eq(check_type, "static-list", pcmk__str_casei)) { if (pcmk__str_in_list(target, dev->targets, pcmk__str_casei)) { can = TRUE; } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) && g_hash_table_lookup(dev->aliases, target)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "dynamic-list", pcmk__str_casei)) { time_t now = time(NULL); if (dev->targets == NULL || dev->targets_age + 60 < now) { int device_timeout = get_action_timeout(dev, PCMK_ACTION_LIST, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_list_timeout (%ds) parameter of %s " "is larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_LIST, NULL, search->per_device_timeout, search, dynamic_list_search_cb); /* we'll respond to this search request async in the cb */ return; } if (pcmk__str_in_list(((alias == NULL)? target : alias), dev->targets, pcmk__str_casei)) { can = TRUE; } } else if (pcmk__str_eq(check_type, "status", pcmk__str_casei)) { int device_timeout = get_action_timeout(dev, check_type, search->per_device_timeout); if (device_timeout > search->per_device_timeout) { crm_notice("Since the pcmk_status_timeout (%ds) parameter of %s is " "larger than " PCMK_OPT_STONITH_TIMEOUT " (%ds), " "timeout may occur", device_timeout, dev_id, search->per_device_timeout); } crm_trace("Running '%s' to check whether %s is eligible to fence %s (%s)", check_type, dev_id, target, action); schedule_internal_command(__func__, dev, PCMK_ACTION_STATUS, target, search->per_device_timeout, search, status_search_cb); /* we'll respond to this search request async in the cb */ return; } else { crm_err("Invalid value for " PCMK_STONITH_HOST_CHECK ": %s", check_type); check_type = "Invalid " PCMK_STONITH_HOST_CHECK; } search_report_results: crm_info("%s is%s eligible to fence (%s) %s%s%s%s: %s", dev_id, (can? "" : " not"), pcmk__s(action, "unspecified action"), pcmk__s(target, "unspecified target"), (alias == NULL)? "" : " (as '", pcmk__s(alias, ""), (alias == NULL)? "" : "')", check_type); search_devices_record_result(search, ((dev == NULL)? NULL : dev_id), can); } static void search_devices(gpointer key, gpointer value, gpointer user_data) { stonith_device_t *dev = value; struct device_search_s *search = user_data; can_fence_host_with_device(dev, search); } #define DEFAULT_QUERY_TIMEOUT 20 static void get_capable_devices(const char *host, const char *action, int timeout, bool suicide, void *user_data, void (*callback) (GList * devices, void *user_data), uint32_t support_action_only) { struct device_search_s *search; guint ndevices = g_hash_table_size(device_list); if (ndevices == 0) { callback(NULL, user_data); return; } search = calloc(1, sizeof(struct device_search_s)); if (!search) { crm_crit("Cannot search for capable fence devices: %s", strerror(ENOMEM)); callback(NULL, user_data); return; } pcmk__str_update(&search->host, host); pcmk__str_update(&search->action, action); search->per_device_timeout = timeout; search->allow_suicide = suicide; search->callback = callback; search->user_data = user_data; search->support_action_only = support_action_only; /* We are guaranteed this many replies, even if a device is * unregistered while the search is in progress. */ search->replies_needed = ndevices; crm_debug("Searching %d device%s to see which can execute '%s' targeting %s", ndevices, pcmk__plural_s(ndevices), (search->action? search->action : "unknown action"), (search->host? search->host : "any node")); g_hash_table_foreach(device_list, search_devices, search); } struct st_query_data { xmlNode *reply; char *remote_peer; char *client_id; char *target; char *action; int call_options; }; /*! * \internal * \brief Add action-specific attributes to query reply XML * * \param[in,out] xml XML to add attributes to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target */ static void add_action_specific_attributes(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target) { int action_specific_timeout; int delay_max; int delay_base; CRM_CHECK(xml && action && device, return); if (is_action_required(action, device)) { crm_trace("Action '%s' is required using %s", action, device->id); crm_xml_add_int(xml, F_STONITH_DEVICE_REQUIRED, 1); } action_specific_timeout = get_action_timeout(device, action, 0); if (action_specific_timeout) { crm_trace("Action '%s' has timeout %dms using %s", action, action_specific_timeout, device->id); crm_xml_add_int(xml, F_STONITH_ACTION_TIMEOUT, action_specific_timeout); } delay_max = get_action_delay_max(device, action); if (delay_max > 0) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); crm_xml_add_int(xml, F_STONITH_DELAY_MAX, delay_max); } delay_base = get_action_delay_base(device, action, target); if (delay_base > 0) { crm_xml_add_int(xml, F_STONITH_DELAY_BASE, delay_base); } if ((delay_max > 0) && (delay_base == 0)) { crm_trace("Action '%s' has maximum random delay %ds using %s", action, delay_max, device->id); } else if ((delay_max == 0) && (delay_base > 0)) { crm_trace("Action '%s' has a static delay of %ds using %s", action, delay_base, device->id); } else if ((delay_max > 0) && (delay_base > 0)) { crm_trace("Action '%s' has a minimum delay of %ds and a randomly chosen " "maximum delay of %ds using %s", action, delay_base, delay_max, device->id); } } /*! * \internal * \brief Add "disallowed" attribute to query reply XML if appropriate * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_disallowed(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { if (!localhost_is_eligible(device, action, target, allow_suicide)) { crm_trace("Action '%s' using %s is disallowed for local host", action, device->id); pcmk__xe_set_bool_attr(xml, F_STONITH_ACTION_DISALLOWED, true); } } /*! * \internal * \brief Add child element with action-specific values to query reply XML * * \param[in,out] xml XML to add attribute to * \param[in] action Fence action * \param[in] device Fence device * \param[in] target Fence target * \param[in] allow_suicide Whether self-fencing is allowed */ static void add_action_reply(xmlNode *xml, const char *action, const stonith_device_t *device, const char *target, gboolean allow_suicide) { xmlNode *child = create_xml_node(xml, F_STONITH_ACTION); crm_xml_add(child, XML_ATTR_ID, action); add_action_specific_attributes(child, action, device, target); add_disallowed(child, action, device, target, allow_suicide); } /*! * \internal * \brief Send a reply to a CPG peer or IPC client * * \param[in] reply XML reply to send * \param[in] call_options Send synchronously if st_opt_sync_call is set * \param[in] remote_peer If not NULL, name of peer node to send CPG reply * \param[in,out] client If not NULL, client to send IPC reply */ static void stonith_send_reply(const xmlNode *reply, int call_options, const char *remote_peer, pcmk__client_t *client) { CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client != NULL)), return); if (remote_peer == NULL) { do_local_reply(reply, client, call_options); } else { send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); } } static void stonith_query_capable_device_cb(GList * devices, void *user_data) { struct st_query_data *query = user_data; int available_devices = 0; xmlNode *dev = NULL; xmlNode *list = NULL; GList *lpc = NULL; pcmk__client_t *client = NULL; if (query->client_id != NULL) { client = pcmk__find_client_by_id(query->client_id); if ((client == NULL) && (query->remote_peer == NULL)) { crm_trace("Skipping reply to %s: no longer a client", query->client_id); goto done; } } /* Pack the results into XML */ list = create_xml_node(NULL, __func__); crm_xml_add(list, F_STONITH_TARGET, query->target); for (lpc = devices; lpc != NULL; lpc = lpc->next) { stonith_device_t *device = g_hash_table_lookup(device_list, lpc->data); const char *action = query->action; if (!device) { /* It is possible the device got unregistered while * determining who can fence the target */ continue; } available_devices++; dev = create_xml_node(list, F_STONITH_DEVICE); crm_xml_add(dev, XML_ATTR_ID, device->id); crm_xml_add(dev, "namespace", device->namespace); crm_xml_add(dev, "agent", device->agent); crm_xml_add_int(dev, F_STONITH_DEVICE_VERIFIED, device->verified); crm_xml_add_int(dev, F_STONITH_DEVICE_SUPPORT_FLAGS, device->flags); /* If the originating fencer wants to reboot the node, and we have a * capable device that doesn't support "reboot", remap to "off" instead. */ if (!pcmk_is_set(device->flags, st_device_supports_reboot) && pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { crm_trace("%s doesn't support reboot, using values for off instead", device->id); action = PCMK_ACTION_OFF; } /* Add action-specific values if available */ add_action_specific_attributes(dev, action, device, query->target); if (pcmk__str_eq(query->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A "reboot" *might* get remapped to "off" then "on", so after * sending the "reboot"-specific values in the main element, we add * sub-elements for "off" and "on" values. * * We short-circuited earlier if "reboot", "off" and "on" are all * disallowed for the local host. However if only one or two are * disallowed, we send back the results and mark which ones are * disallowed. If "reboot" is disallowed, this might cause problems * with older fencer versions, which won't check for it. Older * versions will ignore "off" and "on", so they are not a problem. */ add_disallowed(dev, action, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_OFF, device, query->target, pcmk_is_set(query->call_options, st_opt_allow_suicide)); add_action_reply(dev, PCMK_ACTION_ON, device, query->target, FALSE); } /* A query without a target wants device parameters */ if (query->target == NULL) { xmlNode *attrs = create_xml_node(dev, XML_TAG_ATTRS); g_hash_table_foreach(device->params, hash2field, attrs); } } crm_xml_add_int(list, F_STONITH_AVAILABLE_DEVICES, available_devices); if (query->target) { crm_debug("Found %d matching device%s for target '%s'", available_devices, pcmk__plural_s(available_devices), query->target); } else { crm_debug("%d device%s installed", available_devices, pcmk__plural_s(available_devices)); } if (list != NULL) { crm_log_xml_trace(list, "Add query results"); add_message_xml(query->reply, F_STONITH_CALLDATA, list); } stonith_send_reply(query->reply, query->call_options, query->remote_peer, client); done: free_xml(query->reply); free(query->remote_peer); free(query->client_id); free(query->target); free(query->action); free(query); free_xml(list); g_list_free_full(devices, free); } /*! * \internal * \brief Log the result of an asynchronous command * * \param[in] cmd Command the result is for * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] next Alternate device that will be tried if command failed * \param[in] op_merged Whether this command was merged with an earlier one */ static void log_async_result(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, const char *next, bool op_merged) { int log_level = LOG_ERR; int output_log_level = LOG_NEVER; guint devices_remaining = g_list_length(cmd->next_device_iter); GString *msg = g_string_sized_new(80); // Reasonable starting size // Choose log levels appropriately if we have a result if (pcmk__result_ok(result)) { log_level = (cmd->target == NULL)? LOG_DEBUG : LOG_NOTICE; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { output_log_level = LOG_DEBUG; } next = NULL; } else { log_level = (cmd->target == NULL)? LOG_NOTICE : LOG_ERR; if ((result->action_stdout != NULL) && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_none)) { output_log_level = LOG_WARNING; } } // Build the log message piece by piece pcmk__g_strcat(msg, "Operation '", cmd->action, "' ", NULL); if (pid != 0) { g_string_append_printf(msg, "[%d] ", pid); } if (cmd->target != NULL) { pcmk__g_strcat(msg, "targeting ", cmd->target, " ", NULL); } if (cmd->device != NULL) { pcmk__g_strcat(msg, "using ", cmd->device, " ", NULL); } // Add exit status or execution status as appropriate if (result->execution_status == PCMK_EXEC_DONE) { g_string_append_printf(msg, "returned %d", result->exit_status); } else { pcmk__g_strcat(msg, "could not be executed: ", pcmk_exec_status_str(result->execution_status), NULL); } // Add exit reason and next device if appropriate if (result->exit_reason != NULL) { pcmk__g_strcat(msg, " (", result->exit_reason, ")", NULL); } if (next != NULL) { pcmk__g_strcat(msg, ", retrying with ", next, NULL); } if (devices_remaining > 0) { g_string_append_printf(msg, " (%u device%s remaining)", (unsigned int) devices_remaining, pcmk__plural_s(devices_remaining)); } g_string_append_printf(msg, " " CRM_XS " %scall %d from %s", (op_merged? "merged " : ""), cmd->id, cmd->client_name); // Log the result do_crm_log(log_level, "%s", msg->str); g_string_free(msg, TRUE); // Log the output (which may have multiple lines), if appropriate if (output_log_level != LOG_NEVER) { char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); crm_log_output(output_log_level, prefix, result->action_stdout); free(prefix); } } /*! * \internal * \brief Reply to requester after asynchronous command completion * * \param[in] cmd Command that completed * \param[in] result Result of command * \param[in] pid Process ID of command, if available * \param[in] merged If true, command was merged with another, not executed */ static void send_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result, int pid, bool merged) { xmlNode *reply = NULL; pcmk__client_t *client = NULL; CRM_CHECK((cmd != NULL) && (result != NULL), return); log_async_result(cmd, result, pid, NULL, merged); if (cmd->client != NULL) { client = pcmk__find_client_by_id(cmd->client); if ((client == NULL) && (cmd->origin == NULL)) { crm_trace("Skipping reply to %s: no longer a client", cmd->client); return; } } reply = construct_async_reply(cmd, result); if (merged) { pcmk__xe_set_bool_attr(reply, F_STONITH_MERGED, true); } if (!stand_alone && pcmk__is_fencing_action(cmd->action) && pcmk__str_eq(cmd->origin, cmd->target, pcmk__str_casei)) { /* The target was also the originator, so broadcast the result on its * behalf (since it will be unable to). */ crm_trace("Broadcast '%s' result for %s (target was also originator)", cmd->action, cmd->target); crm_xml_add(reply, F_SUBTYPE, "broadcast"); crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); } else { // Reply only to the originator stonith_send_reply(reply, cmd->options, cmd->origin, client); } crm_log_xml_trace(reply, "Reply"); free_xml(reply); if (stand_alone) { /* Do notification with a clean data object */ xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); stonith__xe_set_result(notify_data, result); crm_xml_add(notify_data, F_STONITH_TARGET, cmd->target); crm_xml_add(notify_data, F_STONITH_OPERATION, cmd->op); crm_xml_add(notify_data, F_STONITH_DELEGATE, "localhost"); crm_xml_add(notify_data, F_STONITH_DEVICE, cmd->device); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data); fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); } } static void cancel_stonith_command(async_command_t * cmd) { stonith_device_t *device = cmd_device(cmd); if (device) { crm_trace("Cancel scheduled '%s' action using %s", cmd->action, device->id); device->pending_ops = g_list_remove(device->pending_ops, cmd); } } /*! * \internal * \brief Cancel and reply to any duplicates of a just-completed operation * * Check whether any fencing operations are scheduled to do the same thing as * one that just succeeded. If so, rather than performing the same operation * twice, return the result of this operation for all matching pending commands. * * \param[in,out] cmd Fencing operation that just succeeded * \param[in] result Result of \p cmd * \param[in] pid If nonzero, process ID of agent invocation (for logs) * * \note Duplicate merging will do the right thing for either type of remapped * reboot. If the executing fencer remapped an unsupported reboot to off, * then cmd->action will be "reboot" and will be merged with any other * reboot requests. If the originating fencer remapped a topology reboot * to off then on, we will get here once with cmd->action "off" and once * with "on", and they will be merged separately with similar requests. */ static void reply_to_duplicates(async_command_t *cmd, const pcmk__action_result_t *result, int pid) { GList *next = NULL; for (GList *iter = cmd_list; iter != NULL; iter = next) { async_command_t *cmd_other = iter->data; next = iter->next; // We might delete this entry, so grab next now if (cmd == cmd_other) { continue; } /* A pending operation matches if: * 1. The client connections are different. * 2. The target is the same. * 3. The fencing action is the same. * 4. The device scheduled to execute the action is the same. */ if (pcmk__str_eq(cmd->client, cmd_other->client, pcmk__str_casei) || !pcmk__str_eq(cmd->target, cmd_other->target, pcmk__str_casei) || !pcmk__str_eq(cmd->action, cmd_other->action, pcmk__str_none) || !pcmk__str_eq(cmd->device, cmd_other->device, pcmk__str_casei)) { continue; } crm_notice("Merging fencing action '%s'%s%s originating from " "client %s with identical fencing request from client %s", cmd_other->action, (cmd_other->target == NULL)? "" : " targeting ", pcmk__s(cmd_other->target, ""), cmd_other->client_name, cmd->client_name); // Stop tracking the duplicate, send its result, and cancel it cmd_list = g_list_remove_link(cmd_list, iter); send_async_reply(cmd_other, result, pid, true); cancel_stonith_command(cmd_other); free_async_command(cmd_other); g_list_free_1(iter); } } /*! * \internal * \brief Return the next required device (if any) for an operation * * \param[in,out] cmd Fencing operation that just succeeded * * \return Next device required for action if any, otherwise NULL */ static stonith_device_t * next_required_device(async_command_t *cmd) { for (GList *iter = cmd->next_device_iter; iter != NULL; iter = iter->next) { stonith_device_t *next_device = g_hash_table_lookup(device_list, iter->data); if (is_action_required(cmd->action, next_device)) { /* This is only called for successful actions, so it's OK to skip * non-required devices. */ cmd->next_device_iter = iter->next; return next_device; } } return NULL; } static void st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; stonith_device_t *next_device = NULL; CRM_CHECK(cmd != NULL, return); device = cmd_device(cmd); cmd->active_on = NULL; /* The device is ready to do something else now */ if (device) { if (!device->verified && pcmk__result_ok(result) && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_LIST, PCMK_ACTION_MONITOR, PCMK_ACTION_STATUS, NULL)) { device->verified = TRUE; } mainloop_set_trigger(device->work); } if (pcmk__result_ok(result)) { next_device = next_required_device(cmd); } else if ((cmd->next_device_iter != NULL) && !is_action_required(cmd->action, device)) { /* if this device didn't work out, see if there are any others we can try. * if the failed device was 'required', we can't pick another device. */ next_device = g_hash_table_lookup(device_list, cmd->next_device_iter->data); cmd->next_device_iter = cmd->next_device_iter->next; } if (next_device == NULL) { send_async_reply(cmd, result, pid, false); if (pcmk__result_ok(result)) { reply_to_duplicates(cmd, result, pid); } free_async_command(cmd); } else { // This operation requires more fencing log_async_result(cmd, result, pid, next_device->id, false); schedule_stonith_command(cmd, next_device); } } static gint sort_device_priority(gconstpointer a, gconstpointer b) { const stonith_device_t *dev_a = a; const stonith_device_t *dev_b = b; if (dev_a->priority > dev_b->priority) { return -1; } else if (dev_a->priority < dev_b->priority) { return 1; } return 0; } static void stonith_fence_get_devices_cb(GList * devices, void *user_data) { async_command_t *cmd = user_data; stonith_device_t *device = NULL; guint ndevices = g_list_length(devices); crm_info("Found %d matching device%s for target '%s'", ndevices, pcmk__plural_s(ndevices), cmd->target); if (devices != NULL) { /* Order based on priority */ devices = g_list_sort(devices, sort_device_priority); device = g_hash_table_lookup(device_list, devices->data); } if (device == NULL) { // No device found pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__format_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "No device configured for target '%s'", cmd->target); send_async_reply(cmd, &result, 0, false); pcmk__reset_result(&result); free_async_command(cmd); g_list_free_full(devices, free); } else { // Device found, schedule it for fencing cmd->device_list = devices; cmd->next_device_iter = devices->next; schedule_stonith_command(cmd, device); } } /*! * \internal * \brief Execute a fence action via the local node * * \param[in] msg Fencing request * \param[out] result Where to store result of fence action */ static void fence_locally(xmlNode *msg, pcmk__action_result_t *result) { const char *device_id = NULL; stonith_device_t *device = NULL; async_command_t *cmd = NULL; xmlNode *dev = NULL; CRM_CHECK((msg != NULL) && (result != NULL), return); dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); cmd = create_async_command(msg); if (cmd == NULL) { crm_log_xml_warn(msg, "invalid"); fenced_set_protocol_error(result); return; } device_id = crm_element_value(dev, F_STONITH_DEVICE); if (device_id != NULL) { device = g_hash_table_lookup(device_list, device_id); if (device == NULL) { crm_err("Requested device '%s' is not available", device_id); pcmk__format_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, "Requested device '%s' not found", device_id); return; } schedule_stonith_command(cmd, device); } else { const char *host = crm_element_value(dev, F_STONITH_TARGET); if (pcmk_is_set(cmd->options, st_opt_cs_nodeid)) { int nodeid = 0; crm_node_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); if (node != NULL) { host = node->uname; } } /* If we get to here, then self-fencing is implicitly allowed */ get_capable_devices(host, cmd->action, cmd->default_timeout, TRUE, cmd, stonith_fence_get_devices_cb, fenced_support_flag(cmd->action)); } pcmk__set_result(result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } /*! * \internal * \brief Build an XML reply for a fencing operation * * \param[in] request Request that reply is for * \param[in] data If not NULL, add to reply as call data * \param[in] result Full result of fencing operation * * \return Newly created XML reply * \note The caller is responsible for freeing the result. * \note This has some overlap with construct_async_reply(), but that copies * values from an async_command_t, whereas this one copies them from the * request. */ xmlNode * fenced_construct_reply(const xmlNode *request, xmlNode *data, const pcmk__action_result_t *result) { xmlNode *reply = NULL; reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); stonith__xe_set_result(reply, result); if (request == NULL) { /* Most likely, this is the result of a stonith operation that was * initiated before we came up. Unfortunately that means we lack enough * information to provide clients with a full result. * * @TODO Maybe synchronize this information at start-up? */ crm_warn("Missing request information for client notifications for " "operation with result '%s' (initiated before we came up?)", pcmk_exec_status_str(result->execution_status)); } else { const char *name = NULL; const char *value = NULL; // Attributes to copy from request to reply const char *names[] = { F_STONITH_OPERATION, F_STONITH_CALLID, F_STONITH_CLIENTID, F_STONITH_CLIENTNAME, F_STONITH_REMOTE_OP_ID, F_STONITH_CALLOPTS }; for (int lpc = 0; lpc < PCMK__NELEM(names); lpc++) { name = names[lpc]; value = crm_element_value(request, name); crm_xml_add(reply, name, value); } if (data != NULL) { add_message_xml(reply, F_STONITH_CALLDATA, data); } } return reply; } /*! * \internal * \brief Build an XML reply to an asynchronous fencing command * * \param[in] cmd Fencing command that reply is for * \param[in] result Command result */ static xmlNode * construct_async_reply(const async_command_t *cmd, const pcmk__action_result_t *result) { xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY); crm_xml_add(reply, "st_origin", __func__); crm_xml_add(reply, F_TYPE, T_STONITH_NG); crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); crm_xml_add(reply, F_STONITH_CLIENTID, cmd->client); crm_xml_add(reply, F_STONITH_CLIENTNAME, cmd->client_name); crm_xml_add(reply, F_STONITH_TARGET, cmd->target); crm_xml_add(reply, F_STONITH_ACTION, cmd->op); crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); stonith__xe_set_result(reply, result); return reply; } bool fencing_peer_active(crm_node_t *peer) { if (peer == NULL) { return FALSE; } else if (peer->uname == NULL) { return FALSE; } else if (pcmk_is_set(peer->processes, crm_get_cluster_proc())) { return TRUE; } return FALSE; } void set_fencing_completed(remote_fencing_op_t *op) { struct timespec tv; qb_util_timespec_from_epoch_get(&tv); op->completed = tv.tv_sec; op->completed_nsec = tv.tv_nsec; } /*! * \internal * \brief Look for alternate node needed if local node shouldn't fence target * * \param[in] target Node that must be fenced * * \return Name of an alternate node that should fence \p target if any, * or NULL otherwise */ static const char * check_alternate_host(const char *target) { if (pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { GHashTableIter gIter; crm_node_t *entry = NULL; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if (fencing_peer_active(entry) && !pcmk__str_eq(entry->uname, target, pcmk__str_casei)) { crm_notice("Forwarding self-fencing request to %s", entry->uname); return entry->uname; } } crm_warn("Will handle own fencing because no peer can"); } return NULL; } static void remove_relay_op(xmlNode * request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_ACTION, request, LOG_TRACE); const char *relay_op_id = NULL; const char *op_id = NULL; const char *client_name = NULL; const char *target = NULL; remote_fencing_op_t *relay_op = NULL; if (dev) { target = crm_element_value(dev, F_STONITH_TARGET); } relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID_RELAY); op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); client_name = crm_element_value(request, F_STONITH_CLIENTNAME); /* Delete RELAY operation. */ if (relay_op_id && target && pcmk__str_eq(target, stonith_our_uname, pcmk__str_casei)) { relay_op = g_hash_table_lookup(stonith_remote_op_list, relay_op_id); if (relay_op) { GHashTableIter iter; remote_fencing_op_t *list_op = NULL; g_hash_table_iter_init(&iter, stonith_remote_op_list); /* If the operation to be deleted is registered as a duplicate, delete the registration. */ while (g_hash_table_iter_next(&iter, NULL, (void **)&list_op)) { GList *dup_iter = NULL; if (list_op != relay_op) { for (dup_iter = list_op->duplicates; dup_iter != NULL; dup_iter = dup_iter->next) { remote_fencing_op_t *other = dup_iter->data; if (other == relay_op) { other->duplicates = g_list_remove(other->duplicates, relay_op); break; } } } } crm_debug("Deleting relay op %s ('%s'%s%s for %s), " "replaced by op %s ('%s'%s%s for %s)", relay_op->id, relay_op->action, (relay_op->target == NULL)? "" : " targeting ", pcmk__s(relay_op->target, ""), relay_op->client_name, op_id, relay_op->action, (target == NULL)? "" : " targeting ", pcmk__s(target, ""), client_name); g_hash_table_remove(stonith_remote_op_list, relay_op_id); } } } /*! * \internal * \brief Check whether an API request was sent by a privileged user * * API commands related to fencing configuration may be done only by privileged * IPC users (i.e. root or hacluster), because all other users should go through * the CIB to have ACLs applied. If no client was given, this is a peer request, * which is always allowed. * * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) * \param[in] op Requested API operation (for logging only) * * \return true if sender is peer or privileged client, otherwise false */ static inline bool is_privileged(const pcmk__client_t *c, const char *op) { if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { return true; } else { crm_warn("Rejecting IPC request '%s' from unprivileged client %s", pcmk__s(op, ""), pcmk__client_name(c)); return false; } } // CRM_OP_REGISTER static xmlNode * handle_register_request(pcmk__request_t *request) { xmlNode *reply = create_xml_node(NULL, "reply"); CRM_ASSERT(request->ipc_client != NULL); crm_xml_add(reply, F_STONITH_OPERATION, CRM_OP_REGISTER); crm_xml_add(reply, F_STONITH_CLIENTID, request->ipc_client->id); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return reply; } // STONITH_OP_EXEC static xmlNode * handle_agent_request(pcmk__request_t *request) { execute_agent_action(request->xml, &request->result); if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_TIMEOUT_UPDATE static xmlNode * handle_update_timeout_request(pcmk__request_t *request) { const char *call_id = crm_element_value(request->xml, F_STONITH_CALLID); const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); int op_timeout = 0; crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &op_timeout); do_stonith_async_timeout_update(client_id, call_id, op_timeout); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } // STONITH_OP_QUERY static xmlNode * handle_query_request(pcmk__request_t *request) { int timeout = 0; xmlNode *dev = NULL; const char *action = NULL; const char *target = NULL; const char *client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); struct st_query_data *query = NULL; if (request->peer != NULL) { // Record it for the future notification create_remote_stonith_op(client_id, request->xml, TRUE); } /* Delete the DC node RELAY operation. */ remove_relay_op(request->xml); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); dev = get_xpath_object("//@" F_STONITH_ACTION, request->xml, LOG_NEVER); if (dev != NULL) { const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (pcmk__str_eq(device, "manual_ack", pcmk__str_casei)) { return NULL; // No query or reply necessary } target = crm_element_value(dev, F_STONITH_TARGET); action = crm_element_value(dev, F_STONITH_ACTION); } crm_log_xml_trace(request->xml, "Query"); query = calloc(1, sizeof(struct st_query_data)); CRM_ASSERT(query != NULL); query->reply = fenced_construct_reply(request->xml, NULL, &request->result); pcmk__str_update(&query->remote_peer, request->peer); pcmk__str_update(&query->client_id, client_id); pcmk__str_update(&query->target, target); pcmk__str_update(&query->action, action); query->call_options = request->call_options; crm_element_value_int(request->xml, F_STONITH_TIMEOUT, &timeout); get_capable_devices(target, action, timeout, pcmk_is_set(query->call_options, st_opt_allow_suicide), query, stonith_query_capable_device_cb, st_device_supports_none); return NULL; } // T_STONITH_NOTIFY static xmlNode * handle_notify_request(pcmk__request_t *request) { const char *flag_name = NULL; CRM_ASSERT(request->ipc_client != NULL); flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_ACTIVATE); if (flag_name != NULL) { crm_debug("Enabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__set_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } flag_name = crm_element_value(request->xml, F_STONITH_NOTIFY_DEACTIVATE); if (flag_name != NULL) { crm_debug("Disabling %s callbacks for client %s", flag_name, pcmk__request_origin(request)); pcmk__clear_client_flags(request->ipc_client, get_stonith_flag(flag_name)); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); pcmk__set_request_flags(request, pcmk__request_reuse_options); return pcmk__ipc_create_ack(request->ipc_flags, "ack", NULL, CRM_EX_OK); } // STONITH_OP_RELAY static xmlNode * handle_relay_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, LOG_TRACE); crm_notice("Received forwarded fencing request from " "%s %s to fence (%s) peer %s", pcmk__request_origin_type(request), pcmk__request_origin(request), crm_element_value(dev, F_STONITH_ACTION), crm_element_value(dev, F_STONITH_TARGET)); if (initiate_remote_stonith_op(NULL, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); return fenced_construct_reply(request->xml, NULL, &request->result); } pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); return NULL; } // STONITH_OP_FENCE static xmlNode * handle_fence_request(pcmk__request_t *request) { if ((request->peer != NULL) || stand_alone) { fence_locally(request->xml, &request->result); } else if (pcmk_is_set(request->call_options, st_opt_manual_ack)) { switch (fenced_handle_manual_confirmation(request->ipc_client, request->xml)) { case pcmk_rc_ok: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); break; case EINPROGRESS: pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); break; default: fenced_set_protocol_error(&request->result); break; } } else { const char *alternate_host = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request->xml, LOG_TRACE); const char *target = crm_element_value(dev, F_STONITH_TARGET); const char *action = crm_element_value(dev, F_STONITH_ACTION); const char *device = crm_element_value(dev, F_STONITH_DEVICE); if (request->ipc_client != NULL) { int tolerance = 0; crm_notice("Client %s wants to fence (%s) %s using %s", pcmk__request_origin(request), action, target, (device? device : "any device")); crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); if (stonith_check_fence_tolerance(tolerance, target, action)) { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return fenced_construct_reply(request->xml, NULL, &request->result); } alternate_host = check_alternate_host(target); } else { crm_notice("Peer %s wants to fence (%s) '%s' with device '%s'", request->peer, action, target, (device == NULL)? "(any)" : device); } if (alternate_host != NULL) { const char *client_id = NULL; remote_fencing_op_t *op = NULL; if (request->ipc_client->id == 0) { client_id = crm_element_value(request->xml, F_STONITH_CLIENTID); } else { client_id = request->ipc_client->id; } /* Create a duplicate fencing operation to relay with the client ID. * When a query response is received, this operation should be * deleted to avoid keeping the duplicate around. */ op = create_remote_stonith_op(client_id, request->xml, FALSE); crm_xml_add(request->xml, F_STONITH_OPERATION, STONITH_OP_RELAY); crm_xml_add(request->xml, F_STONITH_CLIENTID, request->ipc_client->id); crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); send_cluster_message(crm_get_peer(0, alternate_host), crm_msg_stonith_ng, request->xml, FALSE); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } else if (initiate_remote_stonith_op(request->ipc_client, request->xml, FALSE) == NULL) { fenced_set_protocol_error(&request->result); } else { pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, NULL); } } if (request->result.execution_status == PCMK_EXEC_PENDING) { return NULL; } return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_FENCE_HISTORY static xmlNode * handle_history_request(pcmk__request_t *request) { xmlNode *reply = NULL; xmlNode *data = NULL; stonith_fence_history(request->xml, &data, request->peer, request->call_options); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (!pcmk_is_set(request->call_options, st_opt_discard_reply)) { /* When the local node broadcasts its history, it sets * st_opt_discard_reply and doesn't need a reply. */ reply = fenced_construct_reply(request->xml, data, &request->result); } free_xml(data); return reply; } // STONITH_OP_DEVICE_ADD static xmlNode * handle_device_add_request(pcmk__request_t *request) { const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, LOG_ERR); if (is_privileged(request->ipc_client, op)) { int rc = stonith_device_register(dev, FALSE); pcmk__set_result(&request->result, ((rc == pcmk_ok)? CRM_EX_OK : CRM_EX_ERROR), stonith__legacy2status(rc), ((rc == pcmk_ok)? NULL : pcmk_strerror(rc))); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must register device via CIB"); } fenced_send_device_notification(op, &request->result, (dev == NULL)? NULL : ID(dev)); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_DEVICE_DEL static xmlNode * handle_device_delete_request(pcmk__request_t *request) { xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request->xml, LOG_ERR); const char *device_id = crm_element_value(dev, XML_ATTR_ID); const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { stonith_device_remove(device_id, false); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete device via CIB"); } fenced_send_device_notification(op, &request->result, device_id); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_ADD static xmlNode * handle_level_add_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { fenced_register_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must add level via CIB"); } fenced_send_level_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // STONITH_OP_LEVEL_DEL static xmlNode * handle_level_delete_request(pcmk__request_t *request) { char *desc = NULL; const char *op = crm_element_value(request->xml, F_STONITH_OPERATION); if (is_privileged(request->ipc_client, op)) { fenced_unregister_level(request->xml, &desc, &request->result); } else { unpack_level_request(request->xml, NULL, NULL, NULL, &desc); pcmk__set_result(&request->result, CRM_EX_INSUFFICIENT_PRIV, PCMK_EXEC_INVALID, "Unprivileged users must delete level via CIB"); } fenced_send_level_notification(op, &request->result, desc); free(desc); return fenced_construct_reply(request->xml, NULL, &request->result); } // CRM_OP_RM_NODE_CACHE static xmlNode * handle_cache_request(pcmk__request_t *request) { int node_id = 0; const char *name = NULL; crm_element_value_int(request->xml, XML_ATTR_ID, &node_id); name = crm_element_value(request->xml, XML_ATTR_UNAME); reap_crm_member(node_id, name); pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); return NULL; } static xmlNode * handle_unknown_request(pcmk__request_t *request) { crm_err("Unknown IPC request %s from %s %s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request)); pcmk__format_result(&request->result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, "Unknown IPC request type '%s' (bug?)", request->op); return fenced_construct_reply(request->xml, NULL, &request->result); } static void fenced_register_handlers(void) { pcmk__server_command_t handlers[] = { { CRM_OP_REGISTER, handle_register_request }, { STONITH_OP_EXEC, handle_agent_request }, { STONITH_OP_TIMEOUT_UPDATE, handle_update_timeout_request }, { STONITH_OP_QUERY, handle_query_request }, { T_STONITH_NOTIFY, handle_notify_request }, { STONITH_OP_RELAY, handle_relay_request }, { STONITH_OP_FENCE, handle_fence_request }, { STONITH_OP_FENCE_HISTORY, handle_history_request }, { STONITH_OP_DEVICE_ADD, handle_device_add_request }, { STONITH_OP_DEVICE_DEL, handle_device_delete_request }, { STONITH_OP_LEVEL_ADD, handle_level_add_request }, { STONITH_OP_LEVEL_DEL, handle_level_delete_request }, { CRM_OP_RM_NODE_CACHE, handle_cache_request }, { NULL, handle_unknown_request }, }; fenced_handlers = pcmk__register_handlers(handlers); } void fenced_unregister_handlers(void) { if (fenced_handlers != NULL) { g_hash_table_destroy(fenced_handlers); fenced_handlers = NULL; } } static void handle_request(pcmk__request_t *request) { xmlNode *reply = NULL; const char *reason = NULL; if (fenced_handlers == NULL) { fenced_register_handlers(); } reply = pcmk__process_request(request, fenced_handlers); if (reply != NULL) { if (pcmk_is_set(request->flags, pcmk__request_reuse_options) && (request->ipc_client != NULL)) { /* Certain IPC-only commands must reuse the call options from the * original request rather than the ones set by stonith_send_reply() * -> do_local_reply(). */ pcmk__ipc_send_xml(request->ipc_client, request->ipc_id, reply, request->ipc_flags); request->ipc_client->request_id = 0; } else { stonith_send_reply(reply, request->call_options, request->peer, request->ipc_client); } free_xml(reply); } reason = request->result.exit_reason; crm_debug("Processed %s request from %s %s: %s%s%s%s", request->op, pcmk__request_origin_type(request), pcmk__request_origin(request), pcmk_exec_status_str(request->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } static void handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) { // Copy, because request might be freed before we want to log this char *op = crm_element_value_copy(request, F_STONITH_OPERATION); if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { process_remote_stonith_query(request); } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { fenced_process_fencing_reply(request); } else { crm_err("Ignoring unknown %s reply from %s %s", pcmk__s(op, "untyped"), ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); crm_log_xml_warn(request, "UnknownOp"); free(op); return; } crm_debug("Processed %s reply from %s %s", op, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); free(op); } /*! * \internal * \brief Handle a message from an IPC client or CPG peer * * \param[in,out] client If not NULL, IPC client that sent message * \param[in] id If from IPC client, IPC message ID * \param[in] flags Message flags * \param[in,out] message Message XML * \param[in] remote_peer If not NULL, CPG peer that sent message */ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, xmlNode *message, const char *remote_peer) { int call_options = st_opt_none; bool is_reply = false; CRM_CHECK(message != NULL, return); if (get_xpath_object("//" T_STONITH_REPLY, message, LOG_NEVER) != NULL) { is_reply = true; } crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options); crm_debug("Processing %ssynchronous %s %s %u from %s %s", pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", crm_element_value(message, F_STONITH_OPERATION), (is_reply? "reply" : "request"), id, ((client == NULL)? "peer" : "client"), ((client == NULL)? remote_peer : pcmk__client_name(client))); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(client == NULL || client->request_id == id); } if (is_reply) { handle_reply(client, message, remote_peer); } else { pcmk__request_t request = { .ipc_client = client, .ipc_id = id, .ipc_flags = flags, .peer = remote_peer, .xml = message, .call_options = call_options, .result = PCMK__UNKNOWN_RESULT, }; request.op = crm_element_value_copy(request.xml, F_STONITH_OPERATION); CRM_CHECK(request.op != NULL, return); if (pcmk_is_set(request.call_options, st_opt_sync_call)) { pcmk__set_request_flags(&request, pcmk__request_sync); } handle_request(&request); pcmk__reset_request(&request); } } diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 843b3d4654..85957ff92c 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1,2512 +1,2512 @@ /* - * Copyright 2009-2023 the Pacemaker project contributors + * Copyright 2009-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TIMEOUT_MULTIPLY_FACTOR 1.2 /* When one fencer queries its peers for devices able to handle a fencing * request, each peer will reply with a list of such devices available to it. * Each reply will be parsed into a peer_device_info_t, with each device's * information kept in a device_properties_t. */ typedef struct device_properties_s { /* Whether access to this device has been verified */ gboolean verified; /* The remaining members are indexed by the operation's "phase" */ /* Whether this device has been executed in each phase */ gboolean executed[st_phase_max]; /* Whether this device is disallowed from executing in each phase */ gboolean disallowed[st_phase_max]; /* Action-specific timeout for each phase */ int custom_action_timeout[st_phase_max]; /* Action-specific maximum random delay for each phase */ int delay_max[st_phase_max]; /* Action-specific base delay for each phase */ int delay_base[st_phase_max]; /* Group of enum st_device_flags */ uint32_t device_support_flags; } device_properties_t; typedef struct { /* Name of peer that sent this result */ char *host; /* Only try peers for non-topology based operations once */ gboolean tried; /* Number of entries in the devices table */ int ndevices; /* Devices available to this host that are capable of fencing the target */ GHashTable *devices; } peer_device_info_t; GHashTable *stonith_remote_op_list = NULL; extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, int call_options); static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer); static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup); static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer); static gint sort_strings(gconstpointer a, gconstpointer b) { return strcmp(a, b); } static void free_remote_query(gpointer data) { if (data != NULL) { peer_device_info_t *peer = data; g_hash_table_destroy(peer->devices); free(peer->host); free(peer); } } void free_stonith_remote_op_list(void) { if (stonith_remote_op_list != NULL) { g_hash_table_destroy(stonith_remote_op_list); stonith_remote_op_list = NULL; } } struct peer_count_data { const remote_fencing_op_t *op; gboolean verified_only; uint32_t support_action_only; int count; }; /*! * \internal * \brief Increment a counter if a device has not been executed yet * * \param[in] key Device ID (ignored) * \param[in] value Device properties * \param[in,out] user_data Peer count data */ static void count_peer_device(gpointer key, gpointer value, gpointer user_data) { device_properties_t *props = (device_properties_t*)value; struct peer_count_data *data = user_data; if (!props->executed[data->op->phase] && (!data->verified_only || props->verified) && ((data->support_action_only == st_device_supports_none) || pcmk_is_set(props->device_support_flags, data->support_action_only))) { ++(data->count); } } /*! * \internal * \brief Check the number of available devices in a peer's query results * * \param[in] op Operation that results are for * \param[in] peer Peer to count * \param[in] verified_only Whether to count only verified devices * \param[in] support_action_only Whether to count only devices that support action * * \return Number of devices available to peer that were not already executed */ static int count_peer_devices(const remote_fencing_op_t *op, const peer_device_info_t *peer, gboolean verified_only, uint32_t support_on_action_only) { struct peer_count_data data; data.op = op; data.verified_only = verified_only; data.support_action_only = support_on_action_only; data.count = 0; if (peer) { g_hash_table_foreach(peer->devices, count_peer_device, &data); } return data.count; } /*! * \internal * \brief Search for a device in a query result * * \param[in] op Operation that result is for * \param[in] peer Query result for a peer * \param[in] device Device ID to search for * * \return Device properties if found, NULL otherwise */ static device_properties_t * find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, uint32_t support_action_only) { device_properties_t *props = g_hash_table_lookup(peer->devices, device); if (props && support_action_only != st_device_supports_none && !pcmk_is_set(props->device_support_flags, support_action_only)) { return NULL; } return (props && !props->executed[op->phase] && !props->disallowed[op->phase])? props : NULL; } /*! * \internal * \brief Find a device in a peer's device list and mark it as executed * * \param[in] op Operation that peer result is for * \param[in,out] peer Peer with results to search * \param[in] device ID of device to mark as done * \param[in] verified_devices_only Only consider verified devices * * \return TRUE if device was found and marked, FALSE otherwise */ static gboolean grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, const char *device, gboolean verified_devices_only) { device_properties_t *props = find_peer_device(op, peer, device, fenced_support_flag(op->action)); if ((props == NULL) || (verified_devices_only && !props->verified)) { return FALSE; } crm_trace("Removing %s from %s (%d remaining)", device, peer->host, count_peer_devices(op, peer, FALSE, st_device_supports_none)); props->executed[op->phase] = TRUE; return TRUE; } static void clear_remote_op_timers(remote_fencing_op_t * op) { if (op->query_timer) { g_source_remove(op->query_timer); op->query_timer = 0; } if (op->op_timer_total) { g_source_remove(op->op_timer_total); op->op_timer_total = 0; } if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } } static void free_remote_op(gpointer data) { remote_fencing_op_t *op = data; crm_log_xml_debug(op->request, "Destroying"); clear_remote_op_timers(op); free(op->id); free(op->action); free(op->delegate); free(op->target); free(op->client_id); free(op->client_name); free(op->originator); if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); } if (op->request) { free_xml(op->request); op->request = NULL; } if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } g_list_free_full(op->automatic_list, free); g_list_free(op->duplicates); pcmk__reset_result(&op->result); free(op); } void init_stonith_remote_op_hash_table(GHashTable **table) { if (*table == NULL) { *table = pcmk__strkey_table(NULL, free_remote_op); } } /*! * \internal * \brief Return an operation's originally requested action (before any remap) * * \param[in] op Operation to check * * \return Operation's original action */ static const char * op_requested_action(const remote_fencing_op_t *op) { return ((op->phase > st_phase_requested)? PCMK_ACTION_REBOOT : op->action); } /*! * \internal * \brief Remap a "reboot" operation to the "off" phase * * \param[in,out] op Operation to remap */ static void op_phase_off(remote_fencing_op_t *op) { crm_info("Remapping multiple-device reboot targeting %s to 'off' " CRM_XS " id=%.8s", op->target, op->id); op->phase = st_phase_off; /* Happily, "off" and "on" are shorter than "reboot", so we can reuse the * memory allocation at each phase. */ strcpy(op->action, PCMK_ACTION_OFF); } /*! * \internal * \brief Advance a remapped reboot operation to the "on" phase * * \param[in,out] op Operation to remap */ static void op_phase_on(remote_fencing_op_t *op) { GList *iter = NULL; crm_info("Remapped 'off' targeting %s complete, " "remapping to 'on' for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_on; strcpy(op->action, PCMK_ACTION_ON); /* Skip devices with automatic unfencing, because the cluster will handle it * when the node rejoins. */ for (iter = op->automatic_list; iter != NULL; iter = iter->next) { GList *match = g_list_find_custom(op->devices_list, iter->data, sort_strings); if (match) { op->devices_list = g_list_remove(op->devices_list, match->data); } } g_list_free_full(op->automatic_list, free); op->automatic_list = NULL; /* Rewind device list pointer */ op->devices = op->devices_list; } /*! * \internal * \brief Reset a remapped reboot operation * * \param[in,out] op Operation to reset */ static void undo_op_remap(remote_fencing_op_t *op) { if (op->phase > 0) { crm_info("Undoing remap of reboot targeting %s for %s " CRM_XS " id=%.8s", op->target, op->client_name, op->id); op->phase = st_phase_requested; strcpy(op->action, PCMK_ACTION_REBOOT); } } /*! * \internal * \brief Create notification data XML for a fencing operation result * * \param[in] op Fencer operation that completed * * \return Newly created XML to add as notification data * \note The caller is responsible for freeing the result. */ static xmlNode * fencing_result2xml(const remote_fencing_op_t *op) { xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); crm_xml_add_int(notify_data, "state", op->state); crm_xml_add(notify_data, F_STONITH_TARGET, op->target); crm_xml_add(notify_data, F_STONITH_ACTION, op->action); crm_xml_add(notify_data, F_STONITH_DELEGATE, op->delegate); crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(notify_data, F_STONITH_ORIGIN, op->originator); crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id); crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name); return notify_data; } /*! * \internal * \brief Broadcast a fence result notification to all CPG peers * * \param[in] op Fencer operation that completed * \param[in] op_merged Whether this operation is a duplicate of another */ void fenced_broadcast_op_result(const remote_fencing_op_t *op, bool op_merged) { static int count = 0; xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY); xmlNode *notify_data = fencing_result2xml(op); count++; crm_trace("Broadcasting result to peers"); crm_xml_add(bcast, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(bcast, F_SUBTYPE, "broadcast"); crm_xml_add(bcast, F_STONITH_OPERATION, T_STONITH_NOTIFY); crm_xml_add_int(bcast, "count", count); if (op_merged) { pcmk__xe_set_bool_attr(bcast, F_STONITH_MERGED, true); } stonith__xe_set_result(notify_data, &op->result); add_message_xml(bcast, F_STONITH_CALLDATA, notify_data); send_cluster_message(NULL, crm_msg_stonith_ng, bcast, FALSE); free_xml(notify_data); free_xml(bcast); return; } /*! * \internal * \brief Reply to a local request originator and notify all subscribed clients * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data) { xmlNode *notify_data = NULL; xmlNode *reply = NULL; pcmk__client_t *client = NULL; if (op->notify_sent == TRUE) { /* nothing to do */ return; } /* Do notification with a clean data object */ crm_xml_add_int(data, "state", op->state); crm_xml_add(data, F_STONITH_TARGET, op->target); crm_xml_add(data, F_STONITH_OPERATION, op->action); reply = fenced_construct_reply(op->request, data, &op->result); crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate); /* Send fencing OP reply to local client that initiated fencing */ client = pcmk__find_client_by_id(op->client_id); if (client == NULL) { crm_trace("Skipping reply to %s: no longer a client", op->client_id); } else { do_local_reply(reply, client, op->call_options); } /* bcast to all local clients that the fencing operation happend */ notify_data = fencing_result2xml(op); fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data); free_xml(notify_data); fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); /* mark this op as having notify's already sent */ op->notify_sent = TRUE; free_xml(reply); } /*! * \internal * \brief Finalize all duplicates of a given fencer operation * * \param[in,out] op Fencer operation that completed * \param[in,out] data Top-level XML to add notification to */ static void finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data) { for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *other = iter->data; if (other->state == st_duplicate) { other->state = op->state; crm_debug("Performing duplicate notification for %s@%s: %s " CRM_XS " id=%.8s", other->client_name, other->originator, pcmk_exec_status_str(op->result.execution_status), other->id); pcmk__copy_result(&op->result, &other->result); finalize_op(other, data, true); } else { // Possible if (for example) it timed out already crm_err("Skipping duplicate notification for %s@%s " CRM_XS " state=%s id=%.8s", other->client_name, other->originator, stonith_op_state_str(other->state), other->id); } } } static char * delegate_from_xml(xmlNode *xml) { xmlNode *match = get_xpath_object("//@" F_STONITH_DELEGATE, xml, LOG_NEVER); if (match == NULL) { return crm_element_value_copy(xml, F_ORIG); } else { return crm_element_value_copy(match, F_STONITH_DELEGATE); } } /*! * \internal * \brief Finalize a peer fencing operation * * Clean up after a fencing operation completes. This function has two code * paths: the executioner uses it to broadcast the result to CPG peers, and then * each peer (including the executioner) uses it to process that broadcast and * notify its IPC clients of the result. * * \param[in,out] op Fencer operation that completed * \param[in,out] data If not NULL, XML reply of last delegated operation * \param[in] dup Whether this operation is a duplicate of another * (in which case, do not broadcast the result) * * \note The operation result should be set before calling this function. */ static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup) { int level = LOG_ERR; const char *subt = NULL; xmlNode *local_data = NULL; gboolean op_merged = FALSE; CRM_CHECK((op != NULL), return); // This is a no-op if timers have already been cleared clear_remote_op_timers(op); if (op->notify_sent) { // Most likely, this is a timed-out action that eventually completed crm_notice("Operation '%s'%s%s by %s for %s@%s%s: " "Result arrived too late " CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), op->id); return; } set_fencing_completed(op); undo_op_remap(op); if (data == NULL) { data = create_xml_node(NULL, "remote-op"); local_data = data; } else if (op->delegate == NULL) { switch (op->result.execution_status) { case PCMK_EXEC_NO_FENCE_DEVICE: break; case PCMK_EXEC_INVALID: if (op->result.exit_status != CRM_EX_EXPIRED) { op->delegate = delegate_from_xml(data); } break; default: op->delegate = delegate_from_xml(data); break; } } if (dup || (crm_element_value(data, F_STONITH_MERGED) != NULL)) { op_merged = true; } /* Tell everyone the operation is done, we will continue * with doing the local notifications once we receive * the broadcast back. */ subt = crm_element_value(data, F_SUBTYPE); if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) { /* Defer notification until the bcast message arrives */ fenced_broadcast_op_result(op, op_merged); free_xml(local_data); return; } if (pcmk__result_ok(&op->result) || dup || !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { level = LOG_NOTICE; } do_crm_log(level, "Operation '%s'%s%s by %s for %s@%s%s: %s (%s%s%s) " CRM_XS " id=%.8s", op->action, (op->target? " targeting " : ""), (op->target? op->target : ""), (op->delegate? op->delegate : "unknown node"), op->client_name, op->originator, (op_merged? " (merged)" : ""), crm_exit_str(op->result.exit_status), pcmk_exec_status_str(op->result.execution_status), ((op->result.exit_reason == NULL)? "" : ": "), ((op->result.exit_reason == NULL)? "" : op->result.exit_reason), op->id); handle_local_reply_and_notify(op, data); if (!dup) { finalize_op_duplicates(op, data); } /* Free non-essential parts of the record * Keep the record around so we can query the history */ if (op->query_results) { g_list_free_full(op->query_results, free_remote_query); op->query_results = NULL; } if (op->request) { free_xml(op->request); op->request = NULL; } free_xml(local_data); } /*! * \internal * \brief Finalize a watchdog fencer op after the waiting time expires * * \param[in,out] userdata Fencer operation that completed * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_watchdog_done(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Self-fencing (%s) by %s for %s assumed complete " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, NULL, false); return G_SOURCE_REMOVE; } static gboolean remote_op_timeout_one(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_one = 0; crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, "Peer did not return fence result within timeout"); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; crm_trace("Try another device for '%s' action targeting %s " "for client %s without delay " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } // Try another device, if appropriate request_peer_fencing(op, NULL); return G_SOURCE_REMOVE; } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] op Fencer operation that timed out * \param[in] reason Readable description of what step timed out */ static void finalize_timed_out_op(remote_fencing_op_t *op, const char *reason) { crm_debug("Action '%s' targeting %s for client %s timed out " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); if (op->phase == st_phase_on) { /* A remapped reboot operation timed out in the "on" phase, but the * "off" phase completed successfully, so quit trying any further * devices, and return success. */ op->state = st_done; pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { op->state = st_failed; pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason); } finalize_op(op, NULL, false); } /*! * \internal * \brief Finalize a remote fencer operation that timed out * * \param[in,out] userdata Fencer operation that timed out * * \return G_SOURCE_REMOVE (which tells glib not to restart timer) */ static gboolean remote_op_timeout(gpointer userdata) { remote_fencing_op_t *op = userdata; op->op_timer_total = 0; if (op->state == st_done) { crm_debug("Action '%s' targeting %s for client %s already completed " CRM_XS " id=%.8s", op->action, op->target, op->client_name, op->id); } else { finalize_timed_out_op(userdata, "Fencing did not complete within a " "total timeout based on the " "configured timeout and retries for " "any devices attempted"); } return G_SOURCE_REMOVE; } static gboolean remote_op_query_timeout(gpointer data) { remote_fencing_op_t *op = data; op->query_timer = 0; if (op->state == st_done) { crm_debug("Operation %.8s targeting %s already completed", op->id, op->target); } else if (op->state == st_exec) { crm_debug("Operation %.8s targeting %s already in progress", op->id, op->target); } else if (op->query_results) { // Query succeeded, so attempt the actual fencing crm_debug("Query %.8s targeting %s complete (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); request_peer_fencing(op, NULL); } else { crm_debug("Query %.8s targeting %s timed out (state=%s)", op->id, op->target, stonith_op_state_str(op->state)); finalize_timed_out_op(op, "No capable peers replied to device query " "within timeout"); } return G_SOURCE_REMOVE; } static gboolean topology_is_empty(stonith_topology_t *tp) { int i; if (tp == NULL) { return TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { if (tp->levels[i] != NULL) { return FALSE; } } return TRUE; } /*! * \internal * \brief Add a device to an operation's automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to add */ static void add_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (!match) { op->automatic_list = g_list_prepend(op->automatic_list, strdup(device)); } } /*! * \internal * \brief Remove a device from the automatic unfencing list * * \param[in,out] op Operation to modify * \param[in] device Device ID to remove */ static void remove_required_device(remote_fencing_op_t *op, const char *device) { GList *match = g_list_find_custom(op->automatic_list, device, sort_strings); if (match) { op->automatic_list = g_list_remove(op->automatic_list, match->data); } } /* deep copy the device list */ static void set_op_device_list(remote_fencing_op_t * op, GList *devices) { GList *lpc = NULL; if (op->devices_list) { g_list_free_full(op->devices_list, free); op->devices_list = NULL; } for (lpc = devices; lpc != NULL; lpc = lpc->next) { op->devices_list = g_list_append(op->devices_list, strdup(lpc->data)); } op->devices = op->devices_list; } /*! * \internal * \brief Check whether a node matches a topology target * * \param[in] tp Topology table entry to check * \param[in] node Name of node to check * * \return TRUE if node matches topology target */ static gboolean topology_matches(const stonith_topology_t *tp, const char *node) { regex_t r_patt; CRM_CHECK(node && tp && tp->target, return FALSE); switch (tp->kind) { case fenced_target_by_attribute: /* This level targets by attribute, so tp->target is a NAME=VALUE pair * of a permanent attribute applied to targeted nodes. The test below * relies on the locally cached copy of the CIB, so if fencing needs to * be done before the initial CIB is received or after a malformed CIB * is received, then the topology will be unable to be used. */ if (node_has_attr(node, tp->target_attribute, tp->target_value)) { crm_notice("Matched %s with %s by attribute", node, tp->target); return TRUE; } break; case fenced_target_by_pattern: /* This level targets node names matching a pattern, so tp->target * (and tp->target_pattern) is a regular expression. */ if (regcomp(&r_patt, tp->target_pattern, REG_EXTENDED|REG_NOSUB)) { crm_info("Bad regex '%s' for fencing level", tp->target); } else { int status = regexec(&r_patt, node, 0, NULL, 0); regfree(&r_patt); if (status == 0) { crm_notice("Matched %s with %s by name", node, tp->target); return TRUE; } } break; case fenced_target_by_name: crm_trace("Testing %s against %s", node, tp->target); return pcmk__str_eq(tp->target, node, pcmk__str_casei); default: break; } crm_trace("No match for %s with %s", node, tp->target); return FALSE; } stonith_topology_t * find_topology_for_host(const char *host) { GHashTableIter tIter; stonith_topology_t *tp = g_hash_table_lookup(topology, host); if(tp != NULL) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } g_hash_table_iter_init(&tIter, topology); while (g_hash_table_iter_next(&tIter, NULL, (gpointer *) & tp)) { if (topology_matches(tp, host)) { crm_trace("Found %s for %s in %d entries", tp->target, host, g_hash_table_size(topology)); return tp; } } crm_trace("No matches for %s in %d topology entries", host, g_hash_table_size(topology)); return NULL; } /*! * \internal * \brief Set fencing operation's device list to target's next topology level * * \param[in,out] op Remote fencing operation to modify * \param[in] empty_ok If true, an operation without a target (i.e. * queries) or a target without a topology will get a * pcmk_rc_ok return value instead of ENODEV * * \return Standard Pacemaker return value */ static int advance_topology_level(remote_fencing_op_t *op, bool empty_ok) { stonith_topology_t *tp = NULL; if (op->target) { tp = find_topology_for_host(op->target); } if (topology_is_empty(tp)) { return empty_ok? pcmk_rc_ok : ENODEV; } CRM_ASSERT(tp->levels != NULL); stonith__set_call_options(op->call_options, op->id, st_opt_topology); /* This is a new level, so undo any remapping left over from previous */ undo_op_remap(op); do { op->level++; } while (op->level < ST_LEVEL_MAX && tp->levels[op->level] == NULL); if (op->level < ST_LEVEL_MAX) { crm_trace("Attempting fencing level %d targeting %s (%d devices) " "for client %s@%s (id=%.8s)", op->level, op->target, g_list_length(tp->levels[op->level]), op->client_name, op->originator, op->id); set_op_device_list(op, tp->levels[op->level]); // The requested delay has been applied for the first fencing level if ((op->level > 1) && (op->client_delay > 0)) { op->client_delay = 0; } if ((g_list_next(op->devices_list) != NULL) && pcmk__str_eq(op->action, PCMK_ACTION_REBOOT, pcmk__str_none)) { /* A reboot has been requested for a topology level with multiple * devices. Instead of rebooting the devices sequentially, we will * turn them all off, then turn them all on again. (Think about * switched power outlets for redundant power supplies.) */ op_phase_off(op); } return pcmk_rc_ok; } crm_info("All %sfencing options targeting %s for client %s@%s failed " CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"", op->target, op->client_name, op->originator, op->id); return ENODEV; } /*! * \internal * \brief If fencing operation is a duplicate, merge it into the other one * * \param[in,out] op Fencing operation to check */ static void merge_duplicates(remote_fencing_op_t *op) { GHashTableIter iter; remote_fencing_op_t *other = NULL; time_t now = time(NULL); g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&other)) { const char *other_action = op_requested_action(other); if (!strcmp(op->id, other->id)) { continue; // Don't compare against self } if (other->state > st_exec) { crm_trace("%.8s not duplicate of %.8s: not in progress", op->id, other->id); continue; } if (!pcmk__str_eq(op->target, other->target, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: node %s vs. %s", op->id, other->id, op->target, other->target); continue; } if (!pcmk__str_eq(op->action, other_action, pcmk__str_none)) { crm_trace("%.8s not duplicate of %.8s: action %s vs. %s", op->id, other->id, op->action, other_action); continue; } if (pcmk__str_eq(op->client_name, other->client_name, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: same client %s", op->id, other->id, op->client_name); continue; } if (pcmk__str_eq(other->target, other->originator, pcmk__str_casei)) { crm_trace("%.8s not duplicate of %.8s: suicide for %s", op->id, other->id, other->target); continue; } if (!fencing_peer_active(crm_get_peer(0, other->originator))) { crm_notice("Failing action '%s' targeting %s originating from " "client %s@%s: Originator is dead " CRM_XS " id=%.8s", other->action, other->target, other->client_name, other->originator, other->id); crm_trace("%.8s not duplicate of %.8s: originator dead", op->id, other->id); other->state = st_failed; continue; } if ((other->total_timeout > 0) && (now > (other->total_timeout + other->created))) { crm_trace("%.8s not duplicate of %.8s: old (%ld vs. %ld + %d)", op->id, other->id, now, other->created, other->total_timeout); continue; } /* There is another in-flight request to fence the same host * Piggyback on that instead. If it fails, so do we. */ other->duplicates = g_list_append(other->duplicates, op); if (other->total_timeout == 0) { other->total_timeout = op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, NULL); crm_trace("Best guess as to timeout used for %.8s: %d", other->id, other->total_timeout); } crm_notice("Merging fencing action '%s' targeting %s originating from " "client %s with identical request from %s@%s " CRM_XS " original=%.8s duplicate=%.8s total_timeout=%ds", op->action, op->target, op->client_name, other->client_name, other->originator, op->id, other->id, other->total_timeout); report_timeout_period(op, other->total_timeout); op->state = st_duplicate; } } static uint32_t fencing_active_peers(void) { uint32_t count = 0; crm_node_t *entry; GHashTableIter gIter; g_hash_table_iter_init(&gIter, crm_peer_cache); while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) { if(fencing_peer_active(entry)) { count++; } } return count; } /*! * \internal * \brief Process a manual confirmation of a pending fence action * * \param[in] client IPC client that sent confirmation * \param[in,out] msg Request XML with manual confirmation * * \return Standard Pacemaker return code */ int fenced_handle_manual_confirmation(const pcmk__client_t *client, xmlNode *msg) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); CRM_CHECK(dev != NULL, return EPROTO); crm_notice("Received manual confirmation that %s has been fenced", pcmk__s(crm_element_value(dev, F_STONITH_TARGET), "unknown target")); op = initiate_remote_stonith_op(client, msg, TRUE); if (op == NULL) { return EPROTO; } op->state = st_done; set_fencing_completed(op); op->delegate = strdup("a human"); // For the fencer's purposes, the fencing operation is done pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); finalize_op(op, msg, false); /* For the requester's purposes, the operation is still pending. The * actual result will be sent asynchronously via the operation's done_cb(). */ return EINPROGRESS; } /*! * \internal * \brief Create a new remote stonith operation * * \param[in] client ID of local stonith client that initiated the operation * \param[in] request The request from the client that started the operation * \param[in] peer TRUE if this operation is owned by another stonith peer * (an operation owned by one peer is stored on all peers, * but only the owner executes it; all nodes get the results * once the owner finishes execution) */ void * create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) { remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_NEVER); int call_options = 0; const char *operation = NULL; init_stonith_remote_op_hash_table(&stonith_remote_op_list); /* If this operation is owned by another node, check to make * sure we haven't already created this operation. */ if (peer && dev) { const char *op_id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(op_id != NULL, return NULL); op = g_hash_table_lookup(stonith_remote_op_list, op_id); if (op) { crm_debug("Reusing existing remote fencing op %.8s for %s", op_id, ((client == NULL)? "unknown client" : client)); return op; } } op = calloc(1, sizeof(remote_fencing_op_t)); CRM_ASSERT(op != NULL); crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout)); // Value -1 means disable any static/random fencing delays crm_element_value_int(request, F_STONITH_DELAY, &(op->client_delay)); if (peer && dev) { op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID); } else { op->id = crm_generate_uuid(); } g_hash_table_replace(stonith_remote_op_list, op->id, op); op->state = st_query; op->replies_expected = fencing_active_peers(); op->action = crm_element_value_copy(dev, F_STONITH_ACTION); op->originator = crm_element_value_copy(dev, F_STONITH_ORIGIN); op->delegate = crm_element_value_copy(dev, F_STONITH_DELEGATE); /* May not be set */ op->created = time(NULL); if (op->originator == NULL) { /* Local or relayed request */ op->originator = strdup(stonith_our_uname); } CRM_LOG_ASSERT(client != NULL); if (client) { op->client_id = strdup(client); } /* For a RELAY operation, set fenced on the client. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { op->client_name = crm_strdup_printf("%s.%lu", crm_system_name, (unsigned long) getpid()); } else { op->client_name = crm_element_value_copy(request, F_STONITH_CLIENTNAME); } op->target = crm_element_value_copy(dev, F_STONITH_TARGET); op->request = copy_xml(request); /* TODO: Figure out how to avoid this */ crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); op->call_options = call_options; crm_element_value_int(request, F_STONITH_CALLID, &(op->client_callid)); crm_trace("%s new fencing op %s ('%s' targeting %s for client %s, " "base timeout %d, %u %s expected)", (peer && dev)? "Recorded" : "Generated", op->id, op->action, op->target, op->client_name, op->base_timeout, op->replies_expected, pcmk__plural_alt(op->replies_expected, "reply", "replies")); if (op->call_options & st_opt_cs_nodeid) { int nodeid; crm_node_t *node; pcmk__scan_min_int(op->target, &nodeid, 0); node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); /* Ensure the conversion only happens once */ stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); if (node && node->uname) { free(op->target); op->target = strdup(node->uname); } else { crm_warn("Could not expand nodeid '%s' into a host name", op->target); } } /* check to see if this is a duplicate operation of another in-flight operation */ merge_duplicates(op); if (op->state != st_duplicate) { /* kick history readers */ fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL); } /* safe to trim as long as that doesn't touch pending ops */ stonith_fence_history_trim(); return op; } /*! * \internal * \brief Create a peer fencing operation from a request, and initiate it * * \param[in] client IPC client that made request (NULL to get from request) * \param[in] request Request XML * \param[in] manual_ack Whether this is a manual action confirmation * * \return Newly created operation on success, otherwise NULL */ remote_fencing_op_t * initiate_remote_stonith_op(const pcmk__client_t *client, xmlNode *request, gboolean manual_ack) { int query_timeout = 0; xmlNode *query = NULL; const char *client_id = NULL; remote_fencing_op_t *op = NULL; const char *relay_op_id = NULL; const char *operation = NULL; if (client) { client_id = client->id; } else { client_id = crm_element_value(request, F_STONITH_CLIENTID); } CRM_LOG_ASSERT(client_id != NULL); op = create_remote_stonith_op(client_id, request, FALSE); op->owner = TRUE; if (manual_ack) { return op; } CRM_CHECK(op->action, return NULL); if (advance_topology_level(op, true) != pcmk_rc_ok) { op->state = st_failed; } switch (op->state) { case st_failed: // advance_topology_level() exhausted levels pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR, "All topology levels failed"); crm_warn("Could not request peer fencing (%s) targeting %s " CRM_XS " id=%.8s", op->action, op->target, op->id); finalize_op(op, NULL, false); return op; case st_duplicate: crm_info("Requesting peer fencing (%s) targeting %s (duplicate) " CRM_XS " id=%.8s", op->action, op->target, op->id); return op; default: crm_notice("Requesting peer fencing (%s) targeting %s " CRM_XS " id=%.8s state=%s base_timeout=%d", op->action, op->target, op->id, stonith_op_state_str(op->state), op->base_timeout); } query = stonith_create_op(op->client_callid, op->id, STONITH_OP_QUERY, NULL, op->call_options); crm_xml_add(query, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(query, F_STONITH_TARGET, op->target); crm_xml_add(query, F_STONITH_ACTION, op_requested_action(op)); crm_xml_add(query, F_STONITH_ORIGIN, op->originator); crm_xml_add(query, F_STONITH_CLIENTID, op->client_id); crm_xml_add(query, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(query, F_STONITH_TIMEOUT, op->base_timeout); /* In case of RELAY operation, RELAY information is added to the query to delete the original operation of RELAY. */ operation = crm_element_value(request, F_STONITH_OPERATION); if (pcmk__str_eq(operation, STONITH_OP_RELAY, pcmk__str_none)) { relay_op_id = crm_element_value(request, F_STONITH_REMOTE_OP_ID); if (relay_op_id) { crm_xml_add(query, F_STONITH_REMOTE_OP_ID_RELAY, relay_op_id); } } send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); query_timeout = op->base_timeout * TIMEOUT_MULTIPLY_FACTOR; op->query_timer = g_timeout_add((1000 * query_timeout), remote_op_query_timeout, op); return op; } enum find_best_peer_options { /*! Skip checking the target peer for capable fencing devices */ FIND_PEER_SKIP_TARGET = 0x0001, /*! Only check the target peer for capable fencing devices */ FIND_PEER_TARGET_ONLY = 0x0002, /*! Skip peers and devices that are not verified */ FIND_PEER_VERIFIED_ONLY = 0x0004, }; static peer_device_info_t * find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) { GList *iter = NULL; gboolean verified_devices_only = (options & FIND_PEER_VERIFIED_ONLY) ? TRUE : FALSE; if (!device && pcmk_is_set(op->call_options, st_opt_topology)) { return NULL; } for (iter = op->query_results; iter != NULL; iter = iter->next) { peer_device_info_t *peer = iter->data; crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", peer->host, op->target, peer->ndevices, pcmk__plural_s(peer->ndevices), peer->tried, options); if ((options & FIND_PEER_SKIP_TARGET) && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if ((options & FIND_PEER_TARGET_ONLY) && !pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } if (pcmk_is_set(op->call_options, st_opt_topology)) { if (grab_peer_device(op, peer, device, verified_devices_only)) { return peer; } } else if (!peer->tried && count_peer_devices(op, peer, verified_devices_only, fenced_support_flag(op->action))) { /* No topology: Use the current best peer */ crm_trace("Simple fencing"); return peer; } } return NULL; } static peer_device_info_t * stonith_choose_peer(remote_fencing_op_t * op) { const char *device = NULL; peer_device_info_t *peer = NULL; uint32_t active = fencing_active_peers(); do { if (op->devices) { device = op->devices->data; crm_trace("Checking for someone to fence (%s) %s using %s", op->action, op->target, device); } else { crm_trace("Checking for someone to fence (%s) %s", op->action, op->target); } /* Best choice is a peer other than the target with verified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET|FIND_PEER_VERIFIED_ONLY); if (peer) { crm_trace("Found verified peer %s for %s", peer->host, device?device:""); return peer; } if(op->query_timer != 0 && op->replies < QB_MIN(op->replies_expected, active)) { crm_trace("Waiting before looking for unverified devices to fence %s", op->target); return NULL; } /* If no other peer has verified access, next best is unverified access */ peer = find_best_peer(device, op, FIND_PEER_SKIP_TARGET); if (peer) { crm_trace("Found best unverified peer %s", peer->host); return peer; } /* If no other peer can do it, last option is self-fencing * (which is never allowed for the "on" phase of a remapped reboot) */ if (op->phase != st_phase_on) { peer = find_best_peer(device, op, FIND_PEER_TARGET_ONLY); if (peer) { crm_trace("%s will fence itself", peer->host); return peer; } } /* Try the next fencing level if there is one (unless we're in the "on" * phase of a remapped "reboot", because we ignore errors in that case) */ } while ((op->phase != st_phase_on) && pcmk_is_set(op->call_options, st_opt_topology) && (advance_topology_level(op, false) == pcmk_rc_ok)); if ((stonith_watchdog_timeout_ms > 0) && pcmk__is_fencing_action(op->action) && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) && node_does_watchdog_fencing(op->target)) { crm_info("Couldn't contact watchdog-fencing target-node (%s)", op->target); /* check_watchdog_fencing_and_wait will log additional info */ } else { crm_notice("Couldn't find anyone to fence (%s) %s using %s", op->action, op->target, (device? device : "any device")); } return NULL; } static int get_device_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer, const char *device, bool with_delay) { device_properties_t *props; int delay = 0; if (!peer || !device) { return op->base_timeout; } props = g_hash_table_lookup(peer->devices, device); if (!props) { return op->base_timeout; } // op->client_delay < 0 means disable any static/random fencing delays if (with_delay && (op->client_delay >= 0)) { // delay_base is eventually limited by delay_max delay = (props->delay_max[op->phase] > 0 ? props->delay_max[op->phase] : props->delay_base[op->phase]); } return (props->custom_action_timeout[op->phase]? props->custom_action_timeout[op->phase] : op->base_timeout) + delay; } struct timeout_data { const remote_fencing_op_t *op; const peer_device_info_t *peer; int total_timeout; }; /*! * \internal * \brief Add timeout to a total if device has not been executed yet * * \param[in] key GHashTable key (device ID) * \param[in] value GHashTable value (device properties) * \param[in,out] user_data Timeout data */ static void add_device_timeout(gpointer key, gpointer value, gpointer user_data) { const char *device_id = key; device_properties_t *props = value; struct timeout_data *timeout = user_data; if (!props->executed[timeout->op->phase] && !props->disallowed[timeout->op->phase]) { timeout->total_timeout += get_device_timeout(timeout->op, timeout->peer, device_id, true); } } static int get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) { struct timeout_data timeout; timeout.op = op; timeout.peer = peer; timeout.total_timeout = 0; g_hash_table_foreach(peer->devices, add_device_timeout, &timeout); return (timeout.total_timeout? timeout.total_timeout : op->base_timeout); } static int get_op_total_timeout(const remote_fencing_op_t *op, const peer_device_info_t *chosen_peer) { int total_timeout = 0; stonith_topology_t *tp = find_topology_for_host(op->target); if (pcmk_is_set(op->call_options, st_opt_topology) && tp) { int i; GList *device_list = NULL; GList *iter = NULL; GList *auto_list = NULL; if (pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none) && (op->automatic_list != NULL)) { auto_list = g_list_copy(op->automatic_list); } /* Yep, this looks scary, nested loops all over the place. * Here is what is going on. * Loop1: Iterate through fencing levels. * Loop2: If a fencing level has devices, loop through each device * Loop3: For each device in a fencing level, see what peer owns it * and what that peer has reported the timeout is for the device. */ for (i = 0; i < ST_LEVEL_MAX; i++) { if (!tp->levels[i]) { continue; } for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { /* in case of watchdog-device we add the timeout to the budget regardless of if we got a reply or not */ if ((stonith_watchdog_timeout_ms > 0) && pcmk__is_fencing_action(op->action) && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID, pcmk__str_none) && node_does_watchdog_fencing(op->target)) { total_timeout += stonith_watchdog_timeout_ms / 1000; continue; } for (iter = op->query_results; iter != NULL; iter = iter->next) { const peer_device_info_t *peer = iter->data; if (auto_list) { GList *match = g_list_find_custom(auto_list, device_list->data, sort_strings); if (match) { auto_list = g_list_remove(auto_list, match->data); } } if (find_peer_device(op, peer, device_list->data, fenced_support_flag(op->action))) { total_timeout += get_device_timeout(op, peer, device_list->data, true); break; } } /* End Loop3: match device with peer that owns device, find device's timeout period */ } /* End Loop2: iterate through devices at a specific level */ } /*End Loop1: iterate through fencing levels */ //Add only exists automatic_list device timeout if (auto_list) { for (iter = auto_list; iter != NULL; iter = iter->next) { GList *iter2 = NULL; for (iter2 = op->query_results; iter2 != NULL; iter = iter2->next) { peer_device_info_t *peer = iter2->data; if (find_peer_device(op, peer, iter->data, st_device_supports_on)) { total_timeout += get_device_timeout(op, peer, iter->data, true); break; } } } } g_list_free(auto_list); } else if (chosen_peer) { total_timeout = get_peer_timeout(op, chosen_peer); } else { total_timeout = op->base_timeout; } /* Take any requested fencing delay into account to prevent it from eating * up the total timeout. */ return ((total_timeout ? total_timeout : op->base_timeout) + ((op->client_delay > 0)? op->client_delay : 0)); } static void report_timeout_period(remote_fencing_op_t * op, int op_timeout) { GList *iter = NULL; xmlNode *update = NULL; const char *client_node = NULL; const char *client_id = NULL; const char *call_id = NULL; if (op->call_options & st_opt_sync_call) { /* There is no reason to report the timeout for a synchronous call. It * is impossible to use the reported timeout to do anything when the client * is blocking for the response. This update is only important for * async calls that require a callback to report the results in. */ return; } else if (!op->request) { return; } crm_trace("Reporting timeout for %s (id=%.8s)", op->client_name, op->id); client_node = crm_element_value(op->request, F_STONITH_CLIENTNODE); call_id = crm_element_value(op->request, F_STONITH_CALLID); client_id = crm_element_value(op->request, F_STONITH_CLIENTID); if (!client_node || !call_id || !client_id) { return; } if (pcmk__str_eq(client_node, stonith_our_uname, pcmk__str_casei)) { // Client is connected to this node, so send update directly to them do_stonith_async_timeout_update(client_id, call_id, op_timeout); return; } /* The client is connected to another node, relay this update to them */ update = stonith_create_op(op->client_callid, op->id, STONITH_OP_TIMEOUT_UPDATE, NULL, 0); crm_xml_add(update, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(update, F_STONITH_CLIENTID, client_id); crm_xml_add(update, F_STONITH_CALLID, call_id); crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); free_xml(update); for (iter = op->duplicates; iter != NULL; iter = iter->next) { remote_fencing_op_t *dup = iter->data; crm_trace("Reporting timeout for duplicate %.8s to client %s", dup->id, dup->client_name); report_timeout_period(iter->data, op_timeout); } } /*! * \internal * \brief Advance an operation to the next device in its topology * * \param[in,out] op Fencer operation to advance * \param[in] device ID of device that just completed * \param[in,out] msg If not NULL, XML reply of last delegated operation */ static void advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, xmlNode *msg) { /* Advance to the next device at this topology level, if any */ if (op->devices) { op->devices = op->devices->next; } /* Handle automatic unfencing if an "on" action was requested */ if ((op->phase == st_phase_requested) && pcmk__str_eq(op->action, PCMK_ACTION_ON, pcmk__str_none)) { /* If the device we just executed was required, it's not anymore */ remove_required_device(op, device); /* If there are no more devices at this topology level, run through any * remaining devices with automatic unfencing */ if (op->devices == NULL) { op->devices = op->automatic_list; } } if ((op->devices == NULL) && (op->phase == st_phase_off)) { /* We're done with this level and with required devices, but we had * remapped "reboot" to "off", so start over with "on". If any devices * need to be turned back on, op->devices will be non-NULL after this. */ op_phase_on(op); } // This function is only called if the previous device succeeded pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (op->devices) { /* Necessary devices remain, so execute the next one */ crm_trace("Next targeting %s on behalf of %s@%s", op->target, op->client_name, op->originator); // The requested delay has been applied for the first device if (op->client_delay > 0) { op->client_delay = 0; } request_peer_fencing(op, NULL); } else { /* We're done with all devices and phases, so finalize operation */ crm_trace("Marking complex fencing op targeting %s as complete", op->target); op->state = st_done; finalize_op(op, msg, false); } } static gboolean check_watchdog_fencing_and_wait(remote_fencing_op_t * op) { if (node_does_watchdog_fencing(op->target)) { crm_notice("Waiting %lds for %s to self-fence (%s) for " "client %s " CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), op->target, op->action, op->client_name, op->id); if (op->op_timer_one) { g_source_remove(op->op_timer_one); } op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); return TRUE; } else { crm_debug("Skipping fallback to watchdog-fencing as %s is " "not in host-list", op->target); } return FALSE; } /*! * \internal * \brief Ask a peer to execute a fencing operation * * \param[in,out] op Fencing operation to be executed * \param[in,out] peer If NULL or topology is in use, choose best peer to * execute the fencing, otherwise use this peer */ static void request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) { const char *device = NULL; int timeout; CRM_CHECK(op != NULL, return); crm_trace("Action %.8s targeting %s for %s is %s", op->id, op->target, op->client_name, stonith_op_state_str(op->state)); if ((op->phase == st_phase_on) && (op->devices != NULL)) { /* We are in the "on" phase of a remapped topology reboot. If this * device has pcmk_reboot_action="off", or doesn't support the "on" * action, skip it. * * We can't check device properties at this point because we haven't * chosen a peer for this stage yet. Instead, we check the local node's * knowledge about the device. If different versions of the fence agent * are installed on different nodes, there's a chance this could be * mistaken, but the worst that could happen is we don't try turning the * node back on when we should. */ device = op->devices->data; if (pcmk__str_eq(fenced_device_reboot_action(device), PCMK_ACTION_OFF, pcmk__str_none)) { crm_info("Not turning %s back on using %s because the device is " "configured to stay off (pcmk_reboot_action='off')", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } if (!fenced_device_supports_on(device)) { crm_info("Not turning %s back on using %s because the agent " "doesn't support 'on'", op->target, device); advance_topology_device_in_level(op, device, NULL); return; } } timeout = op->base_timeout; if ((peer == NULL) && !pcmk_is_set(op->call_options, st_opt_topology)) { peer = stonith_choose_peer(op); } if (!op->op_timer_total) { op->total_timeout = TIMEOUT_MULTIPLY_FACTOR * get_op_total_timeout(op, peer); op->op_timer_total = g_timeout_add(1000 * op->total_timeout, remote_op_timeout, op); report_timeout_period(op, op->total_timeout); crm_info("Total timeout set to %d for peer's fencing targeting %s for %s" CRM_XS "id=%.8s", op->total_timeout, op->target, op->client_name, op->id); } if (pcmk_is_set(op->call_options, st_opt_topology) && op->devices) { /* Ignore the caller's peer preference if topology is in use, because * that peer might not have access to the required device. With * topology, stonith_choose_peer() removes the device from further * consideration, so the timeout must be calculated beforehand. * * @TODO Basing the total timeout on the caller's preferred peer (above) * is less than ideal. */ peer = stonith_choose_peer(op); device = op->devices->data; /* Fencing timeout sent to peer takes no delay into account. * The peer will add a dedicated timer for any delay upon * schedule_stonith_command(). */ timeout = get_device_timeout(op, peer, device, false); } if (peer) { int timeout_one = 0; xmlNode *remote_op = stonith_create_op(op->client_callid, op->id, STONITH_OP_FENCE, NULL, 0); if (op->client_delay > 0) { /* Take requested fencing delay into account to prevent it from * eating up the timeout. */ timeout_one = TIMEOUT_MULTIPLY_FACTOR * op->client_delay; } crm_xml_add(remote_op, F_STONITH_REMOTE_OP_ID, op->id); crm_xml_add(remote_op, F_STONITH_TARGET, op->target); crm_xml_add(remote_op, F_STONITH_ACTION, op->action); crm_xml_add(remote_op, F_STONITH_ORIGIN, op->originator); crm_xml_add(remote_op, F_STONITH_CLIENTID, op->client_id); crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name); crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout); crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options); crm_xml_add_int(remote_op, F_STONITH_DELAY, op->client_delay); if (device) { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_device_timeout(op, peer, device, true); crm_notice("Requesting that %s perform '%s' action targeting %s " "using %s " CRM_XS " for client %s (%ds)", peer->host, op->action, op->target, device, op->client_name, timeout_one); crm_xml_add(remote_op, F_STONITH_DEVICE, device); } else { timeout_one += TIMEOUT_MULTIPLY_FACTOR * get_peer_timeout(op, peer); crm_notice("Requesting that %s perform '%s' action targeting %s " CRM_XS " for client %s (%ds, %lds)", peer->host, op->action, op->target, op->client_name, timeout_one, stonith_watchdog_timeout_ms); } op->state = st_exec; if (op->op_timer_one) { g_source_remove(op->op_timer_one); op->op_timer_one = 0; } if (!((stonith_watchdog_timeout_ms > 0) && (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none) || (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) && pcmk__is_fencing_action(op->action))) && check_watchdog_fencing_and_wait(op))) { /* Some thoughts about self-fencing cases reaching this point: - Actually check in check_watchdog_fencing_and_wait shouldn't fail if STONITH_WATCHDOG_ID is chosen as fencing-device and it being present implies watchdog-fencing is enabled anyway - If watchdog-fencing is disabled either in general or for a specific target - detected in check_watchdog_fencing_and_wait - for some other kind of self-fencing we can't expect a success answer but timeout is fine if the node doesn't come back in between - Delicate might be the case where we have watchdog-fencing enabled for a node but the watchdog-fencing-device isn't explicitly chosen for suicide. Local pe-execution in sbd may detect the node as unclean and lead to timely suicide. - Otherwise the selection of stonith-watchdog-timeout at - least is questionable. + Otherwise the selection of PCMK_OPT_STONITH_WATCHDOG_TIMEOUT + at least is questionable. */ /* coming here we're not waiting for watchdog timeout - thus engage timer with timout evaluated before */ op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); } send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); peer->tried = TRUE; free_xml(remote_op); return; } else if (op->phase == st_phase_on) { /* A remapped "on" cannot be executed, but the node was already * turned off successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " "after successful 'off'", device, op->target); advance_topology_device_in_level(op, device, NULL); return; } else if (op->owner == FALSE) { crm_err("Fencing (%s) targeting %s for client %s is not ours to control", op->action, op->target, op->client_name); } else if (op->query_timer == 0) { /* We've exhausted all available peers */ crm_info("No remaining peers capable of fencing (%s) %s for client %s " CRM_XS " state=%s", op->action, op->target, op->client_name, stonith_op_state_str(op->state)); CRM_CHECK(op->state < st_done, return); finalize_timed_out_op(op, "All nodes failed, or are unable, to " "fence target"); } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { /* if the operation never left the query state, * but we have all the expected replies, then no devices * are available to execute the fencing operation. */ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { if (check_watchdog_fencing_and_wait(op)) { return; } } if (op->state == st_query) { crm_info("No peers (out of %d) have devices capable of fencing " "(%s) %s for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } else { if (pcmk_is_set(op->call_options, st_opt_topology)) { pcmk__reset_result(&op->result); pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, NULL); } /* ... else use existing result from previous failed attempt * (topology is not in use, and no devices remain to be attempted). * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would * prevent finalize_op() from setting the correct delegate if * needed. */ crm_info("No peers (out of %d) are capable of fencing (%s) %s " "for client %s " CRM_XS " state=%s", op->replies, op->action, op->target, op->client_name, stonith_op_state_str(op->state)); } op->state = st_failed; finalize_op(op, NULL, false); } else { crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s " "for client %s " CRM_XS " id=%.8s", op->action, op->target, (device? " using " : ""), (device? device : ""), op->client_name, op->id); } } /*! * \internal * \brief Comparison function for sorting query results * * \param[in] a GList item to compare * \param[in] b GList item to compare * * \return Per the glib documentation, "a negative integer if the first value * comes before the second, 0 if they are equal, or a positive integer * if the first value comes after the second." */ static gint sort_peers(gconstpointer a, gconstpointer b) { const peer_device_info_t *peer_a = a; const peer_device_info_t *peer_b = b; return (peer_b->ndevices - peer_a->ndevices); } /*! * \internal * \brief Determine if all the devices in the topology are found or not * * \param[in] op Fencing operation with topology to check */ static gboolean all_topology_devices_found(const remote_fencing_op_t *op) { GList *device = NULL; GList *iter = NULL; device_properties_t *match = NULL; stonith_topology_t *tp = NULL; gboolean skip_target = FALSE; int i; tp = find_topology_for_host(op->target); if (!tp) { return FALSE; } if (pcmk__is_fencing_action(op->action)) { /* Don't count the devices on the target node if we are killing * the target node. */ skip_target = TRUE; } for (i = 0; i < ST_LEVEL_MAX; i++) { for (device = tp->levels[i]; device; device = device->next) { match = NULL; for (iter = op->query_results; iter && !match; iter = iter->next) { peer_device_info_t *peer = iter->data; if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { continue; } match = find_peer_device(op, peer, device->data, st_device_supports_none); } if (!match) { return FALSE; } } } return TRUE; } /*! * \internal * \brief Parse action-specific device properties from XML * * \param[in] xml XML element containing the properties * \param[in] peer Name of peer that sent XML (for logs) * \param[in] device Device ID (for logs) * \param[in] action Action the properties relate to (for logs) * \param[in,out] op Fencing operation that properties are being parsed for * \param[in] phase Phase the properties relate to * \param[in,out] props Device properties to update */ static void parse_action_specific(const xmlNode *xml, const char *peer, const char *device, const char *action, remote_fencing_op_t *op, enum st_remap_phase phase, device_properties_t *props) { props->custom_action_timeout[phase] = 0; crm_element_value_int(xml, F_STONITH_ACTION_TIMEOUT, &props->custom_action_timeout[phase]); if (props->custom_action_timeout[phase]) { crm_trace("Peer %s with device %s returned %s action timeout %d", peer, device, action, props->custom_action_timeout[phase]); } props->delay_max[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_MAX, &props->delay_max[phase]); if (props->delay_max[phase]) { crm_trace("Peer %s with device %s returned maximum of random delay %d for %s", peer, device, props->delay_max[phase], action); } props->delay_base[phase] = 0; crm_element_value_int(xml, F_STONITH_DELAY_BASE, &props->delay_base[phase]); if (props->delay_base[phase]) { crm_trace("Peer %s with device %s returned base delay %d for %s", peer, device, props->delay_base[phase], action); } /* Handle devices with automatic unfencing */ if (pcmk__str_eq(action, PCMK_ACTION_ON, pcmk__str_none)) { int required = 0; crm_element_value_int(xml, F_STONITH_DEVICE_REQUIRED, &required); if (required) { crm_trace("Peer %s requires device %s to execute for action %s", peer, device, action); add_required_device(op, device); } } /* If a reboot is remapped to off+on, it's possible that a node is allowed * to perform one action but not another. */ if (pcmk__xe_attr_is_true(xml, F_STONITH_ACTION_DISALLOWED)) { props->disallowed[phase] = TRUE; crm_trace("Peer %s is disallowed from executing %s for device %s", peer, action, device); } } /*! * \internal * \brief Parse one device's properties from peer's XML query reply * * \param[in] xml XML node containing device properties * \param[in,out] op Operation that query and reply relate to * \param[in,out] peer Peer's device information * \param[in] device ID of device being parsed */ static void add_device_properties(const xmlNode *xml, remote_fencing_op_t *op, peer_device_info_t *peer, const char *device) { xmlNode *child; int verified = 0; device_properties_t *props = calloc(1, sizeof(device_properties_t)); int flags = st_device_supports_on; /* Old nodes that don't set the flag assume they support the on action */ /* Add a new entry to this peer's devices list */ CRM_ASSERT(props != NULL); g_hash_table_insert(peer->devices, strdup(device), props); /* Peers with verified (monitored) access will be preferred */ crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); if (verified) { crm_trace("Peer %s has confirmed a verified device %s", peer->host, device); props->verified = TRUE; } crm_element_value_int(xml, F_STONITH_DEVICE_SUPPORT_FLAGS, &flags); props->device_support_flags = flags; /* Parse action-specific device properties */ parse_action_specific(xml, peer->host, device, op_requested_action(op), op, st_phase_requested, props); for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { /* Replies for "reboot" operations will include the action-specific * values for "off" and "on" in child elements, just in case the reboot * winds up getting remapped. */ if (pcmk__str_eq(ID(child), PCMK_ACTION_OFF, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_OFF, op, st_phase_off, props); } else if (pcmk__str_eq(ID(child), PCMK_ACTION_ON, pcmk__str_none)) { parse_action_specific(child, peer->host, device, PCMK_ACTION_ON, op, st_phase_on, props); } } } /*! * \internal * \brief Parse a peer's XML query reply and add it to operation's results * * \param[in,out] op Operation that query and reply relate to * \param[in] host Name of peer that sent this reply * \param[in] ndevices Number of devices expected in reply * \param[in] xml XML node containing device list * * \return Newly allocated result structure with parsed reply */ static peer_device_info_t * add_result(remote_fencing_op_t *op, const char *host, int ndevices, const xmlNode *xml) { peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t)); xmlNode *child; // cppcheck seems not to understand the abort logic in CRM_CHECK // cppcheck-suppress memleak CRM_CHECK(peer != NULL, return NULL); peer->host = strdup(host); peer->devices = pcmk__strkey_table(free, free); /* Each child element describes one capable device available to the peer */ for (child = pcmk__xml_first_child(xml); child != NULL; child = pcmk__xml_next(child)) { const char *device = ID(child); if (device) { add_device_properties(child, op, peer, device); } } peer->ndevices = g_hash_table_size(peer->devices); CRM_CHECK(ndevices == peer->ndevices, crm_err("Query claimed to have %d device%s but %d found", ndevices, pcmk__plural_s(ndevices), peer->ndevices)); op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); return peer; } /*! * \internal * \brief Handle a peer's reply to our fencing query * * Parse a query result from XML and store it in the remote operation * table, and when enough replies have been received, issue a fencing request. * * \param[in] msg XML reply received * * \return pcmk_ok on success, -errno on error * * \note See initiate_remote_stonith_op() for how the XML query was initially * formed, and stonith_query() for how the peer formed its XML reply. */ int process_remote_stonith_query(xmlNode *msg) { int ndevices = 0; gboolean host_is_target = FALSE; gboolean have_all_replies = FALSE; const char *id = NULL; const char *host = NULL; remote_fencing_op_t *op = NULL; peer_device_info_t *peer = NULL; uint32_t replies_expected; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return -EPROTO); dev = get_xpath_object("//@" F_STONITH_AVAILABLE_DEVICES, msg, LOG_ERR); CRM_CHECK(dev != NULL, return -EPROTO); crm_element_value_int(dev, F_STONITH_AVAILABLE_DEVICES, &ndevices); op = g_hash_table_lookup(stonith_remote_op_list, id); if (op == NULL) { crm_debug("Received query reply for unknown or expired operation %s", id); return -EOPNOTSUPP; } replies_expected = fencing_active_peers(); if (op->replies_expected < replies_expected) { replies_expected = op->replies_expected; } if ((++op->replies >= replies_expected) && (op->state == st_query)) { have_all_replies = TRUE; } host = crm_element_value(msg, F_ORIG); host_is_target = pcmk__str_eq(host, op->target, pcmk__str_casei); crm_info("Query result %d of %d from %s for %s/%s (%d device%s) %s", op->replies, replies_expected, host, op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); if (ndevices > 0) { peer = add_result(op, host, ndevices, dev); } pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); if (pcmk_is_set(op->call_options, st_opt_topology)) { /* If we start the fencing before all the topology results are in, * it is possible fencing levels will be skipped because of the missing * query results. */ if (op->state == st_query && all_topology_devices_found(op)) { /* All the query results are in for the topology, start the fencing ops. */ crm_trace("All topology devices found"); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } } else if (op->state == st_query) { int nverified = count_peer_devices(op, peer, TRUE, fenced_support_flag(op->action)); /* We have a result for a non-topology fencing op that looks promising, * go ahead and start fencing before query timeout */ if ((peer != NULL) && !host_is_target && nverified) { /* we have a verified device living on a peer that is not the target */ crm_trace("Found %d verified device%s", nverified, pcmk__plural_s(nverified)); request_peer_fencing(op, peer); } else if (have_all_replies) { crm_info("All query replies have arrived, continuing (%d expected/%d received) ", replies_expected, op->replies); request_peer_fencing(op, NULL); } else { crm_trace("Waiting for more peer results before launching fencing operation"); } } else if ((peer != NULL) && (op->state == st_done)) { crm_info("Discarding query result from %s (%d device%s): " "Operation is %s", peer->host, peer->ndevices, pcmk__plural_s(peer->ndevices), stonith_op_state_str(op->state)); } return pcmk_ok; } /*! * \internal * \brief Handle a peer's reply to a fencing request * * Parse a fencing reply from XML, and either finalize the operation * or attempt another device as appropriate. * * \param[in] msg XML reply received */ void fenced_process_fencing_reply(xmlNode *msg) { const char *id = NULL; const char *device = NULL; remote_fencing_op_t *op = NULL; xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(dev != NULL, return); id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); CRM_CHECK(id != NULL, return); dev = stonith__find_xe_with_result(msg); CRM_CHECK(dev != NULL, return); stonith__xe_get_result(dev, &result); device = crm_element_value(dev, F_STONITH_DEVICE); if (stonith_remote_op_list) { op = g_hash_table_lookup(stonith_remote_op_list, id); } if ((op == NULL) && pcmk__result_ok(&result)) { /* Record successful fencing operations */ const char *client_id = crm_element_value(dev, F_STONITH_CLIENTID); op = create_remote_stonith_op(client_id, dev, TRUE); } if (op == NULL) { /* Could be for an event that began before we started */ /* TODO: Record the op for later querying */ crm_info("Received peer result of unknown or expired operation %s", id); pcmk__reset_result(&result); return; } pcmk__reset_result(&op->result); op->result = result; // The operation takes ownership of the result if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { crm_err("Received outdated reply for device %s (instead of %s) to " "fence (%s) %s. Operation already timed out at peer level.", device, (const char *) op->devices->data, op->action, op->target); return; } if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) { if (pcmk__result_ok(&op->result)) { op->state = st_done; } else { op->state = st_failed; } finalize_op(op, msg, false); return; } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { /* If this isn't a remote level broadcast, and we are not the * originator of the operation, we should not be receiving this msg. */ crm_err("Received non-broadcast fencing result for operation %.8s " "we do not own (device %s targeting %s)", op->id, device, op->target); return; } if (pcmk_is_set(op->call_options, st_opt_topology)) { const char *device = NULL; const char *reason = op->result.exit_reason; /* We own the op, and it is complete. broadcast the result to all nodes * and notify our local clients. */ if (op->state == st_done) { finalize_op(op, msg, false); return; } device = crm_element_value(msg, F_STONITH_DEVICE); if ((op->phase == 2) && !pcmk__result_ok(&op->result)) { /* A remapped "on" failed, but the node was already turned off * successfully, so ignore the error and continue. */ crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s " "after successful 'off'", device, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : ": ", (reason == NULL)? "" : reason, op->target); pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); } else { crm_notice("Action '%s' targeting %s%s%s on behalf of %s@%s: " "%s%s%s%s", op->action, op->target, ((device == NULL)? "" : " using "), ((device == NULL)? "" : device), op->client_name, op->originator, pcmk_exec_status_str(op->result.execution_status), (reason == NULL)? "" : " (", (reason == NULL)? "" : reason, (reason == NULL)? "" : ")"); } if (pcmk__result_ok(&op->result)) { /* An operation completed successfully. Try another device if * necessary, otherwise mark the operation as done. */ advance_topology_device_in_level(op, device, msg); return; } else { /* This device failed, time to try another topology level. If no other * levels are available, mark this operation as failed and report results. */ if (advance_topology_level(op, false) != pcmk_rc_ok) { op->state = st_failed; finalize_op(op, msg, false); return; } } } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) { op->state = st_done; finalize_op(op, msg, false); return; } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT) && (op->devices == NULL)) { /* If the operation timed out don't bother retrying other peers. */ op->state = st_failed; finalize_op(op, msg, false); return; } else { /* fall-through and attempt other fencing action using another peer */ } /* Retry on failure */ crm_trace("Next for %s on behalf of %s@%s (result was: %s)", op->target, op->originator, op->client_name, pcmk_exec_status_str(op->result.execution_status)); request_peer_fencing(op, NULL); } gboolean stonith_check_fence_tolerance(int tolerance, const char *target, const char *action) { GHashTableIter iter; time_t now = time(NULL); remote_fencing_op_t *rop = NULL; if (tolerance <= 0 || !stonith_remote_op_list || target == NULL || action == NULL) { return FALSE; } g_hash_table_iter_init(&iter, stonith_remote_op_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&rop)) { if (strcmp(rop->target, target) != 0) { continue; } else if (rop->state != st_done) { continue; /* We don't have to worry about remapped reboots here * because if state is done, any remapping has been undone */ } else if (strcmp(rop->action, action) != 0) { continue; } else if ((rop->completed + tolerance) < now) { continue; } crm_notice("Target %s was fenced (%s) less than %ds ago by %s on behalf of %s", target, action, tolerance, rop->delegate, rop->originator); return TRUE; } return FALSE; } diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index c96d59db5a..8fefe0ccb1 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -1,511 +1,512 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__CRM_MSG_XML__H # define PCMK__CRM_MSG_XML__H # include #if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1) #include #endif #ifdef __cplusplus extern "C" { #endif /* This file defines constants for various XML syntax (mainly element and * attribute names). * * For consistency, new constants should start with "PCMK_", followed by "XE" * for XML element names, "XA" for XML attribute names, and "META" for meta * attribute names. Old names that don't follow this policy should eventually be * deprecated and replaced with names that do. */ /* * XML elements */ #define PCMK_XE_DATE_EXPRESSION "date_expression" #define PCMK_XE_OP_EXPRESSION "op_expression" /* This has been deprecated as a CIB element (an alias for with * "promotable" set to "true") since 2.0.0. */ #define PCMK_XE_PROMOTABLE_LEGACY "master" #define PCMK_XE_RSC_EXPRESSION "rsc_expression" /* * XML attributes */ #define PCMK_XA_FORMAT "format" /* These have been deprecated as CIB element attributes (aliases for * "promoted-max" and "promoted-node-max") since 2.0.0. */ #define PCMK_XA_PROMOTED_MAX_LEGACY "master-max" #define PCMK_XA_PROMOTED_NODE_MAX_LEGACY "master-node-max" /* * Meta attributes */ #define PCMK_META_CLONE_MAX "clone-max" #define PCMK_META_CLONE_MIN "clone-min" #define PCMK_META_CLONE_NODE_MAX "clone-node-max" #define PCMK_META_ENABLED "enabled" #define PCMK_META_FAILURE_TIMEOUT "failure-timeout" #define PCMK_META_MIGRATION_THRESHOLD "migration-threshold" #define PCMK_META_PROMOTED_MAX "promoted-max" #define PCMK_META_PROMOTED_NODE_MAX "promoted-node-max" /* * Cluster options */ #define PCMK_OPT_BATCH_LIMIT "batch-limit" #define PCMK_OPT_CLUSTER_DELAY "cluster-delay" #define PCMK_OPT_CLUSTER_INFRASTRUCTURE "cluster-infrastructure" #define PCMK_OPT_CLUSTER_IPC_LIMIT "cluster-ipc-limit" #define PCMK_OPT_CLUSTER_NAME "cluster-name" #define PCMK_OPT_CLUSTER_RECHECK_INTERVAL "cluster-recheck-interval" #define PCMK_OPT_CONCURRENT_FENCING "concurrent-fencing" #define PCMK_OPT_DC_DEADTIME "dc-deadtime" #define PCMK_OPT_DC_VERSION "dc-version" #define PCMK_OPT_ELECTION_TIMEOUT "election-timeout" #define PCMK_OPT_ENABLE_ACL "enable-acl" #define PCMK_OPT_ENABLE_STARTUP_PROBES "enable-startup-probes" #define PCMK_OPT_FENCE_REACTION "fence-reaction" #define PCMK_OPT_HAVE_WATCHDOG "have-watchdog" #define PCMK_OPT_JOIN_FINALIZATION_TIMEOUT "join-finalization-timeout" #define PCMK_OPT_JOIN_INTEGRATION_TIMEOUT "join-integration-timeout" #define PCMK_OPT_LOAD_THRESHOLD "load-threshold" #define PCMK_OPT_MAINTENANCE_MODE "maintenance-mode" #define PCMK_OPT_MIGRATION_LIMIT "migration-limit" #define PCMK_OPT_NO_QUORUM_POLICY "no-quorum-policy" #define PCMK_OPT_NODE_ACTION_LIMIT "node-action-limit" #define PCMK_OPT_NODE_PENDING_TIMEOUT "node-pending-timeout" #define PCMK_OPT_PE_ERROR_SERIES_MAX "pe-error-series-max" #define PCMK_OPT_PE_INPUT_SERIES_MAX "pe-input-series-max" #define PCMK_OPT_PE_WARN_SERIES_MAX "pe-warn-series-max" #define PCMK_OPT_PLACEMENT_STRATEGY "placement-strategy" #define PCMK_OPT_PRIORITY_FENCING_DELAY "priority-fencing-delay" #define PCMK_OPT_SHUTDOWN_ESCALATION "shutdown-escalation" #define PCMK_OPT_SHUTDOWN_LOCK "shutdown-lock" #define PCMK_OPT_SHUTDOWN_LOCK_LIMIT "shutdown-lock-limit" #define PCMK_OPT_START_FAILURE_IS_FATAL "start-failure-is-fatal" #define PCMK_OPT_STARTUP_FENCING "startup-fencing" #define PCMK_OPT_STONITH_ACTION "stonith-action" #define PCMK_OPT_STONITH_ENABLED "stonith-enabled" #define PCMK_OPT_STONITH_MAX_ATTEMPTS "stonith-max-attempts" #define PCMK_OPT_STONITH_TIMEOUT "stonith-timeout" +#define PCMK_OPT_STONITH_WATCHDOG_TIMEOUT "stonith-watchdog-timeout" /* * Older constants that don't follow current naming */ # ifndef F_ORIG # define F_ORIG "src" # endif # ifndef F_SEQ # define F_SEQ "seq" # endif # ifndef F_SUBTYPE # define F_SUBTYPE "subt" # endif # ifndef F_TYPE # define F_TYPE "t" # endif # ifndef F_CLIENTNAME # define F_CLIENTNAME "cn" # endif # ifndef F_XML_TAGNAME # define F_XML_TAGNAME "__name__" # endif # ifndef T_CRM # define T_CRM "crmd" # endif # ifndef T_ATTRD # define T_ATTRD "attrd" # endif # define CIB_OPTIONS_FIRST "cib-bootstrap-options" # define F_CRM_DATA "crm_xml" # define F_CRM_TASK "crm_task" # define F_CRM_HOST_TO "crm_host_to" # define F_CRM_MSG_TYPE F_SUBTYPE # define F_CRM_SYS_TO "crm_sys_to" # define F_CRM_SYS_FROM "crm_sys_from" # define F_CRM_HOST_FROM F_ORIG # define F_CRM_REFERENCE XML_ATTR_REFERENCE # define F_CRM_VERSION XML_ATTR_VERSION # define F_CRM_ORIGIN "origin" # define F_CRM_USER "crm_user" # define F_CRM_JOIN_ID "join_id" # define F_CRM_DC_LEAVING "dc-leaving" # define F_CRM_ELECTION_ID "election-id" # define F_CRM_ELECTION_AGE_S "election-age-sec" # define F_CRM_ELECTION_AGE_US "election-age-nano-sec" # define F_CRM_ELECTION_OWNER "election-owner" # define F_CRM_TGRAPH "crm-tgraph-file" # define F_CRM_TGRAPH_INPUT "crm-tgraph-in" # define F_CRM_THROTTLE_MODE "crm-limit-mode" # define F_CRM_THROTTLE_MAX "crm-limit-max" /*---- Common tags/attrs */ # define XML_DIFF_MARKER "__crm_diff_marker__" # define XML_TAG_CIB "cib" # define XML_TAG_FAILED "failed" # define XML_ATTR_CRM_VERSION "crm_feature_set" # define XML_ATTR_DIGEST "digest" # define XML_ATTR_VALIDATION "validate-with" # define XML_ATTR_QUORUM_PANIC "no-quorum-panic" # define XML_ATTR_HAVE_QUORUM "have-quorum" # define XML_ATTR_GENERATION "epoch" # define XML_ATTR_GENERATION_ADMIN "admin_epoch" # define XML_ATTR_NUMUPDATES "num_updates" # define XML_ATTR_TIMEOUT "timeout" # define XML_ATTR_ORIGIN "crm-debug-origin" # define XML_ATTR_TSTAMP "crm-timestamp" # define XML_CIB_ATTR_WRITTEN "cib-last-written" # define XML_ATTR_VERSION "version" # define XML_ATTR_DESC "description" # define XML_ATTR_ID "id" # define XML_ATTR_NAME "name" # define XML_ATTR_IDREF "id-ref" # define XML_ATTR_ID_LONG "long-id" # define XML_ATTR_TYPE "type" # define XML_ATTR_OP "op" # define XML_ATTR_DC_UUID "dc-uuid" # define XML_ATTR_UPDATE_ORIG "update-origin" # define XML_ATTR_UPDATE_CLIENT "update-client" # define XML_ATTR_UPDATE_USER "update-user" # define XML_BOOLEAN_TRUE "true" # define XML_BOOLEAN_FALSE "false" # define XML_BOOLEAN_YES XML_BOOLEAN_TRUE # define XML_BOOLEAN_NO XML_BOOLEAN_FALSE # define XML_TAG_OPTIONS "options" /*---- top level tags/attrs */ # define XML_ATTR_REQUEST "request" # define XML_ATTR_RESPONSE "response" # define XML_ATTR_UNAME "uname" # define XML_ATTR_REFERENCE "reference" # define XML_CRM_TAG_PING "ping_response" # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" # define XML_PING_ATTR_CRMDSTATE "crmd_state" # define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state" # define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init" # define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons" # define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping" # define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running" # define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down" # define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete" # define XML_PING_ATTR_PACEMAKERDSTATE_REMOTE "remote" # define XML_FAIL_TAG_CIB "failed_update" # define XML_FAILCIB_ATTR_ID "id" # define XML_FAILCIB_ATTR_OBJTYPE "object_type" # define XML_FAILCIB_ATTR_OP "operation" # define XML_FAILCIB_ATTR_REASON "reason" /*---- CIB specific tags/attrs */ # define XML_CIB_TAG_SECTION_ALL "all" # define XML_CIB_TAG_CONFIGURATION "configuration" # define XML_CIB_TAG_STATUS "status" # define XML_CIB_TAG_RESOURCES "resources" # define XML_CIB_TAG_NODES "nodes" # define XML_CIB_TAG_CONSTRAINTS "constraints" # define XML_CIB_TAG_CRMCONFIG "crm_config" # define XML_CIB_TAG_OPCONFIG "op_defaults" # define XML_CIB_TAG_RSCCONFIG "rsc_defaults" # define XML_CIB_TAG_ACLS "acls" # define XML_CIB_TAG_ALERTS "alerts" # define XML_CIB_TAG_ALERT "alert" # define XML_CIB_TAG_ALERT_RECIPIENT "recipient" # define XML_CIB_TAG_ALERT_SELECT "select" # define XML_CIB_TAG_ALERT_ATTRIBUTES "select_attributes" # define XML_CIB_TAG_ALERT_FENCING "select_fencing" # define XML_CIB_TAG_ALERT_NODES "select_nodes" # define XML_CIB_TAG_ALERT_RESOURCES "select_resources" # define XML_CIB_TAG_ALERT_ATTR "attribute" # define XML_CIB_TAG_STATE "node_state" # define XML_CIB_TAG_NODE "node" # define XML_CIB_TAG_NVPAIR "nvpair" # define XML_CIB_TAG_PROPSET "cluster_property_set" # define XML_TAG_ATTR_SETS "instance_attributes" # define XML_TAG_META_SETS "meta_attributes" # define XML_TAG_ATTRS "attributes" # define XML_TAG_PARAMS "parameters" # define XML_TAG_PARAM "param" # define XML_TAG_UTILIZATION "utilization" # define XML_TAG_RESOURCE_REF "resource_ref" # define XML_CIB_TAG_RESOURCE "primitive" # define XML_CIB_TAG_GROUP "group" # define XML_CIB_TAG_INCARNATION "clone" # define XML_CIB_TAG_CONTAINER "bundle" # define XML_CIB_TAG_RSC_TEMPLATE "template" # define XML_RSC_ATTR_TARGET "container-attribute-target" # define XML_RSC_ATTR_RESTART "restart-type" # define XML_RSC_ATTR_ORDERED "ordered" # define XML_RSC_ATTR_INTERLEAVE "interleave" # define XML_RSC_ATTR_INCARNATION "clone" # define XML_RSC_ATTR_PROMOTABLE "promotable" # define XML_RSC_ATTR_MANAGED "is-managed" # define XML_RSC_ATTR_TARGET_ROLE "target-role" # define XML_RSC_ATTR_UNIQUE "globally-unique" # define XML_RSC_ATTR_NOTIFY "notify" # define XML_RSC_ATTR_STICKINESS "resource-stickiness" # define XML_RSC_ATTR_MULTIPLE "multiple-active" # define XML_RSC_ATTR_REQUIRES "requires" # define XML_RSC_ATTR_CONTAINER "container" # define XML_RSC_ATTR_INTERNAL_RSC "internal_rsc" # define XML_RSC_ATTR_MAINTENANCE "maintenance" # define XML_RSC_ATTR_REMOTE_NODE "remote-node" # define XML_RSC_ATTR_CLEAR_OP "clear_failure_op" # define XML_RSC_ATTR_CLEAR_INTERVAL "clear_failure_interval" # define XML_RSC_ATTR_REMOTE_RA_ADDR "addr" # define XML_RSC_ATTR_REMOTE_RA_SERVER "server" # define XML_RSC_ATTR_REMOTE_RA_PORT "port" # define XML_RSC_ATTR_CRITICAL "critical" # define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval" # define XML_OP_ATTR_ON_FAIL "on-fail" # define XML_OP_ATTR_START_DELAY "start-delay" # define XML_OP_ATTR_ALLOW_MIGRATE "allow-migrate" # define XML_OP_ATTR_ORIGIN "interval-origin" # define XML_OP_ATTR_PENDING "record-pending" # define XML_OP_ATTR_DIGESTS_ALL "digests-all" # define XML_OP_ATTR_DIGESTS_SECURE "digests-secure" # define XML_CIB_TAG_LRM "lrm" # define XML_LRM_TAG_RESOURCES "lrm_resources" # define XML_LRM_TAG_RESOURCE "lrm_resource" # define XML_LRM_TAG_RSC_OP "lrm_rsc_op" # define XML_AGENT_ATTR_CLASS "class" # define XML_AGENT_ATTR_PROVIDER "provider" //! \deprecated Do not use (will be removed in a future release) # define XML_CIB_ATTR_REPLACE "replace" # define XML_CIB_ATTR_PRIORITY "priority" # define XML_NODE_IS_REMOTE "remote_node" # define XML_NODE_IS_FENCED "node_fenced" # define XML_NODE_IS_MAINTENANCE "node_in_maintenance" # define XML_CIB_ATTR_SHUTDOWN "shutdown" /* Aside from being an old name for the executor, LRM is a misnomer here because * the controller and scheduler use these to track actions, which are not always * executor operations. */ // XML attribute that takes interval specification (user-facing configuration) # define XML_LRM_ATTR_INTERVAL "interval" // XML attribute that takes interval in milliseconds (daemon APIs) // (identical value as above, but different constant allows clearer code intent) # define XML_LRM_ATTR_INTERVAL_MS XML_LRM_ATTR_INTERVAL # define XML_LRM_ATTR_TASK "operation" # define XML_LRM_ATTR_TASK_KEY "operation_key" # define XML_LRM_ATTR_TARGET "on_node" # define XML_LRM_ATTR_TARGET_UUID "on_node_uuid" /*! Actions to be executed on Pacemaker Remote nodes are routed through the * controller on the cluster node hosting the remote connection. That cluster * node is considered the router node for the action. */ # define XML_LRM_ATTR_ROUTER_NODE "router_node" # define XML_LRM_ATTR_RSCID "rsc-id" # define XML_LRM_ATTR_OPSTATUS "op-status" # define XML_LRM_ATTR_RC "rc-code" # define XML_LRM_ATTR_CALLID "call-id" # define XML_LRM_ATTR_OP_DIGEST "op-digest" # define XML_LRM_ATTR_OP_RESTART "op-force-restart" # define XML_LRM_ATTR_OP_SECURE "op-secure-params" # define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest" # define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest" # define XML_LRM_ATTR_EXIT_REASON "exit-reason" # define XML_RSC_OP_LAST_CHANGE "last-rc-change" # define XML_RSC_OP_T_EXEC "exec-time" # define XML_RSC_OP_T_QUEUE "queue-time" # define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source" # define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target" # define XML_TAG_GRAPH "transition_graph" # define XML_GRAPH_TAG_RSC_OP "rsc_op" # define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event" # define XML_GRAPH_TAG_CRM_EVENT "crm_event" # define XML_GRAPH_TAG_DOWNED "downed" # define XML_GRAPH_TAG_MAINTENANCE "maintenance" # define XML_TAG_RULE "rule" # define XML_RULE_ATTR_SCORE "score" # define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute" # define XML_RULE_ATTR_ROLE "role" # define XML_RULE_ATTR_BOOLEAN_OP "boolean-op" # define XML_TAG_EXPRESSION "expression" # define XML_EXPR_ATTR_ATTRIBUTE "attribute" # define XML_EXPR_ATTR_OPERATION "operation" # define XML_EXPR_ATTR_VALUE "value" # define XML_EXPR_ATTR_TYPE "type" # define XML_EXPR_ATTR_VALUE_SOURCE "value-source" # define XML_CONS_TAG_RSC_DEPEND "rsc_colocation" # define XML_CONS_TAG_RSC_ORDER "rsc_order" # define XML_CONS_TAG_RSC_LOCATION "rsc_location" # define XML_CONS_TAG_RSC_TICKET "rsc_ticket" # define XML_CONS_TAG_RSC_SET "resource_set" # define XML_CONS_ATTR_SYMMETRICAL "symmetrical" # define XML_LOCATION_ATTR_DISCOVERY "resource-discovery" # define XML_COLOC_ATTR_SOURCE "rsc" # define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role" # define XML_COLOC_ATTR_TARGET "with-rsc" # define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role" # define XML_COLOC_ATTR_NODE_ATTR "node-attribute" # define XML_COLOC_ATTR_INFLUENCE "influence" //! \deprecated Deprecated since 2.1.5 # define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance" //! \deprecated Deprecated since 2.1.5 # define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance" # define XML_LOC_ATTR_SOURCE "rsc" # define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern" # define XML_ORDER_ATTR_FIRST "first" # define XML_ORDER_ATTR_THEN "then" # define XML_ORDER_ATTR_FIRST_ACTION "first-action" # define XML_ORDER_ATTR_THEN_ACTION "then-action" # define XML_ORDER_ATTR_KIND "kind" //! \deprecated Deprecated since 2.1.5 # define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance" //! \deprecated Deprecated since 2.1.5 # define XML_ORDER_ATTR_THEN_INSTANCE "then-instance" # define XML_TICKET_ATTR_TICKET "ticket" # define XML_TICKET_ATTR_LOSS_POLICY "loss-policy" # define XML_NVPAIR_ATTR_NAME "name" # define XML_NVPAIR_ATTR_VALUE "value" # define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled" # define XML_ALERT_ATTR_PATH "path" # define XML_ALERT_ATTR_TIMEOUT "timeout" # define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format" # define XML_ALERT_ATTR_REC_VALUE "value" # define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple" # define XML_ATTR_TRANSITION_MAGIC "transition-magic" # define XML_ATTR_TRANSITION_KEY "transition-key" # define XML_ATTR_TE_NOWAIT "op_no_wait" # define XML_ATTR_TE_TARGET_RC "op_target_rc" # define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes" //! \deprecated Do not use (will be removed in a future release) # define XML_TAG_DIFF_ADDED "diff-added" //! \deprecated Do not use (will be removed in a future release) # define XML_TAG_DIFF_REMOVED "diff-removed" # define XML_ACL_TAG_USER "acl_target" # define XML_ACL_TAG_USERv1 "acl_user" # define XML_ACL_TAG_GROUP "acl_group" # define XML_ACL_TAG_ROLE "acl_role" # define XML_ACL_TAG_PERMISSION "acl_permission" # define XML_ACL_TAG_ROLE_REF "role" # define XML_ACL_TAG_ROLE_REFv1 "role_ref" # define XML_ACL_ATTR_KIND "kind" # define XML_ACL_TAG_READ "read" # define XML_ACL_TAG_WRITE "write" # define XML_ACL_TAG_DENY "deny" # define XML_ACL_ATTR_REF "reference" # define XML_ACL_ATTR_REFv1 "ref" # define XML_ACL_ATTR_TAG "object-type" # define XML_ACL_ATTR_TAGv1 "tag" # define XML_ACL_ATTR_XPATH "xpath" # define XML_ACL_ATTR_ATTRIBUTE "attribute" # define XML_CIB_TAG_TICKETS "tickets" # define XML_CIB_TAG_TICKET_STATE "ticket_state" # define XML_CIB_TAG_TAGS "tags" # define XML_CIB_TAG_TAG "tag" # define XML_CIB_TAG_OBJ_REF "obj_ref" # define XML_TAG_FENCING_TOPOLOGY "fencing-topology" # define XML_TAG_FENCING_LEVEL "fencing-level" # define XML_ATTR_STONITH_INDEX "index" # define XML_ATTR_STONITH_TARGET "target" # define XML_ATTR_STONITH_TARGET_VALUE "target-value" # define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern" # define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute" # define XML_ATTR_STONITH_DEVICES "devices" # define XML_TAG_DIFF "diff" # define XML_DIFF_VERSION "version" # define XML_DIFF_VSOURCE "source" # define XML_DIFF_VTARGET "target" # define XML_DIFF_CHANGE "change" # define XML_DIFF_LIST "change-list" # define XML_DIFF_ATTR "change-attr" # define XML_DIFF_RESULT "change-result" # define XML_DIFF_OP "operation" # define XML_DIFF_PATH "path" # define XML_DIFF_POSITION "position" # define ID(x) crm_element_value(x, XML_ATTR_ID) #ifdef __cplusplus } #endif #endif diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index e5692140ad..e05e6d80c0 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -1,310 +1,317 @@ /* - * Copyright 2013-2023 the Pacemaker project contributors + * Copyright 2013-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include +#include + static pid_t sbd_pid = 0; static void sysrq_trigger(char t) { #if HAVE_LINUX_PROCFS FILE *procf; // Root can always write here, regardless of kernel.sysrq value procf = fopen("/proc/sysrq-trigger", "a"); if (!procf) { crm_perror(LOG_WARNING, "Opening sysrq-trigger failed"); return; } crm_info("sysrq-trigger: %c", t); fprintf(procf, "%c\n", t); fclose(procf); #endif // HAVE_LINUX_PROCFS return; } /*! * \internal * \brief Panic the local host (if root) or tell pacemakerd to do so */ static void panic_local(void) { int rc = pcmk_ok; uid_t uid = geteuid(); pid_t ppid = getppid(); const char *panic_action = pcmk__env_option(PCMK__ENV_PANIC_ACTION); if(uid != 0 && ppid > 1) { /* We're a non-root pacemaker daemon (pacemaker-based, * pacemaker-controld, pacemaker-schedulerd, pacemaker-attrd, etc.) with * the original pacemakerd parent. * * Of these, only the controller is likely to be initiating resets. */ crm_emerg("Signaling parent %lld to panic", (long long) ppid); crm_exit(CRM_EX_PANIC); return; } else if (uid != 0) { #if HAVE_LINUX_PROCFS /* * No permissions, and no pacemakerd parent to escalate to. * Track down the new pacemakerd process and send a signal instead. */ union sigval signal_value; memset(&signal_value, 0, sizeof(signal_value)); ppid = pcmk__procfs_pid_of("pacemakerd"); crm_emerg("Signaling pacemakerd[%lld] to panic", (long long) ppid); if(ppid > 1 && sigqueue(ppid, SIGQUIT, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal pacemakerd[%lld] to panic", (long long) ppid); } #endif // HAVE_LINUX_PROCFS /* The best we can do now is die */ crm_exit(CRM_EX_PANIC); return; } /* We're either pacemakerd, or a pacemaker daemon running as root */ if (pcmk__str_eq(panic_action, "crash", pcmk__str_casei)) { sysrq_trigger('c'); } else if (pcmk__str_eq(panic_action, "sync-crash", pcmk__str_casei)) { sync(); sysrq_trigger('c'); } else { if (pcmk__str_eq(panic_action, "sync-reboot", pcmk__str_casei)) { sync(); } sysrq_trigger('b'); } /* reboot(RB_HALT_SYSTEM); rc = errno; */ reboot(RB_AUTOBOOT); rc = errno; crm_emerg("Reboot failed, escalating to parent %lld: %s " CRM_XS " rc=%d", (long long) ppid, pcmk_rc_str(rc), rc); if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } /*! * \internal * \brief Tell sbd to kill the local host, then exit */ static void panic_sbd(void) { union sigval signal_value; pid_t ppid = getppid(); crm_emerg("Signaling sbd[%lld] to panic", (long long) sbd_pid); memset(&signal_value, 0, sizeof(signal_value)); /* TODO: Arrange for a slightly less brutal option? */ if(sigqueue(sbd_pid, SIGKILL, signal_value) < 0) { crm_perror(LOG_EMERG, "Cannot signal sbd[%lld] to terminate", (long long) sbd_pid); panic_local(); } if(ppid > 1) { /* child daemon */ exit(CRM_EX_PANIC); } else { /* pacemakerd or orphan child */ exit(CRM_EX_FATAL); } } /*! * \internal * \brief Panic the local host * * Panic the local host either by sbd (if running), directly, or by asking * pacemakerd. If trace logging this function, exit instead. * * \param[in] origin Function caller (for logging only) */ void pcmk__panic(const char *origin) { /* Ensure sbd_pid is set */ (void) pcmk__locate_sbd(); pcmk__if_tracing( { // getppid() == 1 means our original parent no longer exists crm_emerg("Shutting down instead of panicking the node " CRM_XS " origin=%s sbd=%lld parent=%d", origin, (long long) sbd_pid, getppid()); crm_exit(CRM_EX_FATAL); return; }, {} ); if(sbd_pid > 1) { crm_emerg("Signaling sbd[%lld] to panic the system: %s", (long long) sbd_pid, origin); panic_sbd(); } else { crm_emerg("Panicking the system directly: %s", origin); panic_local(); } } /*! * \internal * \brief Return the process ID of sbd (or 0 if it is not running) */ pid_t pcmk__locate_sbd(void) { char *pidfile = NULL; char *sbd_path = NULL; int rc; if(sbd_pid > 1) { return sbd_pid; } /* Look for the pid file */ pidfile = crm_strdup_printf(PCMK_RUN_DIR "/sbd.pid"); sbd_path = crm_strdup_printf("%s/sbd", SBIN_DIR); /* Read the pid file */ rc = pcmk__pidfile_matches(pidfile, 0, sbd_path, &sbd_pid); if (rc == pcmk_rc_ok) { crm_trace("SBD detected at pid %lld (via PID file %s)", (long long) sbd_pid, pidfile); #if HAVE_LINUX_PROCFS } else { /* Fall back to /proc for systems that support it */ sbd_pid = pcmk__procfs_pid_of("sbd"); crm_trace("SBD detected at pid %lld (via procfs)", (long long) sbd_pid); #endif // HAVE_LINUX_PROCFS } if(sbd_pid < 0) { sbd_pid = 0; crm_trace("SBD not detected"); } free(pidfile); free(sbd_path); return sbd_pid; } long pcmk__get_sbd_timeout(void) { static long sbd_timeout = -2; if (sbd_timeout == -2) { sbd_timeout = crm_get_msec(getenv("SBD_WATCHDOG_TIMEOUT")); } return sbd_timeout; } bool pcmk__get_sbd_sync_resource_startup(void) { static int sync_resource_startup = PCMK__SBD_SYNC_DEFAULT; static bool checked_sync_resource_startup = false; if (!checked_sync_resource_startup) { const char *sync_env = getenv("SBD_SYNC_RESOURCE_STARTUP"); if (sync_env == NULL) { crm_trace("Defaulting to %sstart-up synchronization with sbd", (PCMK__SBD_SYNC_DEFAULT? "" : "no ")); } else if (crm_str_to_boolean(sync_env, &sync_resource_startup) < 0) { crm_warn("Defaulting to %sstart-up synchronization with sbd " "because environment value '%s' is invalid", (PCMK__SBD_SYNC_DEFAULT? "" : "no "), sync_env); } checked_sync_resource_startup = true; } return sync_resource_startup != 0; } long pcmk__auto_watchdog_timeout(void) { long sbd_timeout = pcmk__get_sbd_timeout(); return (sbd_timeout <= 0)? 0 : (2 * sbd_timeout); } bool pcmk__valid_sbd_timeout(const char *value) { long st_timeout = value? crm_get_msec(value) : 0; if (st_timeout < 0) { st_timeout = pcmk__auto_watchdog_timeout(); - crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)", + crm_debug("Using calculated value %ld for " + PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " (%s)", st_timeout, value); } if (st_timeout == 0) { - crm_debug("Watchdog may be enabled but stonith-watchdog-timeout is disabled (%s)", + crm_debug("Watchdog may be enabled but " + PCMK_OPT_STONITH_WATCHDOG_TIMEOUT " is disabled (%s)", value? value : "default"); } else if (pcmk__locate_sbd() == 0) { - crm_emerg("Shutting down: stonith-watchdog-timeout configured (%s) " - "but SBD not active", (value? value : "auto")); + crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT + " configured (%s) but SBD not active", + pcmk__s(value, "auto")); crm_exit(CRM_EX_FATAL); return false; } else { long sbd_timeout = pcmk__get_sbd_timeout(); if (st_timeout < sbd_timeout) { - crm_emerg("Shutting down: stonith-watchdog-timeout (%s) too short " - "(must be >%ldms)", value, sbd_timeout); + crm_emerg("Shutting down: " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT + " (%s) too short (must be >%ldms)", + value, sbd_timeout); crm_exit(CRM_EX_FATAL); return false; } - crm_info("Watchdog configured with stonith-watchdog-timeout %s and SBD timeout %ldms", + crm_info("Watchdog configured with " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT + " %s and SBD timeout %ldms", value, sbd_timeout); } return true; } diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c index 400d3b0935..f9b4bd11ff 100644 --- a/lib/lrmd/lrmd_client.c +++ b/lib/lrmd/lrmd_client.c @@ -1,2570 +1,2570 @@ /* - * Copyright 2012-2023 the Pacemaker project contributors + * Copyright 2012-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include // uint32_t, uint64_t #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_GNUTLS_GNUTLS_H # include #endif #include #include #include #include #include #define MAX_TLS_RECV_WAIT 10000 CRM_TRACE_INIT_DATA(lrmd); static int lrmd_api_disconnect(lrmd_t * lrmd); static int lrmd_api_is_connected(lrmd_t * lrmd); /* IPC proxy functions */ int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); static void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); #ifdef HAVE_GNUTLS_GNUTLS_H # define LRMD_CLIENT_HANDSHAKE_TIMEOUT 5000 /* 5 seconds */ gnutls_psk_client_credentials_t psk_cred_s; static void lrmd_tls_disconnect(lrmd_t * lrmd); static int global_remote_msg_id = 0; static void lrmd_tls_connection_destroy(gpointer userdata); #endif typedef struct lrmd_private_s { uint64_t type; char *token; mainloop_io_t *source; /* IPC parameters */ crm_ipc_t *ipc; pcmk__remote_t *remote; /* Extra TLS parameters */ char *remote_nodename; #ifdef HAVE_GNUTLS_GNUTLS_H char *server; int port; gnutls_psk_client_credentials_t psk_cred_c; /* while the async connection is occurring, this is the id * of the connection timeout timer. */ int async_timer; int sock; /* since tls requires a round trip across the network for a * request/reply, there are times where we just want to be able * to send a request from the client and not wait around (or even care * about) what the reply is. */ int expected_late_replies; GList *pending_notify; crm_trigger_t *process_notify; #endif lrmd_event_callback callback; /* Internal IPC proxy msg passing for remote guests */ void (*proxy_callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg); void *proxy_callback_userdata; char *peer_version; } lrmd_private_t; static lrmd_list_t * lrmd_list_add(lrmd_list_t * head, const char *value) { lrmd_list_t *p, *end; p = calloc(1, sizeof(lrmd_list_t)); p->val = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_list_freeall(lrmd_list_t * head) { lrmd_list_t *p; while (head) { char *val = (char *)head->val; p = head->next; free(val); free(head); head = p; } } lrmd_key_value_t * lrmd_key_value_add(lrmd_key_value_t * head, const char *key, const char *value) { lrmd_key_value_t *p, *end; p = calloc(1, sizeof(lrmd_key_value_t)); p->key = strdup(key); p->value = strdup(value); end = head; while (end && end->next) { end = end->next; } if (end) { end->next = p; } else { head = p; } return head; } void lrmd_key_value_freeall(lrmd_key_value_t * head) { lrmd_key_value_t *p; while (head) { p = head->next; free(head->key); free(head->value); free(head); head = p; } } /*! * \brief Create a new lrmd_event_data_t object * * \param[in] rsc_id ID of resource involved in event * \param[in] task Action name * \param[in] interval_ms Action interval * * \return Newly allocated and initialized lrmd_event_data_t * \note This functions asserts on memory errors, so the return value is * guaranteed to be non-NULL. The caller is responsible for freeing the * result with lrmd_free_event(). */ lrmd_event_data_t * lrmd_new_event(const char *rsc_id, const char *task, guint interval_ms) { lrmd_event_data_t *event = calloc(1, sizeof(lrmd_event_data_t)); CRM_ASSERT(event != NULL); pcmk__str_update((char **) &event->rsc_id, rsc_id); pcmk__str_update((char **) &event->op_type, task); event->interval_ms = interval_ms; return event; } lrmd_event_data_t * lrmd_copy_event(lrmd_event_data_t * event) { lrmd_event_data_t *copy = NULL; copy = calloc(1, sizeof(lrmd_event_data_t)); copy->type = event->type; pcmk__str_update((char **) ©->rsc_id, event->rsc_id); pcmk__str_update((char **) ©->op_type, event->op_type); pcmk__str_update((char **) ©->user_data, event->user_data); copy->call_id = event->call_id; copy->timeout = event->timeout; copy->interval_ms = event->interval_ms; copy->start_delay = event->start_delay; copy->rsc_deleted = event->rsc_deleted; copy->rc = event->rc; copy->op_status = event->op_status; pcmk__str_update((char **) ©->output, event->output); copy->t_run = event->t_run; copy->t_rcchange = event->t_rcchange; copy->exec_time = event->exec_time; copy->queue_time = event->queue_time; copy->connection_rc = event->connection_rc; copy->params = pcmk__str_table_dup(event->params); pcmk__str_update((char **) ©->remote_nodename, event->remote_nodename); pcmk__str_update((char **) ©->exit_reason, event->exit_reason); return copy; } /*! * \brief Free an executor event * * \param[in,out] Executor event object to free */ void lrmd_free_event(lrmd_event_data_t *event) { if (event == NULL) { return; } // @TODO Why are these const char *? free((void *) event->rsc_id); free((void *) event->op_type); free((void *) event->user_data); free((void *) event->remote_nodename); lrmd__reset_result(event); if (event->params != NULL) { g_hash_table_destroy(event->params); } free(event); } static void lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg) { const char *type; const char *proxy_session = crm_element_value(msg, F_LRMD_IPC_SESSION); lrmd_private_t *native = lrmd->lrmd_private; lrmd_event_data_t event = { 0, }; if (proxy_session != NULL) { /* this is proxy business */ lrmd_internal_proxy_dispatch(lrmd, msg); return; } else if (!native->callback) { /* no callback set */ crm_trace("notify event received but client has not set callback"); return; } event.remote_nodename = native->remote_nodename; type = crm_element_value(msg, F_LRMD_OPERATION); crm_element_value_int(msg, F_LRMD_CALLID, &event.call_id); event.rsc_id = crm_element_value(msg, F_LRMD_RSC_ID); if (pcmk__str_eq(type, LRMD_OP_RSC_REG, pcmk__str_none)) { event.type = lrmd_event_register; } else if (pcmk__str_eq(type, LRMD_OP_RSC_UNREG, pcmk__str_none)) { event.type = lrmd_event_unregister; } else if (pcmk__str_eq(type, LRMD_OP_RSC_EXEC, pcmk__str_none)) { time_t epoch = 0; crm_element_value_int(msg, F_LRMD_TIMEOUT, &event.timeout); crm_element_value_ms(msg, F_LRMD_RSC_INTERVAL, &event.interval_ms); crm_element_value_int(msg, F_LRMD_RSC_START_DELAY, &event.start_delay); crm_element_value_int(msg, F_LRMD_EXEC_RC, (int *)&event.rc); crm_element_value_int(msg, F_LRMD_OP_STATUS, &event.op_status); crm_element_value_int(msg, F_LRMD_RSC_DELETED, &event.rsc_deleted); crm_element_value_epoch(msg, F_LRMD_RSC_RUN_TIME, &epoch); event.t_run = (unsigned int) epoch; crm_element_value_epoch(msg, F_LRMD_RSC_RCCHANGE_TIME, &epoch); event.t_rcchange = (unsigned int) epoch; crm_element_value_int(msg, F_LRMD_RSC_EXEC_TIME, (int *)&event.exec_time); crm_element_value_int(msg, F_LRMD_RSC_QUEUE_TIME, (int *)&event.queue_time); event.op_type = crm_element_value(msg, F_LRMD_RSC_ACTION); event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR); event.type = lrmd_event_exec_complete; /* output and exit_reason may be freed by a callback */ event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT); lrmd__set_result(&event, event.rc, event.op_status, crm_element_value(msg, F_LRMD_RSC_EXIT_REASON)); event.params = xml2list(msg); } else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) { event.type = lrmd_event_new_client; } else if (pcmk__str_eq(type, LRMD_OP_POKE, pcmk__str_none)) { event.type = lrmd_event_poke; } else { return; } crm_trace("op %s notify event received", type); native->callback(&event); if (event.params) { g_hash_table_destroy(event.params); } lrmd__reset_result(&event); } // \return Always 0, to indicate that IPC mainloop source should be kept static int lrmd_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; if (native->callback != NULL) { xmlNode *msg = string2xml(buffer); lrmd_dispatch_internal(lrmd, msg); free_xml(msg); } return 0; } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_free_xml(gpointer userdata) { free_xml((xmlNode *) userdata); } static bool remote_executor_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; return (native->remote->tls_session != NULL); } /*! * \internal * \brief TLS dispatch function (for both trigger and file descriptor sources) * * \param[in,out] userdata API connection * * \return Always return a nonnegative value, which as a file descriptor * dispatch function means keep the mainloop source, and as a * trigger dispatch function, 0 means remove the trigger from the * mainloop while 1 means keep it (and job completed) */ static int lrmd_tls_dispatch(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *xml = NULL; int rc = pcmk_rc_ok; if (!remote_executor_connected(lrmd)) { crm_trace("TLS dispatch triggered after disconnect"); return 0; } crm_trace("TLS dispatch triggered"); /* First check if there are any pending notifies to process that came * while we were waiting for replies earlier. */ if (native->pending_notify) { GList *iter = NULL; crm_trace("Processing pending notifies"); for (iter = native->pending_notify; iter; iter = iter->next) { lrmd_dispatch_internal(lrmd, iter->data); } g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } /* Next read the current buffer and see if there are any messages to handle. */ switch (pcmk__remote_ready(native->remote, 0)) { case pcmk_rc_ok: rc = pcmk__read_remote_message(native->remote, -1); xml = pcmk__remote_message_xml(native->remote); break; case ETIME: // Nothing to read, check if a full message is already in buffer xml = pcmk__remote_message_xml(native->remote); break; default: rc = ENOTCONN; break; } while (xml) { const char *msg_type = crm_element_value(xml, F_LRMD_REMOTE_MSG_TYPE); if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { lrmd_dispatch_internal(lrmd, xml); } else if (pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { int reply_id = 0; crm_element_value_int(xml, F_LRMD_CALLID, &reply_id); /* if this happens, we want to know about it */ crm_err("Got outdated Pacemaker Remote reply %d", reply_id); } } free_xml(xml); xml = pcmk__remote_message_xml(native->remote); } if (rc == ENOTCONN) { crm_info("Lost %s executor connection while reading data", (native->remote_nodename? native->remote_nodename : "local")); lrmd_tls_disconnect(lrmd); return 0; } return 1; } #endif /* Not used with mainloop */ int lrmd_poll(lrmd_t * lrmd, int timeout) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_ready(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: if (native->pending_notify) { return 1; } else { int rc = pcmk__remote_ready(native->remote, 0); switch (rc) { case pcmk_rc_ok: return 1; case ETIME: return 0; default: return pcmk_rc2legacy(rc); } } #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return -EPROTONOSUPPORT; } } /* Not used with mainloop */ bool lrmd_dispatch(lrmd_t * lrmd) { lrmd_private_t *private = NULL; CRM_ASSERT(lrmd != NULL); private = lrmd->lrmd_private; switch (private->type) { case pcmk__client_ipc: while (crm_ipc_ready(private->ipc)) { if (crm_ipc_read(private->ipc) > 0) { const char *msg = crm_ipc_buffer(private->ipc); lrmd_ipc_dispatch(msg, strlen(msg), lrmd); } } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: lrmd_tls_dispatch(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", private->type); } if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Connection closed"); return FALSE; } return TRUE; } static xmlNode * lrmd_create_op(const char *token, const char *op, xmlNode *data, int timeout, enum lrmd_call_options options) { xmlNode *op_msg = create_xml_node(NULL, "lrmd_command"); CRM_CHECK(op_msg != NULL, return NULL); CRM_CHECK(token != NULL, return NULL); crm_xml_add(op_msg, F_XML_TAGNAME, "lrmd_command"); crm_xml_add(op_msg, F_TYPE, T_LRMD); crm_xml_add(op_msg, F_LRMD_CALLBACK_TOKEN, token); crm_xml_add(op_msg, F_LRMD_OPERATION, op); crm_xml_add_int(op_msg, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(op_msg, F_LRMD_CALLOPTS, options); if (data != NULL) { add_message_xml(op_msg, F_LRMD_CALLDATA, data); } crm_trace("Created executor %s command with call options %.8lx (%d)", op, (long)options, options); return op_msg; } static void lrmd_ipc_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: crm_info("Disconnected from local executor"); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: crm_info("Disconnected from remote executor on %s", native->remote_nodename); break; #endif default: crm_err("Unsupported executor connection type %d (bug?)", native->type); } /* Prevent these from being cleaned up in lrmd_api_disconnect() */ native->ipc = NULL; native->source = NULL; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_disconnect; event.remote_nodename = native->remote_nodename; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_connection_destroy(gpointer userdata) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; crm_info("TLS connection destroyed"); if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); } if (native->psk_cred_c) { gnutls_psk_free_client_credentials(native->psk_cred_c); } if (native->sock) { close(native->sock); } if (native->process_notify) { mainloop_destroy_trigger(native->process_notify); native->process_notify = NULL; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } free(native->remote->buffer); free(native->remote->start_state); native->remote->buffer = NULL; native->remote->start_state = NULL; native->source = 0; native->sock = 0; native->psk_cred_c = NULL; native->remote->tls_session = NULL; native->sock = 0; if (native->callback) { lrmd_event_data_t event = { 0, }; event.remote_nodename = native->remote_nodename; event.type = lrmd_event_disconnect; native->callback(&event); } return; } // \return Standard Pacemaker return code int lrmd__remote_send_xml(pcmk__remote_t *session, xmlNode *msg, uint32_t id, const char *msg_type) { crm_xml_add_int(msg, F_LRMD_REMOTE_MSG_ID, id); crm_xml_add(msg, F_LRMD_REMOTE_MSG_TYPE, msg_type); return pcmk__remote_send_xml(session, msg); } // \return Standard Pacemaker return code static int read_remote_reply(lrmd_t *lrmd, int total_timeout, int expected_reply_id, xmlNode **reply) { lrmd_private_t *native = lrmd->lrmd_private; time_t start = time(NULL); const char *msg_type = NULL; int reply_id = 0; int remaining_timeout = 0; int rc = pcmk_rc_ok; /* A timeout of 0 here makes no sense. We have to wait a period of time * for the response to come back. If -1 or 0, default to 10 seconds. */ if (total_timeout <= 0 || total_timeout > MAX_TLS_RECV_WAIT) { total_timeout = MAX_TLS_RECV_WAIT; } for (*reply = NULL; *reply == NULL; ) { *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { /* read some more off the tls buffer if we still have time left. */ if (remaining_timeout) { remaining_timeout = total_timeout - ((time(NULL) - start) * 1000); } else { remaining_timeout = total_timeout; } if (remaining_timeout <= 0) { return ETIME; } rc = pcmk__read_remote_message(native->remote, remaining_timeout); if (rc != pcmk_rc_ok) { return rc; } *reply = pcmk__remote_message_xml(native->remote); if (*reply == NULL) { return ENOMSG; } } crm_element_value_int(*reply, F_LRMD_REMOTE_MSG_ID, &reply_id); msg_type = crm_element_value(*reply, F_LRMD_REMOTE_MSG_TYPE); if (!msg_type) { crm_err("Empty msg type received while waiting for reply"); free_xml(*reply); *reply = NULL; } else if (pcmk__str_eq(msg_type, "notify", pcmk__str_casei)) { /* got a notify while waiting for reply, trigger the notify to be processed later */ crm_info("queueing notify"); native->pending_notify = g_list_append(native->pending_notify, *reply); if (native->process_notify) { crm_info("notify trigger set."); mainloop_set_trigger(native->process_notify); } *reply = NULL; } else if (!pcmk__str_eq(msg_type, "reply", pcmk__str_casei)) { /* msg isn't a reply, make some noise */ crm_err("Expected a reply, got %s", msg_type); free_xml(*reply); *reply = NULL; } else if (reply_id != expected_reply_id) { if (native->expected_late_replies > 0) { native->expected_late_replies--; } else { crm_err("Got outdated reply, expected id %d got id %d", expected_reply_id, reply_id); } free_xml(*reply); *reply = NULL; } } if (native->remote->buffer && native->process_notify) { mainloop_set_trigger(native->process_notify); } return rc; } // \return Standard Pacemaker return code static int send_remote_message(lrmd_t *lrmd, xmlNode *msg) { int rc = pcmk_rc_ok; lrmd_private_t *native = lrmd->lrmd_private; global_remote_msg_id++; if (global_remote_msg_id <= 0) { global_remote_msg_id = 1; } rc = lrmd__remote_send_xml(native->remote, msg, global_remote_msg_id, "request"); if (rc != pcmk_rc_ok) { crm_err("Disconnecting because TLS message could not be sent to " "Pacemaker Remote: %s", pcmk_rc_str(rc)); lrmd_tls_disconnect(lrmd); } return rc; } static int lrmd_tls_send_recv(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = 0; xmlNode *xml = NULL; if (!remote_executor_connected(lrmd)) { return -ENOTCONN; } rc = send_remote_message(lrmd, msg); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } rc = read_remote_reply(lrmd, timeout, global_remote_msg_id, &xml); if (rc != pcmk_rc_ok) { crm_err("Disconnecting remote after request %d reply not received: %s " CRM_XS " rc=%d timeout=%dms", global_remote_msg_id, pcmk_rc_str(rc), rc, timeout); lrmd_tls_disconnect(lrmd); } if (reply) { *reply = xml; } else { free_xml(xml); } return pcmk_rc2legacy(rc); } #endif static int lrmd_send_xml(lrmd_t * lrmd, xmlNode * msg, int timeout, xmlNode ** reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_client_response, timeout, reply); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_send_recv(lrmd, msg, timeout, reply); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_send_xml_no_reply(lrmd_t * lrmd, xmlNode * msg) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = crm_ipc_send(native->ipc, msg, crm_ipc_flags_none, 0, NULL); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = send_remote_message(lrmd, msg); if (rc == pcmk_rc_ok) { /* we don't want to wait around for the reply, but * since the request/reply protocol needs to behave the same * as libqb, a reply will eventually come later anyway. */ native->expected_late_replies++; } rc = pcmk_rc2legacy(rc); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static int lrmd_api_is_connected(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: return crm_ipc_connected(native->ipc); #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: return remote_executor_connected(lrmd); #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); return 0; } } /*! * \internal * \brief Send a prepared API command to the executor * * \param[in,out] lrmd Existing connection to the executor * \param[in] op Name of API command to send * \param[in] data Command data XML to add to the sent command * \param[out] output_data If expecting a reply, it will be stored here * \param[in] timeout Timeout in milliseconds (if 0, defaults to * a sensible value per the type of connection, * standard vs. pacemaker remote); * also propagated to the command XML * \param[in] call_options Call options to pass to server when sending * \param[in] expect_reply If TRUE, wait for a reply from the server; * must be TRUE for IPC (as opposed to TLS) clients * * \return pcmk_ok on success, -errno on error */ static int lrmd_send_command(lrmd_t *lrmd, const char *op, xmlNode *data, xmlNode **output_data, int timeout, enum lrmd_call_options options, gboolean expect_reply) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *op_msg = NULL; xmlNode *op_reply = NULL; if (!lrmd_api_is_connected(lrmd)) { return -ENOTCONN; } if (op == NULL) { crm_err("No operation specified"); return -EINVAL; } CRM_CHECK(native->token != NULL,; ); crm_trace("Sending %s op to executor", op); op_msg = lrmd_create_op(native->token, op, data, timeout, options); if (op_msg == NULL) { return -EINVAL; } if (expect_reply) { rc = lrmd_send_xml(lrmd, op_msg, timeout, &op_reply); } else { rc = lrmd_send_xml_no_reply(lrmd, op_msg); goto done; } if (rc < 0) { crm_perror(LOG_ERR, "Couldn't perform %s operation (timeout=%d): %d", op, timeout, rc); goto done; } else if(op_reply == NULL) { rc = -ENOMSG; goto done; } rc = pcmk_ok; crm_trace("%s op reply received", op); if (crm_element_value_int(op_reply, F_LRMD_RC, &rc) != 0) { rc = -ENOMSG; goto done; } crm_log_xml_trace(op_reply, "Reply"); if (output_data) { *output_data = op_reply; op_reply = NULL; /* Prevent subsequent free */ } done: if (lrmd_api_is_connected(lrmd) == FALSE) { crm_err("Executor disconnected"); } free_xml(op_msg); free_xml(op_reply); return rc; } static int lrmd_api_poke_connection(lrmd_t * lrmd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); rc = lrmd_send_command(lrmd, LRMD_OP_POKE, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return rc < 0 ? rc : pcmk_ok; } // \return Standard Pacemaker return code int lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) { int rc = pcmk_rc_ok; const char *value; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *data = create_xml_node(NULL, F_LRMD_OPERATION); crm_xml_add(data, F_LRMD_ORIGIN, __func__); - value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); + value = g_hash_table_lookup(hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT); if ((value) && (stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { crm_xml_add(data, F_LRMD_WATCHDOG, value); } rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, (native->type == pcmk__client_ipc)); free_xml(data); return (rc < 0)? pcmk_legacy2rc(rc) : pcmk_rc_ok; } static int lrmd_handshake(lrmd_t * lrmd, const char *name) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; xmlNode *reply = NULL; xmlNode *hello = create_xml_node(NULL, "lrmd_command"); crm_xml_add(hello, F_TYPE, T_LRMD); crm_xml_add(hello, F_LRMD_OPERATION, CRM_OP_REGISTER); crm_xml_add(hello, F_LRMD_CLIENTNAME, name); crm_xml_add(hello, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); /* advertise that we are a proxy provider */ if (native->proxy_callback) { pcmk__xe_set_bool_attr(hello, F_LRMD_IS_IPC_PROVIDER, true); } rc = lrmd_send_xml(lrmd, hello, -1, &reply); if (rc < 0) { crm_perror(LOG_DEBUG, "Couldn't complete registration with the executor API: %d", rc); rc = -ECOMM; } else if (reply == NULL) { crm_err("Did not receive registration reply"); rc = -EPROTO; } else { const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID); const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE); long long uptime = -1; crm_element_value_int(reply, F_LRMD_RC, &rc); /* The remote executor may add its uptime to the XML reply, which is * useful in handling transient attributes when the connection to the * remote node unexpectedly drops. If no parameter is given, just * default to -1. */ crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime); native->remote->uptime = uptime; if (start_state) { native->remote->start_state = strdup(start_state); } if (rc == -EPROTO) { crm_err("Executor protocol version mismatch between client (%s) and server (%s)", LRMD_PROTOCOL_VERSION, version); crm_log_xml_err(reply, "Protocol Error"); } else if (!pcmk__str_eq(msg_type, CRM_OP_REGISTER, pcmk__str_casei)) { crm_err("Invalid registration message: %s", msg_type); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else if (tmp_ticket == NULL) { crm_err("No registration token provided"); crm_log_xml_err(reply, "Bad reply"); rc = -EPROTO; } else { crm_trace("Obtained registration token: %s", tmp_ticket); native->token = strdup(tmp_ticket); native->peer_version = strdup(version?version:"1.0"); /* Included since 1.1 */ rc = pcmk_ok; } } free_xml(reply); free_xml(hello); if (rc != pcmk_ok) { lrmd_api_disconnect(lrmd); } return rc; } static int lrmd_ipc_connect(lrmd_t * lrmd, int *fd) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; struct ipc_client_callbacks lrmd_callbacks = { .dispatch = lrmd_ipc_dispatch, .destroy = lrmd_ipc_connection_destroy }; crm_info("Connecting to executor"); if (fd) { /* No mainloop */ native->ipc = crm_ipc_new(CRM_SYSTEM_LRMD, 0); if (native->ipc != NULL) { rc = pcmk__connect_generic_ipc(native->ipc); if (rc == pcmk_rc_ok) { rc = pcmk__ipc_fd(native->ipc, fd); } if (rc != pcmk_rc_ok) { crm_err("Connection to executor failed: %s", pcmk_rc_str(rc)); rc = -ENOTCONN; } } } else { native->source = mainloop_add_ipc_client(CRM_SYSTEM_LRMD, G_PRIORITY_HIGH, 0, lrmd, &lrmd_callbacks); native->ipc = mainloop_get_ipc_client(native->source); } if (native->ipc == NULL) { crm_debug("Could not connect to the executor API"); rc = -ENOTCONN; } return rc; } #ifdef HAVE_GNUTLS_GNUTLS_H static void copy_gnutls_datum(gnutls_datum_t *dest, gnutls_datum_t *source) { CRM_ASSERT((dest != NULL) && (source != NULL) && (source->data != NULL)); dest->data = gnutls_malloc(source->size); CRM_ASSERT(dest->data); memcpy(dest->data, source->data, source->size); dest->size = source->size; } static void clear_gnutls_datum(gnutls_datum_t *datum) { gnutls_free(datum->data); datum->data = NULL; datum->size = 0; } #define KEY_READ_LEN 256 // Chunk size for reading key from file // \return Standard Pacemaker return code static int read_gnutls_key(const char *location, gnutls_datum_t *key) { FILE *stream = NULL; size_t buf_len = KEY_READ_LEN; if ((location == NULL) || (key == NULL)) { return EINVAL; } stream = fopen(location, "r"); if (stream == NULL) { return errno; } key->data = gnutls_malloc(buf_len); key->size = 0; while (!feof(stream)) { int next = fgetc(stream); if (next == EOF) { if (!feof(stream)) { crm_warn("Pacemaker Remote key read was partially successful " "(copy in memory may be corrupted)"); } break; } if (key->size == buf_len) { buf_len = key->size + KEY_READ_LEN; key->data = gnutls_realloc(key->data, buf_len); CRM_ASSERT(key->data); } key->data[key->size++] = (unsigned char) next; } fclose(stream); if (key->size == 0) { clear_gnutls_datum(key); return ENOKEY; } return pcmk_rc_ok; } // Cache the most recently used Pacemaker Remote authentication key struct key_cache_s { time_t updated; // When cached key was read (valid for 1 minute) const char *location; // Where cached key was read from gnutls_datum_t key; // Cached key }; static bool key_is_cached(struct key_cache_s *key_cache) { return key_cache->updated != 0; } static bool key_cache_expired(struct key_cache_s *key_cache) { return (time(NULL) - key_cache->updated) >= 60; } static void clear_key_cache(struct key_cache_s *key_cache) { clear_gnutls_datum(&(key_cache->key)); if ((key_cache->updated != 0) || (key_cache->location != NULL)) { key_cache->updated = 0; key_cache->location = NULL; crm_debug("Cleared Pacemaker Remote key cache"); } } static void get_cached_key(struct key_cache_s *key_cache, gnutls_datum_t *key) { copy_gnutls_datum(key, &(key_cache->key)); crm_debug("Using cached Pacemaker Remote key from %s", pcmk__s(key_cache->location, "unknown location")); } static void cache_key(struct key_cache_s *key_cache, gnutls_datum_t *key, const char *location) { key_cache->updated = time(NULL); key_cache->location = location; copy_gnutls_datum(&(key_cache->key), key); crm_debug("Using (and cacheing) Pacemaker Remote key from %s", pcmk__s(location, "unknown location")); } /*! * \internal * \brief Get Pacemaker Remote authentication key from file or cache * * \param[in] location Path to key file to try (this memory must * persist across all calls of this function) * \param[out] key Key from location or cache * * \return Standard Pacemaker return code */ static int get_remote_key(const char *location, gnutls_datum_t *key) { static struct key_cache_s key_cache = { 0, }; int rc = pcmk_rc_ok; if ((location == NULL) || (key == NULL)) { return EINVAL; } if (key_is_cached(&key_cache)) { if (key_cache_expired(&key_cache)) { clear_key_cache(&key_cache); } else { get_cached_key(&key_cache, key); return pcmk_rc_ok; } } rc = read_gnutls_key(location, key); if (rc != pcmk_rc_ok) { return rc; } cache_key(&key_cache, key, location); return pcmk_rc_ok; } /*! * \internal * \brief Initialize the Pacemaker Remote authentication key * * Try loading the Pacemaker Remote authentication key from cache if available, * otherwise from these locations, in order of preference: the value of the * PCMK_authkey_location environment variable, if set; the Pacemaker default key * file location; or (for historical reasons) /etc/corosync/authkey. * * \param[out] key Where to store key * * \return Standard Pacemaker return code */ int lrmd__init_remote_key(gnutls_datum_t *key) { static const char *env_location = NULL; static bool need_env = true; int env_rc = pcmk_rc_ok; int default_rc = pcmk_rc_ok; int alt_rc = pcmk_rc_ok; bool env_is_default = false; bool env_is_fallback = false; if (need_env) { env_location = pcmk__env_option(PCMK__ENV_AUTHKEY_LOCATION); need_env = false; } // Try location in environment variable, if set if (env_location != NULL) { env_rc = get_remote_key(env_location, key); if (env_rc == pcmk_rc_ok) { return pcmk_rc_ok; } env_is_default = !strcmp(env_location, DEFAULT_REMOTE_KEY_LOCATION); env_is_fallback = !strcmp(env_location, ALT_REMOTE_KEY_LOCATION); /* @TODO It would be more secure to fail, rather than fall back to the * default, if an explicitly set key location is not readable, and it * would be better to never use the Corosync location as a fallback. * However, that would break any deployments currently working with the * fallbacks. */ } // Try default location, if environment wasn't explicitly set to it if (env_is_default) { default_rc = env_rc; } else { default_rc = get_remote_key(DEFAULT_REMOTE_KEY_LOCATION, key); } // Try fallback location, if environment wasn't set to it and default failed if (env_is_fallback) { alt_rc = env_rc; } else if (default_rc != pcmk_rc_ok) { alt_rc = get_remote_key(ALT_REMOTE_KEY_LOCATION, key); } // We have all results, so log and return if ((env_rc != pcmk_rc_ok) && (default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment set, everything failed crm_warn("Could not read Pacemaker Remote key from %s (%s%s%s%s%s): %s", env_location, env_is_default? "" : "or default location ", env_is_default? "" : DEFAULT_REMOTE_KEY_LOCATION, !env_is_default && !env_is_fallback? " " : "", env_is_fallback? "" : "or fallback location ", env_is_fallback? "" : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return ENOKEY; } if (env_rc != pcmk_rc_ok) { // Environment set but failed, using a default crm_warn("Could not read Pacemaker Remote key from %s " "(using %s location %s instead): %s", env_location, (default_rc == pcmk_rc_ok)? "default" : "fallback", (default_rc == pcmk_rc_ok)? DEFAULT_REMOTE_KEY_LOCATION : ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(env_rc)); return pcmk_rc_ok; } if ((default_rc != pcmk_rc_ok) && (alt_rc != pcmk_rc_ok)) { // Environment unset, defaults failed crm_warn("Could not read Pacemaker Remote key from default location %s" " (or fallback location %s): %s", DEFAULT_REMOTE_KEY_LOCATION, ALT_REMOTE_KEY_LOCATION, pcmk_rc_str(default_rc)); return ENOKEY; } return pcmk_rc_ok; // Environment variable unset, a default worked } static void lrmd_gnutls_global_init(void) { static int gnutls_init = 0; if (!gnutls_init) { crm_gnutls_global_init(); } gnutls_init = 1; } #endif static void report_async_connection_result(lrmd_t * lrmd, int rc) { lrmd_private_t *native = lrmd->lrmd_private; if (native->callback) { lrmd_event_data_t event = { 0, }; event.type = lrmd_event_connect; event.remote_nodename = native->remote_nodename; event.connection_rc = rc; native->callback(&event); } } #ifdef HAVE_GNUTLS_GNUTLS_H static inline int lrmd__tls_client_handshake(pcmk__remote_t *remote) { return pcmk__tls_client_handshake(remote, LRMD_CLIENT_HANDSHAKE_TIMEOUT); } /*! * \internal * \brief Add trigger and file descriptor mainloop sources for TLS * * \param[in,out] lrmd API connection with established TLS session * \param[in] do_handshake Whether to perform executor handshake * * \return Standard Pacemaker return code */ static int add_tls_to_mainloop(lrmd_t *lrmd, bool do_handshake) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_rc_ok; char *name = crm_strdup_printf("pacemaker-remote-%s:%d", native->server, native->port); struct mainloop_fd_callbacks tls_fd_callbacks = { .dispatch = lrmd_tls_dispatch, .destroy = lrmd_tls_connection_destroy, }; native->process_notify = mainloop_add_trigger(G_PRIORITY_HIGH, lrmd_tls_dispatch, lrmd); native->source = mainloop_add_fd(name, G_PRIORITY_HIGH, native->sock, lrmd, &tls_fd_callbacks); /* Async connections lose the client name provided by the API caller, so we * have to use our generated name here to perform the executor handshake. * * @TODO Keep track of the caller-provided name. Perhaps we should be using * that name in this function instead of generating one anyway. */ if (do_handshake) { rc = lrmd_handshake(lrmd, name); rc = pcmk_legacy2rc(rc); } free(name); return rc; } static void lrmd_tcp_connect_cb(void *userdata, int rc, int sock) { lrmd_t *lrmd = userdata; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; native->async_timer = 0; if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } /* The TCP connection was successful, so establish the TLS connection. * @TODO make this async to avoid blocking code in client */ native->sock = sock; rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { crm_info("Could not connect to Pacemaker Remote at %s:%d: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); return; } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EPROTO); return; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_warn("Disconnecting after TLS handshake with Pacemaker Remote server %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); report_async_connection_result(lrmd, -EKEYREJECTED); return; } crm_info("TLS connection to Pacemaker Remote server %s:%d succeeded", native->server, native->port); rc = add_tls_to_mainloop(lrmd, true); report_async_connection_result(lrmd, pcmk_rc2legacy(rc)); } static int lrmd_tls_connect_async(lrmd_t * lrmd, int timeout /*ms */ ) { int rc; int timer_id = 0; lrmd_private_t *native = lrmd->lrmd_private; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, timeout, &timer_id, &(native->sock), lrmd, lrmd_tcp_connect_cb); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } native->async_timer = timer_id; return pcmk_ok; } static int lrmd_tls_connect(lrmd_t * lrmd, int *fd) { int rc; lrmd_private_t *native = lrmd->lrmd_private; gnutls_datum_t psk_key = { NULL, 0 }; lrmd_gnutls_global_init(); native->sock = -1; rc = pcmk__connect_remote(native->server, native->port, 0, NULL, &(native->sock), NULL, NULL); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%d failed: %s " CRM_XS " rc=%d", native->server, native->port, pcmk_rc_str(rc), rc); lrmd_tls_connection_destroy(lrmd); return -ENOTCONN; } rc = lrmd__init_remote_key(&psk_key); if (rc != pcmk_rc_ok) { lrmd_tls_connection_destroy(lrmd); return pcmk_rc2legacy(rc); } gnutls_psk_allocate_client_credentials(&native->psk_cred_c); gnutls_psk_set_client_credentials(native->psk_cred_c, DEFAULT_REMOTE_USERNAME, &psk_key, GNUTLS_PSK_KEY_RAW); gnutls_free(psk_key.data); native->remote->tls_session = pcmk__new_tls_session(native->sock, GNUTLS_CLIENT, GNUTLS_CRD_PSK, native->psk_cred_c); if (native->remote->tls_session == NULL) { lrmd_tls_connection_destroy(lrmd); return -EPROTO; } if (lrmd__tls_client_handshake(native->remote) != pcmk_rc_ok) { crm_err("Session creation for %s:%d failed", native->server, native->port); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = NULL; lrmd_tls_connection_destroy(lrmd); return -EKEYREJECTED; } crm_info("Client TLS connection established with Pacemaker Remote server %s:%d", native->server, native->port); if (fd) { *fd = native->sock; } else { add_tls_to_mainloop(lrmd, false); } return pcmk_ok; } #endif static int lrmd_api_connect(lrmd_t * lrmd, const char *name, int *fd) { int rc = -ENOTCONN; lrmd_private_t *native = lrmd->lrmd_private; switch (native->type) { case pcmk__client_ipc: rc = lrmd_ipc_connect(lrmd, fd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect(lrmd, fd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } if (rc == pcmk_ok) { rc = lrmd_handshake(lrmd, name); } return rc; } static int lrmd_api_connect_async(lrmd_t * lrmd, const char *name, int timeout) { int rc = pcmk_ok; lrmd_private_t *native = lrmd->lrmd_private; CRM_CHECK(native && native->callback, return -EINVAL); switch (native->type) { case pcmk__client_ipc: /* fake async connection with ipc. it should be fast * enough that we gain very little from async */ rc = lrmd_api_connect(lrmd, name, NULL); if (!rc) { report_async_connection_result(lrmd, rc); } break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: rc = lrmd_tls_connect_async(lrmd, timeout); if (rc) { /* connection failed, report rc now */ report_async_connection_result(lrmd, rc); } break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } return rc; } static void lrmd_ipc_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; native->ipc = NULL; } else if (native->ipc) { /* Not attached to mainloop */ crm_ipc_t *ipc = native->ipc; native->ipc = NULL; crm_ipc_close(ipc); crm_ipc_destroy(ipc); } } #ifdef HAVE_GNUTLS_GNUTLS_H static void lrmd_tls_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote->tls_session) { gnutls_bye(*native->remote->tls_session, GNUTLS_SHUT_RDWR); gnutls_deinit(*native->remote->tls_session); gnutls_free(native->remote->tls_session); native->remote->tls_session = 0; } if (native->async_timer) { g_source_remove(native->async_timer); native->async_timer = 0; } if (native->source != NULL) { /* Attached to mainloop */ mainloop_del_ipc_client(native->source); native->source = NULL; } else if (native->sock) { close(native->sock); native->sock = 0; } if (native->pending_notify) { g_list_free_full(native->pending_notify, lrmd_free_xml); native->pending_notify = NULL; } } #endif static int lrmd_api_disconnect(lrmd_t * lrmd) { lrmd_private_t *native = lrmd->lrmd_private; int rc = pcmk_ok; switch (native->type) { case pcmk__client_ipc: crm_debug("Disconnecting from local executor"); lrmd_ipc_disconnect(lrmd); break; #ifdef HAVE_GNUTLS_GNUTLS_H case pcmk__client_tls: crm_debug("Disconnecting from remote executor on %s", native->remote_nodename); lrmd_tls_disconnect(lrmd); break; #endif default: crm_err("Unsupported executor connection type (bug?): %d", native->type); rc = -EPROTONOSUPPORT; } free(native->token); native->token = NULL; free(native->peer_version); native->peer_version = NULL; return rc; } static int lrmd_api_register_rsc(lrmd_t * lrmd, const char *rsc_id, const char *class, const char *provider, const char *type, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = NULL; if (!class || !type || !rsc_id) { return -EINVAL; } if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && (provider == NULL)) { return -EINVAL; } data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_CLASS, class); crm_xml_add(data, F_LRMD_PROVIDER, provider); crm_xml_add(data, F_LRMD_TYPE, type); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_REG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } static int lrmd_api_unregister_rsc(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_UNREG, data, NULL, 0, options, TRUE); free_xml(data); return rc; } lrmd_rsc_info_t * lrmd_new_rsc_info(const char *rsc_id, const char *standard, const char *provider, const char *type) { lrmd_rsc_info_t *rsc_info = calloc(1, sizeof(lrmd_rsc_info_t)); CRM_ASSERT(rsc_info); pcmk__str_update(&rsc_info->id, rsc_id); pcmk__str_update(&rsc_info->standard, standard); pcmk__str_update(&rsc_info->provider, provider); pcmk__str_update(&rsc_info->type, type); return rsc_info; } lrmd_rsc_info_t * lrmd_copy_rsc_info(lrmd_rsc_info_t * rsc_info) { return lrmd_new_rsc_info(rsc_info->id, rsc_info->standard, rsc_info->provider, rsc_info->type); } void lrmd_free_rsc_info(lrmd_rsc_info_t * rsc_info) { if (!rsc_info) { return; } free(rsc_info->id); free(rsc_info->type); free(rsc_info->standard); free(rsc_info->provider); free(rsc_info); } static lrmd_rsc_info_t * lrmd_api_get_rsc_info(lrmd_t * lrmd, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc_info = NULL; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *output = NULL; const char *class = NULL; const char *provider = NULL; const char *type = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); lrmd_send_command(lrmd, LRMD_OP_RSC_INFO, data, &output, 0, options, TRUE); free_xml(data); if (!output) { return NULL; } class = crm_element_value(output, F_LRMD_CLASS); provider = crm_element_value(output, F_LRMD_PROVIDER); type = crm_element_value(output, F_LRMD_TYPE); if (!class || !type) { free_xml(output); return NULL; } else if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider) && !provider) { free_xml(output); return NULL; } rsc_info = lrmd_new_rsc_info(rsc_id, class, provider, type); free_xml(output); return rsc_info; } void lrmd_free_op_info(lrmd_op_info_t *op_info) { if (op_info) { free(op_info->rsc_id); free(op_info->action); free(op_info->interval_ms_s); free(op_info->timeout_ms_s); free(op_info); } } static int lrmd_api_get_recurring_ops(lrmd_t *lrmd, const char *rsc_id, int timeout_ms, enum lrmd_call_options options, GList **output) { xmlNode *data = NULL; xmlNode *output_xml = NULL; int rc = pcmk_ok; if (output == NULL) { return -EINVAL; } *output = NULL; // Send request if (rsc_id) { data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); } rc = lrmd_send_command(lrmd, LRMD_OP_GET_RECURRING, data, &output_xml, timeout_ms, options, TRUE); if (data) { free_xml(data); } // Process reply if ((rc != pcmk_ok) || (output_xml == NULL)) { return rc; } for (xmlNode *rsc_xml = first_named_child(output_xml, F_LRMD_RSC); (rsc_xml != NULL) && (rc == pcmk_ok); rsc_xml = crm_next_same_xml(rsc_xml)) { rsc_id = crm_element_value(rsc_xml, F_LRMD_RSC_ID); if (rsc_id == NULL) { crm_err("Could not parse recurring operation information from executor"); continue; } for (xmlNode *op_xml = first_named_child(rsc_xml, T_LRMD_RSC_OP); op_xml != NULL; op_xml = crm_next_same_xml(op_xml)) { lrmd_op_info_t *op_info = calloc(1, sizeof(lrmd_op_info_t)); if (op_info == NULL) { rc = -ENOMEM; break; } op_info->rsc_id = strdup(rsc_id); op_info->action = crm_element_value_copy(op_xml, F_LRMD_RSC_ACTION); op_info->interval_ms_s = crm_element_value_copy(op_xml, F_LRMD_RSC_INTERVAL); op_info->timeout_ms_s = crm_element_value_copy(op_xml, F_LRMD_TIMEOUT); *output = g_list_prepend(*output, op_info); } } free_xml(output_xml); return rc; } static void lrmd_api_set_callback(lrmd_t * lrmd, lrmd_event_callback callback) { lrmd_private_t *native = lrmd->lrmd_private; native->callback = callback; } void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)) { lrmd_private_t *native = lrmd->lrmd_private; native->proxy_callback = callback; native->proxy_callback_userdata = userdata; } void lrmd_internal_proxy_dispatch(lrmd_t *lrmd, xmlNode *msg) { lrmd_private_t *native = lrmd->lrmd_private; if (native->proxy_callback) { crm_log_xml_trace(msg, "PROXY_INBOUND"); native->proxy_callback(lrmd, native->proxy_callback_userdata, msg); } } int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg) { if (lrmd == NULL) { return -ENOTCONN; } crm_xml_add(msg, F_LRMD_OPERATION, CRM_OP_IPC_FWD); crm_log_xml_trace(msg, "PROXY_OUTBOUND"); return lrmd_send_xml_no_reply(lrmd, msg); } static int stonith_get_metadata(const char *provider, const char *type, char **output) { int rc = pcmk_ok; stonith_t *stonith_api = stonith_api_new(); if (stonith_api == NULL) { crm_err("Could not get fence agent meta-data: API memory allocation failed"); return -ENOMEM; } rc = stonith_api->cmds->metadata(stonith_api, st_opt_sync_call, type, provider, output, 0); if ((rc == pcmk_ok) && (*output == NULL)) { rc = -EIO; } stonith_api->cmds->free(stonith_api); return rc; } static int lrmd_api_get_metadata(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options) { return lrmd->cmds->get_metadata_params(lrmd, standard, provider, type, output, options, NULL); } static int lrmd_api_get_metadata_params(lrmd_t *lrmd, const char *standard, const char *provider, const char *type, char **output, enum lrmd_call_options options, lrmd_key_value_t *params) { svc_action_t *action = NULL; GHashTable *params_table = NULL; if (!standard || !type) { lrmd_key_value_freeall(params); return -EINVAL; } if (pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { lrmd_key_value_freeall(params); return stonith_get_metadata(provider, type, output); } params_table = pcmk__strkey_table(free, free); for (const lrmd_key_value_t *param = params; param; param = param->next) { g_hash_table_insert(params_table, strdup(param->key), strdup(param->value)); } action = services__create_resource_action(type, standard, provider, type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_METADATA_TIMEOUT_MS, params_table, 0); lrmd_key_value_freeall(params); if (action == NULL) { return -ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { services_action_free(action); return -EINVAL; } if (!services_action_sync(action)) { crm_err("Failed to retrieve meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } if (!action->stdout_data) { crm_err("Failed to receive meta-data for %s:%s:%s", standard, provider, type); services_action_free(action); return -EIO; } *output = strdup(action->stdout_data); services_action_free(action); return pcmk_ok; } static int lrmd_api_exec(lrmd_t *lrmd, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout, /* ms */ int start_delay, /* ms */ enum lrmd_call_options options, lrmd_key_value_t * params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_USERDATA_STR, userdata); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); crm_xml_add_int(data, F_LRMD_RSC_START_DELAY, start_delay); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_RSC_EXEC, data, NULL, timeout, options, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } /* timeout is in ms */ static int lrmd_api_exec_alert(lrmd_t *lrmd, const char *alert_id, const char *alert_path, int timeout, lrmd_key_value_t *params) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_ALERT); xmlNode *args = create_xml_node(data, XML_TAG_ATTRS); lrmd_key_value_t *tmp = NULL; crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_ALERT_ID, alert_id); crm_xml_add(data, F_LRMD_ALERT_PATH, alert_path); crm_xml_add_int(data, F_LRMD_TIMEOUT, timeout); for (tmp = params; tmp; tmp = tmp->next) { hash2smartfield((gpointer) tmp->key, (gpointer) tmp->value, args); } rc = lrmd_send_command(lrmd, LRMD_OP_ALERT_EXEC, data, NULL, timeout, lrmd_opt_notify_orig_only, TRUE); free_xml(data); lrmd_key_value_freeall(params); return rc; } static int lrmd_api_cancel(lrmd_t *lrmd, const char *rsc_id, const char *action, guint interval_ms) { int rc = pcmk_ok; xmlNode *data = create_xml_node(NULL, F_LRMD_RSC); crm_xml_add(data, F_LRMD_ORIGIN, __func__); crm_xml_add(data, F_LRMD_RSC_ACTION, action); crm_xml_add(data, F_LRMD_RSC_ID, rsc_id); crm_xml_add_ms(data, F_LRMD_RSC_INTERVAL, interval_ms); rc = lrmd_send_command(lrmd, LRMD_OP_RSC_CANCEL, data, NULL, 0, 0, TRUE); free_xml(data); return rc; } static int list_stonith_agents(lrmd_list_t ** resources) { int rc = 0; stonith_t *stonith_api = stonith_api_new(); stonith_key_value_t *stonith_resources = NULL; stonith_key_value_t *dIter = NULL; if (stonith_api == NULL) { crm_err("Could not list fence agents: API memory allocation failed"); return -ENOMEM; } stonith_api->cmds->list_agents(stonith_api, st_opt_sync_call, NULL, &stonith_resources, 0); stonith_api->cmds->free(stonith_api); for (dIter = stonith_resources; dIter; dIter = dIter->next) { rc++; if (resources) { *resources = lrmd_list_add(*resources, dIter->value); } } stonith_key_value_freeall(stonith_resources, 1, 0); return rc; } static int lrmd_api_list_agents(lrmd_t * lrmd, lrmd_list_t ** resources, const char *class, const char *provider) { int rc = 0; int stonith_count = 0; // Initially, whether to include stonith devices if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { stonith_count = 1; } else { GList *gIter = NULL; GList *agents = resources_list_agents(class, provider); for (gIter = agents; gIter != NULL; gIter = gIter->next) { *resources = lrmd_list_add(*resources, (const char *)gIter->data); rc++; } g_list_free_full(agents, free); if (!class) { stonith_count = 1; } } if (stonith_count) { // Now, if stonith devices are included, how many there are stonith_count = list_stonith_agents(resources); if (stonith_count > 0) { rc += stonith_count; } } if (rc == 0) { crm_notice("No agents found for class %s", class); rc = -EPROTONOSUPPORT; } return rc; } static bool does_provider_have_agent(const char *agent, const char *provider, const char *class) { bool found = false; GList *agents = NULL; GList *gIter2 = NULL; agents = resources_list_agents(class, provider); for (gIter2 = agents; gIter2 != NULL; gIter2 = gIter2->next) { if (pcmk__str_eq(agent, gIter2->data, pcmk__str_casei)) { found = true; } } g_list_free_full(agents, free); return found; } static int lrmd_api_list_ocf_providers(lrmd_t * lrmd, const char *agent, lrmd_list_t ** providers) { int rc = pcmk_ok; char *provider = NULL; GList *ocf_providers = NULL; GList *gIter = NULL; ocf_providers = resources_list_providers(PCMK_RESOURCE_CLASS_OCF); for (gIter = ocf_providers; gIter != NULL; gIter = gIter->next) { provider = gIter->data; if (!agent || does_provider_have_agent(agent, provider, PCMK_RESOURCE_CLASS_OCF)) { *providers = lrmd_list_add(*providers, (const char *)gIter->data); rc++; } } g_list_free_full(ocf_providers, free); return rc; } static int lrmd_api_list_standards(lrmd_t * lrmd, lrmd_list_t ** supported) { int rc = 0; GList *standards = NULL; GList *gIter = NULL; standards = resources_list_standards(); for (gIter = standards; gIter != NULL; gIter = gIter->next) { *supported = lrmd_list_add(*supported, (const char *)gIter->data); rc++; } if (list_stonith_agents(NULL) > 0) { *supported = lrmd_list_add(*supported, PCMK_RESOURCE_CLASS_STONITH); rc++; } g_list_free_full(standards, free); return rc; } /*! * \internal * \brief Create an executor API object * * \param[out] api Will be set to newly created API object (it is the * caller's responsibility to free this value with * lrmd_api_delete() if this function succeeds) * \param[in] nodename If the object will be used for a remote connection, * the node name to use in cluster for remote executor * \param[in] server If the object will be used for a remote connection, * the resolvable host name to connect to * \param[in] port If the object will be used for a remote connection, * port number on \p server to connect to * * \return Standard Pacemaker return code * \note If the caller leaves one of \p nodename or \p server NULL, the other's * value will be used for both. If the caller leaves both NULL, an API * object will be created for a local executor connection. */ int lrmd__new(lrmd_t **api, const char *nodename, const char *server, int port) { lrmd_private_t *pvt = NULL; if (api == NULL) { return EINVAL; } *api = NULL; // Allocate all memory needed *api = calloc(1, sizeof(lrmd_t)); if (*api == NULL) { return ENOMEM; } pvt = calloc(1, sizeof(lrmd_private_t)); if (pvt == NULL) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } (*api)->lrmd_private = pvt; // @TODO Do we need to do this for local connections? pvt->remote = calloc(1, sizeof(pcmk__remote_t)); (*api)->cmds = calloc(1, sizeof(lrmd_api_operations_t)); if ((pvt->remote == NULL) || ((*api)->cmds == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } // Set methods (*api)->cmds->connect = lrmd_api_connect; (*api)->cmds->connect_async = lrmd_api_connect_async; (*api)->cmds->is_connected = lrmd_api_is_connected; (*api)->cmds->poke_connection = lrmd_api_poke_connection; (*api)->cmds->disconnect = lrmd_api_disconnect; (*api)->cmds->register_rsc = lrmd_api_register_rsc; (*api)->cmds->unregister_rsc = lrmd_api_unregister_rsc; (*api)->cmds->get_rsc_info = lrmd_api_get_rsc_info; (*api)->cmds->get_recurring_ops = lrmd_api_get_recurring_ops; (*api)->cmds->set_callback = lrmd_api_set_callback; (*api)->cmds->get_metadata = lrmd_api_get_metadata; (*api)->cmds->exec = lrmd_api_exec; (*api)->cmds->cancel = lrmd_api_cancel; (*api)->cmds->list_agents = lrmd_api_list_agents; (*api)->cmds->list_ocf_providers = lrmd_api_list_ocf_providers; (*api)->cmds->list_standards = lrmd_api_list_standards; (*api)->cmds->exec_alert = lrmd_api_exec_alert; (*api)->cmds->get_metadata_params = lrmd_api_get_metadata_params; if ((nodename == NULL) && (server == NULL)) { pvt->type = pcmk__client_ipc; } else { #ifdef HAVE_GNUTLS_GNUTLS_H if (nodename == NULL) { nodename = server; } else if (server == NULL) { server = nodename; } pvt->type = pcmk__client_tls; pvt->remote_nodename = strdup(nodename); pvt->server = strdup(server); if ((pvt->remote_nodename == NULL) || (pvt->server == NULL)) { lrmd_api_delete(*api); *api = NULL; return ENOMEM; } pvt->port = port; if (pvt->port == 0) { pvt->port = crm_default_remote_port(); } #else crm_err("Cannot communicate with Pacemaker Remote " "because GnuTLS is not enabled for this build"); lrmd_api_delete(*api); *api = NULL; return EOPNOTSUPP; #endif } return pcmk_rc_ok; } lrmd_t * lrmd_api_new(void) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, NULL, NULL, 0) == pcmk_rc_ok); return api; } lrmd_t * lrmd_remote_api_new(const char *nodename, const char *server, int port) { lrmd_t *api = NULL; CRM_ASSERT(lrmd__new(&api, nodename, server, port) == pcmk_rc_ok); return api; } void lrmd_api_delete(lrmd_t * lrmd) { if (lrmd == NULL) { return; } if (lrmd->cmds != NULL) { // Never NULL, but make static analysis happy if (lrmd->cmds->disconnect != NULL) { // Also never really NULL lrmd->cmds->disconnect(lrmd); // No-op if already disconnected } free(lrmd->cmds); } if (lrmd->lrmd_private != NULL) { lrmd_private_t *native = lrmd->lrmd_private; #ifdef HAVE_GNUTLS_GNUTLS_H free(native->server); #endif free(native->remote_nodename); free(native->remote); free(native->token); free(native->peer_version); free(lrmd->lrmd_private); } free(lrmd); } struct metadata_cb { void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data); void *user_data; }; /*! * \internal * \brief Process asynchronous metadata completion * * \param[in,out] action Metadata action that completed */ static void metadata_complete(svc_action_t *action) { struct metadata_cb *metadata_cb = (struct metadata_cb *) action->cb_data; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; pcmk__set_result(&result, action->rc, action->status, services__exit_reason(action)); pcmk__set_result_output(&result, action->stdout_data, action->stderr_data); metadata_cb->callback(0, &result, metadata_cb->user_data); result.action_stdout = NULL; // Prevent free, because action owns it result.action_stderr = NULL; // Prevent free, because action owns it pcmk__reset_result(&result); free(metadata_cb); } /*! * \internal * \brief Retrieve agent metadata asynchronously * * \param[in] rsc Resource agent specification * \param[in] callback Function to call with result (this will always be * called, whether by this function directly or later * via the main loop, and on success the metadata will * be in its result argument's action_stdout) * \param[in,out] user_data User data to pass to callback * * \return Standard Pacemaker return code * \note This function is not a lrmd_api_operations_t method because it does not * need an lrmd_t object and does not go through the executor, but * executes the agent directly. */ int lrmd__metadata_async(const lrmd_rsc_info_t *rsc, void (*callback)(int pid, const pcmk__action_result_t *result, void *user_data), void *user_data) { svc_action_t *action = NULL; struct metadata_cb *metadata_cb = NULL; pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; CRM_CHECK(callback != NULL, return EINVAL); if ((rsc == NULL) || (rsc->standard == NULL) || (rsc->type == NULL)) { pcmk__set_result(&result, PCMK_OCF_NOT_CONFIGURED, PCMK_EXEC_ERROR_FATAL, "Invalid resource specification"); callback(0, &result, user_data); pcmk__reset_result(&result); return EINVAL; } if (strcmp(rsc->standard, PCMK_RESOURCE_CLASS_STONITH) == 0) { return stonith__metadata_async(rsc->type, PCMK_DEFAULT_METADATA_TIMEOUT_MS / 1000, callback, user_data); } action = services__create_resource_action(pcmk__s(rsc->id, rsc->type), rsc->standard, rsc->provider, rsc->type, PCMK_ACTION_META_DATA, 0, PCMK_DEFAULT_METADATA_TIMEOUT_MS, NULL, 0); if (action == NULL) { pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } if (action->rc != PCMK_OCF_UNKNOWN) { pcmk__set_result(&result, action->rc, action->status, services__exit_reason(action)); callback(0, &result, user_data); pcmk__reset_result(&result); services_action_free(action); return EINVAL; } action->cb_data = calloc(1, sizeof(struct metadata_cb)); if (action->cb_data == NULL) { services_action_free(action); pcmk__set_result(&result, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, "Out of memory"); callback(0, &result, user_data); pcmk__reset_result(&result); return ENOMEM; } metadata_cb = (struct metadata_cb *) action->cb_data; metadata_cb->callback = callback; metadata_cb->user_data = user_data; if (!services_action_async(action, metadata_complete)) { services_action_free(action); return pcmk_rc_error; // @TODO Derive from action->rc and ->status } // The services library has taken responsibility for action return pcmk_rc_ok; } /*! * \internal * \brief Set the result of an executor event * * \param[in,out] event Executor event to set * \param[in] rc OCF exit status of event * \param[in] op_status Executor status of event * \param[in] exit_reason Human-friendly description of event */ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, int op_status, const char *exit_reason) { if (event == NULL) { return; } event->rc = rc; event->op_status = op_status; pcmk__str_update((char **) &event->exit_reason, exit_reason); } /*! * \internal * \brief Clear an executor event's exit reason, output, and error output * * \param[in,out] event Executor event to reset */ void lrmd__reset_result(lrmd_event_data_t *event) { if (event == NULL) { return; } free((void *) event->exit_reason); event->exit_reason = NULL; free((void *) event->output); event->output = NULL; } /*! * \internal * \brief Get the uptime of a remote resource connection * * When the cluster connects to a remote resource, part of that resource's * handshake includes the uptime of the remote resource's connection. This * uptime is stored in the lrmd_t object. * * \return The connection's uptime, or -1 if unknown */ time_t lrmd__uptime(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return -1; } else { return native->remote->uptime; } } const char * lrmd__node_start_state(lrmd_t *lrmd) { lrmd_private_t *native = lrmd->lrmd_private; if (native->remote == NULL) { return NULL; } else { return native->remote->start_state; } } diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c index 7b166c03ed..93175a3dc2 100644 --- a/lib/pengine/unpack.c +++ b/lib/pengine/unpack.c @@ -1,5108 +1,5109 @@ /* * Copyright 2004-2024 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include CRM_TRACE_INIT_DATA(pe_status); // A (parsed) resource action history entry struct action_history { pcmk_resource_t *rsc; // Resource that history is for pcmk_node_t *node; // Node that history is for xmlNode *xml; // History entry XML // Parsed from entry XML const char *id; // XML ID of history entry const char *key; // Operation key of action const char *task; // Action name const char *exit_reason; // Exit reason given for result guint interval_ms; // Action interval int call_id; // Call ID of action int expected_exit_status; // Expected exit status of action int exit_status; // Actual exit status of action int execution_status; // Execution status of action }; /* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than * use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the * flag is stringified more readably in log messages. */ #define set_config_flag(scheduler, option, flag) do { \ const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \ if (scf_value != NULL) { \ if (crm_is_true(scf_value)) { \ (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } else { \ (scheduler)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, "Scheduler", \ crm_system_name, (scheduler)->flags, \ (flag), #flag); \ } \ } \ } while(0) static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *failed); static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node); static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler); static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler); static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler); static gboolean is_dangling_guest_node(pcmk_node_t *node) { /* we are looking for a remote-node that was supposed to be mapped to a * container resource, but all traces of that container have disappeared * from both the config and the status section. */ if (pe__is_guest_or_remote_node(node) && node->details->remote_rsc && node->details->remote_rsc->container == NULL && pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_removed_filler)) { return TRUE; } return FALSE; } /*! * \brief Schedule a fence action for a node * * \param[in,out] scheduler Scheduler data * \param[in,out] node Node to fence * \param[in] reason Text description of why fencing is needed * \param[in] priority_delay Whether to consider * \c PCMK_OPT_PRIORITY_FENCING_DELAY */ void pe_fence_node(pcmk_scheduler_t *scheduler, pcmk_node_t *node, const char *reason, bool priority_delay) { CRM_CHECK(node, return); /* A guest node is fenced by marking its container as failed */ if (pe__is_guest_node(node)) { pcmk_resource_t *rsc = node->details->remote_rsc->container; if (!pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing guest node %s " "(otherwise would because %s): " "its guest resource %s is unmanaged", pe__node_name(node), reason, rsc->id); } else { pcmk__sched_warn("Guest node %s will be fenced " "(by recovering its guest resource %s): %s", pe__node_name(node), rsc->id, reason); /* We don't mark the node as unclean because that would prevent the * node from running resources. We want to allow it to run resources * in this transition if the recovery succeeds. */ node->details->remote_requires_reset = TRUE; pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } } } else if (is_dangling_guest_node(node)) { crm_info("Cleaning up dangling connection for guest node %s: " "fencing was already done because %s, " "and guest resource no longer exists", pe__node_name(node), reason); pe__set_resource_flags(node->details->remote_rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } else if (pe__is_remote_node(node)) { pcmk_resource_t *rsc = node->details->remote_rsc; if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { crm_notice("Not fencing remote node %s " "(otherwise would because %s): connection is unmanaged", pe__node_name(node), reason); } else if(node->details->remote_requires_reset == FALSE) { node->details->remote_requires_reset = TRUE; pcmk__sched_warn("Remote node %s %s: %s", pe__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); } node->details->unclean = TRUE; // No need to apply PCMK_OPT_PRIORITY_FENCING_DELAY for remote nodes pe_fence_op(node, NULL, TRUE, reason, FALSE, scheduler); } else if (node->details->unclean) { crm_trace("Cluster node %s %s because %s", pe__node_name(node), pe_can_fence(scheduler, node)? "would also be fenced" : "also is unclean", reason); } else { pcmk__sched_warn("Cluster node %s %s: %s", pe__node_name(node), pe_can_fence(scheduler, node)? "will be fenced" : "is unclean", reason); node->details->unclean = TRUE; pe_fence_op(node, NULL, TRUE, reason, priority_delay, scheduler); } } // @TODO xpaths can't handle templates, rules, or id-refs // nvpair with provides or requires set to unfencing #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \ "[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \ "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \ "and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']" // unfencing in rsc_defaults or any resource #define XPATH_ENABLE_UNFENCING \ "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \ "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \ "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \ "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR static void set_if_xpath(uint64_t flag, const char *xpath, pcmk_scheduler_t *scheduler) { xmlXPathObjectPtr result = NULL; if (!pcmk_is_set(scheduler->flags, flag)) { result = xpath_search(scheduler->input, xpath); if (result && (numXpathResults(result) > 0)) { pe__set_working_set_flags(scheduler, flag); } freeXpathObject(result); } } gboolean unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) { const char *value = NULL; GHashTable *config_hash = pcmk__strkey_table(free, free); pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = pcmk_role_unknown, .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; scheduler->config_hash = config_hash; pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, CIB_OPTIONS_FIRST, FALSE, scheduler); verify_pe_options(scheduler->config_hash); set_config_flag(scheduler, PCMK_OPT_ENABLE_STARTUP_PROBES, pcmk_sched_probe_resources); if (!pcmk_is_set(scheduler->flags, pcmk_sched_probe_resources)) { crm_info("Startup probes: disabled (dangerous)"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_HAVE_WATCHDOG); if (value && crm_is_true(value)) { crm_info("Watchdog-based self-fencing will be performed via SBD if " - "fencing is required and stonith-watchdog-timeout is nonzero"); + "fencing is required and " PCMK_OPT_STONITH_WATCHDOG_TIMEOUT + " is nonzero"); pe__set_working_set_flags(scheduler, pcmk_sched_have_fencing); } /* Set certain flags via xpath here, so they can be used before the relevant * configuration sections are unpacked. */ set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING, scheduler); value = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_TIMEOUT); scheduler->stonith_timeout = (int) crm_parse_interval_spec(value); crm_debug("STONITH timeout: %d", scheduler->stonith_timeout); set_config_flag(scheduler, PCMK_OPT_STONITH_ENABLED, pcmk_sched_fencing_enabled); if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { crm_debug("STONITH of failed nodes is enabled"); } else { crm_debug("STONITH of failed nodes is disabled"); } scheduler->stonith_action = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_ACTION); if (!strcmp(scheduler->stonith_action, "poweroff")) { pcmk__warn_once(pcmk__wo_poweroff, "Support for " PCMK_OPT_STONITH_ACTION " of " "'poweroff' is deprecated and will be removed in a " "future release (use 'off' instead)"); scheduler->stonith_action = PCMK_ACTION_OFF; } crm_trace("STONITH will %s nodes", scheduler->stonith_action); set_config_flag(scheduler, PCMK_OPT_CONCURRENT_FENCING, pcmk_sched_concurrent_fencing); if (pcmk_is_set(scheduler->flags, pcmk_sched_concurrent_fencing)) { crm_debug("Concurrent fencing is enabled"); } else { crm_debug("Concurrent fencing is disabled"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_PRIORITY_FENCING_DELAY); if (value) { scheduler->priority_fencing_delay = crm_parse_interval_spec(value) / 1000; crm_trace("Priority fencing delay is %ds", scheduler->priority_fencing_delay); } set_config_flag(scheduler, "stop-all-resources", pcmk_sched_stop_all); crm_debug("Stop all active resources: %s", pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_stop_all))); set_config_flag(scheduler, "symmetric-cluster", pcmk_sched_symmetric_cluster); if (pcmk_is_set(scheduler->flags, pcmk_sched_symmetric_cluster)) { crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_NO_QUORUM_POLICY); if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_ignore; } else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_freeze; } else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) { scheduler->no_quorum_policy = pcmk_no_quorum_demote; } else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) { if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { int do_panic = 0; crm_element_value_int(scheduler->input, XML_ATTR_QUORUM_PANIC, &do_panic); if (do_panic || pcmk_is_set(scheduler->flags, pcmk_sched_quorate)) { scheduler->no_quorum_policy = pcmk_no_quorum_fence; } else { crm_notice("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop': cluster has never had quorum"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { pcmk__config_err("Resetting " PCMK_OPT_NO_QUORUM_POLICY " to 'stop' because fencing is disabled"); scheduler->no_quorum_policy = pcmk_no_quorum_stop; } } else { scheduler->no_quorum_policy = pcmk_no_quorum_stop; } switch (scheduler->no_quorum_policy) { case pcmk_no_quorum_freeze: crm_debug("On loss of quorum: Freeze resources"); break; case pcmk_no_quorum_stop: crm_debug("On loss of quorum: Stop ALL resources"); break; case pcmk_no_quorum_demote: crm_debug("On loss of quorum: " "Demote promotable resources and stop other resources"); break; case pcmk_no_quorum_fence: crm_notice("On loss of quorum: Fence all remaining nodes"); break; case pcmk_no_quorum_ignore: crm_notice("On loss of quorum: Ignore"); break; } set_config_flag(scheduler, "stop-orphan-resources", pcmk_sched_stop_removed_resources); if (pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { crm_trace("Orphan resources are stopped"); } else { crm_trace("Orphan resources are ignored"); } set_config_flag(scheduler, "stop-orphan-actions", pcmk_sched_cancel_removed_actions); if (pcmk_is_set(scheduler->flags, pcmk_sched_cancel_removed_actions)) { crm_trace("Orphan resource actions are stopped"); } else { crm_trace("Orphan resource actions are ignored"); } value = pe_pref(scheduler->config_hash, PCMK__OPT_REMOVE_AFTER_STOP); if (value != NULL) { if (crm_is_true(value)) { pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop); #ifndef PCMK__COMPAT_2_0 pcmk__warn_once(pcmk__wo_remove_after, "Support for the " PCMK__OPT_REMOVE_AFTER_STOP " cluster property is deprecated and will be " "removed in a future release"); #endif } else { pe__clear_working_set_flags(scheduler, pcmk_sched_remove_after_stop); } } set_config_flag(scheduler, PCMK_OPT_MAINTENANCE_MODE, pcmk_sched_in_maintenance); crm_trace("Maintenance mode: %s", pcmk__btoa(pcmk_is_set(scheduler->flags, pcmk_sched_in_maintenance))); set_config_flag(scheduler, PCMK_OPT_START_FAILURE_IS_FATAL, pcmk_sched_start_failure_fatal); if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) { crm_trace("Start failures are always fatal"); } else { crm_trace("Start failures are handled by failcount"); } if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { set_config_flag(scheduler, PCMK_OPT_STARTUP_FENCING, pcmk_sched_startup_fencing); } if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { crm_trace("Unseen nodes will be fenced"); } else { pcmk__warn_once(pcmk__wo_blind, "Blind faith: not fencing unseen nodes"); } pe__unpack_node_health_scores(scheduler); scheduler->placement_strategy = pe_pref(scheduler->config_hash, PCMK_OPT_PLACEMENT_STRATEGY); crm_trace("Placement strategy: %s", scheduler->placement_strategy); set_config_flag(scheduler, PCMK_OPT_SHUTDOWN_LOCK, pcmk_sched_shutdown_lock); if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { value = pe_pref(scheduler->config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT); scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000; crm_trace("Resources will be locked to nodes that were cleanly " "shut down (locks expire after %s)", pcmk__readable_interval(scheduler->shutdown_lock)); } else { crm_trace("Resources will not be locked to nodes that were cleanly " "shut down"); } value = pe_pref(scheduler->config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT); scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000; if (scheduler->node_pending_timeout == 0) { crm_trace("Do not fence pending nodes"); } else { crm_trace("Fence pending nodes after %s", pcmk__readable_interval(scheduler->node_pending_timeout * 1000)); } return TRUE; } pcmk_node_t * pe_create_node(const char *id, const char *uname, const char *type, const char *score, pcmk_scheduler_t *scheduler) { pcmk_node_t *new_node = NULL; if (pe_find_node(scheduler->nodes, uname) != NULL) { pcmk__config_warn("More than one node entry has name '%s'", uname); } new_node = calloc(1, sizeof(pcmk_node_t)); if (new_node == NULL) { pcmk__sched_err("Could not allocate memory for node %s", uname); return NULL; } new_node->weight = char2score(score); new_node->details = calloc(1, sizeof(struct pe_node_shared_s)); if (new_node->details == NULL) { free(new_node); pcmk__sched_err("Could not allocate memory for node %s", uname); return NULL; } crm_trace("Creating node for entry %s/%s", uname, id); new_node->details->id = id; new_node->details->uname = uname; new_node->details->online = FALSE; new_node->details->shutdown = FALSE; new_node->details->rsc_discovery_enabled = TRUE; new_node->details->running_rsc = NULL; new_node->details->data_set = scheduler; if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) { new_node->details->type = pcmk_node_variant_cluster; } else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) { new_node->details->type = pcmk_node_variant_remote; pe__set_working_set_flags(scheduler, pcmk_sched_have_remote_nodes); } else { /* @COMPAT 'ping' is the default for backward compatibility, but it * should be changed to 'member' at a compatibility break */ if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) { pcmk__config_warn("Node %s has unrecognized type '%s', " "assuming 'ping'", pcmk__s(uname, "without name"), type); } pcmk__warn_once(pcmk__wo_ping_node, "Support for nodes of type 'ping' (such as %s) is " "deprecated and will be removed in a future release", pcmk__s(uname, "unnamed node")); new_node->details->type = node_ping; } new_node->details->attrs = pcmk__strkey_table(free, free); if (pe__is_guest_or_remote_node(new_node)) { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("remote")); } else { g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("cluster")); } new_node->details->utilization = pcmk__strkey_table(free, free); new_node->details->digest_cache = pcmk__strkey_table(free, pe__free_digests); scheduler->nodes = g_list_insert_sorted(scheduler->nodes, new_node, pe__cmp_node_name); return new_node; } static const char * expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pcmk_scheduler_t *data) { xmlNode *attr_set = NULL; xmlNode *attr = NULL; const char *container_id = ID(xml_obj); const char *remote_name = NULL; const char *remote_server = NULL; const char *remote_port = NULL; const char *connect_timeout = "60s"; const char *remote_allow_migrate=NULL; const char *is_managed = NULL; for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL; attr_set = pcmk__xe_next(attr_set)) { if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS, pcmk__str_casei)) { continue; } for (attr = pcmk__xe_first_child(attr_set); attr != NULL; attr = pcmk__xe_next(attr)) { const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE); const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME); if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) { remote_name = value; } else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) { remote_server = value; } else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) { remote_port = value; } else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) { connect_timeout = value; } else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) { remote_allow_migrate=value; } else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) { is_managed = value; } } } if (remote_name == NULL) { return NULL; } if (pe_find_resource(data->resources, remote_name) != NULL) { return NULL; } pe_create_remote_xml(parent, remote_name, container_id, remote_allow_migrate, is_managed, connect_timeout, remote_server, remote_port); return remote_name; } static void handle_startup_fencing(pcmk_scheduler_t *scheduler, pcmk_node_t *new_node) { if ((new_node->details->type == pcmk_node_variant_remote) && (new_node->details->remote_rsc == NULL)) { /* Ignore fencing for remote nodes that don't have a connection resource * associated with them. This happens when remote node entries get left * in the nodes section after the connection resource is removed. */ return; } if (pcmk_is_set(scheduler->flags, pcmk_sched_startup_fencing)) { // All nodes are unclean until we've seen their status entry new_node->details->unclean = TRUE; } else { // Blind faith ... new_node->details->unclean = FALSE; } /* We need to be able to determine if a node's status section * exists or not separate from whether the node is unclean. */ new_node->details->unseen = TRUE; } gboolean unpack_nodes(xmlNode *xml_nodes, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; pcmk_node_t *new_node = NULL; const char *id = NULL; const char *uname = NULL; const char *type = NULL; const char *score = NULL; for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) { new_node = NULL; id = crm_element_value(xml_obj, XML_ATTR_ID); uname = crm_element_value(xml_obj, XML_ATTR_UNAME); type = crm_element_value(xml_obj, XML_ATTR_TYPE); score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE); crm_trace("Processing node %s/%s", uname, id); if (id == NULL) { pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE "> entry in configuration without id"); continue; } new_node = pe_create_node(id, uname, type, score, scheduler); if (new_node == NULL) { return FALSE; } handle_startup_fencing(scheduler, new_node); add_node_attrs(xml_obj, new_node, FALSE, scheduler); crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME)); } } if (scheduler->localhost && (pe_find_node(scheduler->nodes, scheduler->localhost) == NULL)) { crm_info("Creating a fake local node"); pe_create_node(scheduler->localhost, scheduler->localhost, NULL, 0, scheduler); } return TRUE; } static void setup_container(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler) { const char *container_id = NULL; if (rsc->children) { g_list_foreach(rsc->children, (GFunc) setup_container, scheduler); return; } container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER); if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) { pcmk_resource_t *container = pe_find_resource(scheduler->resources, container_id); if (container) { rsc->container = container; pe__set_resource_flags(container, pcmk_rsc_has_filler); container->fillers = g_list_append(container->fillers, rsc); pcmk__rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id); } else { pcmk__config_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id); } } } gboolean unpack_remote_nodes(xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; /* Create remote nodes and guest nodes from the resource configuration * before unpacking resources. */ for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { const char *new_node_id = NULL; /* Check for remote nodes, which are defined by ocf:pacemaker:remote * primitives. */ if (xml_contains_remote_node(xml_obj)) { new_node_id = ID(xml_obj); /* The "pe_find_node" check is here to make sure we don't iterate over * an expanded node that has already been added to the node list. */ if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found remote node %s defined by resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } continue; } /* Check for guest nodes, which are defined by special meta-attributes * of a primitive of any type (for example, VirtualDomain or Xen). */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) { /* This will add an ocf:pacemaker:remote primitive to the * configuration for the guest node's connection, to be unpacked * later. */ new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, scheduler); if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s", new_node_id, ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } continue; } /* Check for guest nodes inside a group. Clones are currently not * supported as guest nodes. */ if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) { xmlNode *xml_obj2 = NULL; for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = pcmk__xe_next(xml_obj2)) { new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, scheduler); if (new_node_id && (pe_find_node(scheduler->nodes, new_node_id) == NULL)) { crm_trace("Found guest node %s in resource %s inside group %s", new_node_id, ID(xml_obj2), ID(xml_obj)); pe_create_node(new_node_id, new_node_id, "remote", NULL, scheduler); } } } } return TRUE; } /* Call this after all the nodes and resources have been * unpacked, but before the status section is read. * * A remote node's online status is reflected by the state * of the remote node's connection resource. We need to link * the remote node to this connection resource so we can have * easy access to the connection resource during the scheduler calculations. */ static void link_rsc2remotenode(pcmk_scheduler_t *scheduler, pcmk_resource_t *new_rsc) { pcmk_node_t *remote_node = NULL; if (new_rsc->is_remote_node == FALSE) { return; } if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* remote_nodes and remote_resources are not linked in quick location calculations */ return; } remote_node = pe_find_node(scheduler->nodes, new_rsc->id); CRM_CHECK(remote_node != NULL, return); pcmk__rsc_trace(new_rsc, "Linking remote connection resource %s to %s", new_rsc->id, pe__node_name(remote_node)); remote_node->details->remote_rsc = new_rsc; if (new_rsc->container == NULL) { /* Handle start-up fencing for remote nodes (as opposed to guest nodes) * the same as is done for cluster nodes. */ handle_startup_fencing(scheduler, remote_node); } else { /* pe_create_node() marks the new node as "remote" or "cluster"; now * that we know the node is a guest node, update it correctly. */ g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND), strdup("container")); } } static void destroy_tag(gpointer data) { pcmk_tag_t *tag = data; if (tag) { free(tag->id); g_list_free_full(tag->refs, free); free(tag); } } /*! * \internal * \brief Parse configuration XML for resource information * * \param[in] xml_resources Top of resource configuration XML * \param[in,out] scheduler Scheduler data * * \return TRUE * * \note unpack_remote_nodes() MUST be called before this, so that the nodes can * be used when pe__unpack_resource() calls resource_location() */ gboolean unpack_resources(const xmlNode *xml_resources, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; GList *gIter = NULL; scheduler->template_rsc_sets = pcmk__strkey_table(free, destroy_tag); for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { pcmk_resource_t *new_rsc = NULL; const char *id = ID(xml_obj); if (pcmk__str_empty(id)) { pcmk__config_err("Ignoring <%s> resource without ID", xml_obj->name); continue; } if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, pcmk__str_none)) { if (g_hash_table_lookup_extended(scheduler->template_rsc_sets, id, NULL, NULL) == FALSE) { /* Record the template's ID for the knowledge of its existence anyway. */ g_hash_table_insert(scheduler->template_rsc_sets, strdup(id), NULL); } continue; } crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>", xml_obj->name, id); if (pe__unpack_resource(xml_obj, &new_rsc, NULL, scheduler) == pcmk_rc_ok) { scheduler->resources = g_list_append(scheduler->resources, new_rsc); pcmk__rsc_trace(new_rsc, "Added resource %s", new_rsc->id); } else { pcmk__config_err("Ignoring <%s> resource '%s' " "because configuration is invalid", xml_obj->name, id); } } for (gIter = scheduler->resources; gIter != NULL; gIter = gIter->next) { pcmk_resource_t *rsc = (pcmk_resource_t *) gIter->data; setup_container(rsc, scheduler); link_rsc2remotenode(scheduler, rsc); } scheduler->resources = g_list_sort(scheduler->resources, pe__cmp_rsc_priority); if (pcmk_is_set(scheduler->flags, pcmk_sched_location_only)) { /* Ignore */ } else if (pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled) && !pcmk_is_set(scheduler->flags, pcmk_sched_have_fencing)) { pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined"); pcmk__config_err("Either configure some or disable STONITH with the " PCMK_OPT_STONITH_ENABLED " option"); pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity"); } return TRUE; } gboolean unpack_tags(xmlNode *xml_tags, pcmk_scheduler_t *scheduler) { xmlNode *xml_tag = NULL; scheduler->tags = pcmk__strkey_table(free, destroy_tag); for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL; xml_tag = pcmk__xe_next(xml_tag)) { xmlNode *xml_obj_ref = NULL; const char *tag_id = ID(xml_tag); if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) { continue; } if (tag_id == NULL) { pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID, (const char *) xml_tag->name); continue; } for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = pcmk__xe_next(xml_obj_ref)) { const char *obj_ref = ID(xml_obj_ref); if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) { continue; } if (obj_ref == NULL) { pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID, xml_obj_ref->name, tag_id); continue; } if (add_tag_ref(scheduler->tags, tag_id, obj_ref) == FALSE) { return FALSE; } } } return TRUE; } /* The ticket state section: * "/cib/status/tickets/ticket_state" */ static gboolean unpack_ticket_state(xmlNode *xml_ticket, pcmk_scheduler_t *scheduler) { const char *ticket_id = NULL; const char *granted = NULL; const char *last_granted = NULL; const char *standby = NULL; xmlAttrPtr xIter = NULL; pcmk_ticket_t *ticket = NULL; ticket_id = ID(xml_ticket); if (pcmk__str_empty(ticket_id)) { return FALSE; } crm_trace("Processing ticket state for %s", ticket_id); ticket = g_hash_table_lookup(scheduler->tickets, ticket_id); if (ticket == NULL) { ticket = ticket_new(ticket_id, scheduler); if (ticket == NULL) { return FALSE; } } for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) { const char *prop_name = (const char *)xIter->name; const char *prop_value = pcmk__xml_attr_value(xIter); if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) { continue; } g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value)); } granted = g_hash_table_lookup(ticket->state, "granted"); if (granted && crm_is_true(granted)) { ticket->granted = TRUE; crm_info("We have ticket '%s'", ticket->id); } else { ticket->granted = FALSE; crm_info("We do not have ticket '%s'", ticket->id); } last_granted = g_hash_table_lookup(ticket->state, "last-granted"); if (last_granted) { long long last_granted_ll; pcmk__scan_ll(last_granted, &last_granted_ll, 0LL); ticket->last_granted = (time_t) last_granted_ll; } standby = g_hash_table_lookup(ticket->state, "standby"); if (standby && crm_is_true(standby)) { ticket->standby = TRUE; if (ticket->granted) { crm_info("Granted ticket '%s' is in standby-mode", ticket->id); } } else { ticket->standby = FALSE; } crm_trace("Done with ticket state for %s", ticket_id); return TRUE; } static gboolean unpack_tickets_state(xmlNode *xml_tickets, pcmk_scheduler_t *scheduler) { xmlNode *xml_obj = NULL; for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL; xml_obj = pcmk__xe_next(xml_obj)) { if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) { continue; } unpack_ticket_state(xml_obj, scheduler); } return TRUE; } static void unpack_handle_remote_attrs(pcmk_node_t *this_node, const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *resource_discovery_enabled = NULL; const xmlNode *attrs = NULL; pcmk_resource_t *rsc = NULL; if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { return; } if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) { return; } crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node)); pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE), &(this_node->details->remote_maintenance), 0); rsc = this_node->details->remote_rsc; if (this_node->details->remote_requires_reset == FALSE) { this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; } attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, this_node, TRUE, scheduler); if (pe__shutdown_requested(this_node)) { crm_info("%s is shutting down", pe__node_name(this_node)); this_node->details->shutdown = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) { crm_info("%s is in standby mode", pe__node_name(this_node)); this_node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) || ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_managed))) { crm_info("%s is in maintenance mode", pe__node_name(this_node)); this_node->details->maintenance = TRUE; } resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY); if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) { if (pe__is_remote_node(this_node) && !pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute on Pacemaker Remote node %s" " because fencing is disabled", pe__node_name(this_node)); } else { /* This is either a remote node with fencing enabled, or a guest * node. We don't care whether fencing is enabled when fencing guest * nodes, because they are "fenced" by recovering their containing * resource. */ crm_info("%s has resource discovery disabled", pe__node_name(this_node)); this_node->details->rsc_discovery_enabled = FALSE; } } } /*! * \internal * \brief Unpack a cluster node's transient attributes * * \param[in] state CIB node state XML * \param[in,out] node Cluster node whose attributes are being unpacked * \param[in,out] scheduler Scheduler data */ static void unpack_transient_attributes(const xmlNode *state, pcmk_node_t *node, pcmk_scheduler_t *scheduler) { const char *discovery = NULL; const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE); add_node_attrs(attrs, node, TRUE, scheduler); if (crm_is_true(pe_node_attribute_raw(node, "standby"))) { crm_info("%s is in standby mode", pe__node_name(node)); node->details->standby = TRUE; } if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) { crm_info("%s is in maintenance mode", pe__node_name(node)); node->details->maintenance = TRUE; } discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY); if ((discovery != NULL) && !crm_is_true(discovery)) { pcmk__config_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY " attribute " "for %s because disabling resource discovery is " "not allowed for cluster nodes", pe__node_name(node)); } } /*! * \internal * \brief Unpack a node state entry (first pass) * * Unpack one node state entry from status. This unpacks information from the * node_state element itself and node attributes inside it, but not the * resource history inside it. Multiple passes through the status are needed to * fully unpack everything. * * \param[in] state CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_state(const xmlNode *state, pcmk_scheduler_t *scheduler) { const char *id = NULL; const char *uname = NULL; pcmk_node_t *this_node = NULL; id = crm_element_value(state, XML_ATTR_ID); if (id == NULL) { pcmk__config_err("Ignoring invalid " XML_CIB_TAG_STATE " entry without " XML_ATTR_ID); crm_log_xml_info(state, "missing-id"); return; } uname = crm_element_value(state, XML_ATTR_UNAME); if (uname == NULL) { /* If a joining peer makes the cluster acquire the quorum from corosync * meanwhile it has not joined CPG membership of pacemaker-controld yet, * it's possible that the created node_state entry doesn't have an uname * yet. We should recognize the node as `pending` and wait for it to * join CPG. */ crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without " XML_ATTR_UNAME, id); } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) " "because it is no longer in the configuration", id, pcmk__s(uname, "uname unknown")); return; } if (pe__is_guest_or_remote_node(this_node)) { /* We can't determine the online status of Pacemaker Remote nodes until * after all resource history has been unpacked. In this first pass, we * do need to mark whether the node has been fenced, as this plays a * role during unpacking cluster node resource state. */ pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED), &(this_node->details->remote_was_fenced), 0); return; } unpack_transient_attributes(state, this_node, scheduler); /* Provisionally mark this cluster node as clean. We have at least seen it * in the current cluster's lifetime. */ this_node->details->unclean = FALSE; this_node->details->unseen = FALSE; crm_trace("Determining online status of cluster node %s (id %s)", pe__node_name(this_node), id); determine_online_status(state, this_node, scheduler); if (!pcmk_is_set(scheduler->flags, pcmk_sched_quorate) && this_node->details->online && (scheduler->no_quorum_policy == pcmk_no_quorum_fence)) { /* Everything else should flow from this automatically * (at least until the scheduler becomes able to migrate off * healthy resources) */ pe_fence_node(scheduler, this_node, "cluster does not have quorum", FALSE); } } /*! * \internal * \brief Unpack nodes' resource history as much as possible * * Unpack as many nodes' resource history as possible in one pass through the * status. We need to process Pacemaker Remote nodes' connections/containers * before unpacking their history; the connection/container history will be * in another node's history, so it might take multiple passes to unpack * everything. * * \param[in] status CIB XML status section * \param[in] fence If true, treat any not-yet-unpacked nodes as unseen * \param[in,out] scheduler Scheduler data * * \return Standard Pacemaker return code (specifically pcmk_rc_ok if done, * or EAGAIN if more unpacking remains to be done) */ static int unpack_node_history(const xmlNode *status, bool fence, pcmk_scheduler_t *scheduler) { int rc = pcmk_rc_ok; // Loop through all node_state entries in CIB status for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE); state != NULL; state = crm_next_same_xml(state)) { const char *id = ID(state); const char *uname = crm_element_value(state, XML_ATTR_UNAME); pcmk_node_t *this_node = NULL; if ((id == NULL) || (uname == NULL)) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history from malformed " XML_CIB_TAG_STATE " without id and/or uname"); continue; } this_node = pe_find_node_any(scheduler->nodes, id, uname); if (this_node == NULL) { // Warning already logged in first pass through status section crm_trace("Not unpacking resource history for node %s because " "no longer in configuration", id); continue; } if (this_node->details->unpacked) { crm_trace("Not unpacking resource history for node %s because " "already unpacked", id); continue; } if (fence) { // We're processing all remaining nodes } else if (pe__is_guest_node(this_node)) { /* We can unpack a guest node's history only after we've unpacked * other resource history to the point that we know that the node's * connection and containing resource are both up. */ pcmk_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (rsc->role != pcmk_role_started) || (rsc->container->role != pcmk_role_started)) { crm_trace("Not unpacking resource history for guest node %s " "because container and connection are not known to " "be up", id); continue; } } else if (pe__is_remote_node(this_node)) { /* We can unpack a remote node's history only after we've unpacked * other resource history to the point that we know that the node's * connection is up, with the exception of when shutdown locks are * in use. */ pcmk_resource_t *rsc = this_node->details->remote_rsc; if ((rsc == NULL) || (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock) && (rsc->role != pcmk_role_started))) { crm_trace("Not unpacking resource history for remote node %s " "because connection is not known to be up", id); continue; } /* If fencing and shutdown locks are disabled and we're not processing * unseen nodes, then we don't want to unpack offline nodes until online * nodes have been unpacked. This allows us to number active clone * instances first. */ } else if (!pcmk_any_flags_set(scheduler->flags, pcmk_sched_fencing_enabled |pcmk_sched_shutdown_lock) && !this_node->details->online) { crm_trace("Not unpacking resource history for offline " "cluster node %s", id); continue; } if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); unpack_handle_remote_attrs(this_node, state, scheduler); } crm_trace("Unpacking resource history for %snode %s", (fence? "unseen " : ""), id); this_node->details->unpacked = TRUE; unpack_node_lrm(this_node, state, scheduler); rc = EAGAIN; // Other node histories might depend on this one } return rc; } /* remove nodes that are down, stopping */ /* create positive rsc_to_node constraints between resources and the nodes they are running on */ /* anything else? */ gboolean unpack_status(xmlNode *status, pcmk_scheduler_t *scheduler) { xmlNode *state = NULL; crm_trace("Beginning unpack"); if (scheduler->tickets == NULL) { scheduler->tickets = pcmk__strkey_table(free, destroy_ticket); } for (state = pcmk__xe_first_child(status); state != NULL; state = pcmk__xe_next(state)) { if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) { unpack_tickets_state((xmlNode *) state, scheduler); } else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { unpack_node_state(state, scheduler); } } while (unpack_node_history(status, FALSE, scheduler) == EAGAIN) { crm_trace("Another pass through node resource histories is needed"); } // Now catch any nodes we didn't see unpack_node_history(status, pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled), scheduler); /* Now that we know where resources are, we can schedule stops of containers * with failed bundle connections */ if (scheduler->stop_needed != NULL) { for (GList *item = scheduler->stop_needed; item; item = item->next) { pcmk_resource_t *container = item->data; pcmk_node_t *node = pe__current_node(container); if (node) { stop_action(container, node, FALSE); } } g_list_free(scheduler->stop_needed); scheduler->stop_needed = NULL; } /* Now that we know status of all Pacemaker Remote connections and nodes, * we can stop connections for node shutdowns, and check the online status * of remote/guest nodes that didn't have any node history to unpack. */ for (GList *gIter = scheduler->nodes; gIter != NULL; gIter = gIter->next) { pcmk_node_t *this_node = gIter->data; if (!pe__is_guest_or_remote_node(this_node)) { continue; } if (this_node->details->shutdown && (this_node->details->remote_rsc != NULL)) { pe__set_next_role(this_node->details->remote_rsc, pcmk_role_stopped, "remote shutdown"); } if (!this_node->details->unpacked) { determine_remote_online_status(scheduler, this_node); } } return TRUE; } /*! * \internal * \brief Unpack node's time when it became a member at the cluster layer * * \param[in] node_state Node's node_state entry * \param[in,out] scheduler Scheduler data * * \return Epoch time when node became a cluster member * (or scheduler effective time for legacy entries) if a member, * 0 if not a member, or -1 if no valid information available */ static long long unpack_node_member(const xmlNode *node_state, pcmk_scheduler_t *scheduler) { const char *member_time = crm_element_value(node_state, PCMK__XA_IN_CCM); int member = 0; if (member_time == NULL) { return -1LL; } else if (crm_str_to_boolean(member_time, &member) == 1) { /* If in_ccm=0, we'll return 0 here. If in_ccm=1, either the entry was * recorded as a boolean for a DC < 2.1.7, or the node is pending * shutdown and has left the CPG, in which case it was set to 1 to avoid * fencing for PCMK_OPT_NODE_PENDING_TIMEOUT. * * We return the effective time for in_ccm=1 because what's important to * avoid fencing is that effective time minus this value is less than * the pending node timeout. */ return member? (long long) get_effective_time(scheduler) : 0LL; } else { long long when_member = 0LL; if ((pcmk__scan_ll(member_time, &when_member, 0LL) != pcmk_rc_ok) || (when_member < 0LL)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_IN_CCM " in " XML_CIB_TAG_STATE " entry", member_time); return -1LL; } return when_member; } } /*! * \internal * \brief Unpack node's time when it became online in process group * * \param[in] node_state Node's node_state entry * * \return Epoch time when node became online in process group (or 0 if not * online, or 1 for legacy online entries) */ static long long unpack_node_online(const xmlNode *node_state) { const char *peer_time = crm_element_value(node_state, PCMK__XA_CRMD); // @COMPAT Entries recorded for DCs < 2.1.7 have "online" or "offline" if (pcmk__str_eq(peer_time, OFFLINESTATUS, pcmk__str_casei|pcmk__str_null_matches)) { return 0LL; } else if (pcmk__str_eq(peer_time, ONLINESTATUS, pcmk__str_casei)) { return 1LL; } else { long long when_online = 0LL; if ((pcmk__scan_ll(peer_time, &when_online, 0LL) != pcmk_rc_ok) || (when_online < 0)) { crm_warn("Unrecognized value '%s' for " PCMK__XA_CRMD " in " XML_CIB_TAG_STATE " entry, assuming offline", peer_time); return 0LL; } return when_online; } } /*! * \internal * \brief Unpack node attribute for user-requested fencing * * \param[in] node Node to check * \param[in] node_state Node's node_state entry in CIB status * * \return \c true if fencing has been requested for \p node, otherwise \c false */ static bool unpack_node_terminate(const pcmk_node_t *node, const xmlNode *node_state) { long long value = 0LL; int value_i = 0; const char *value_s = pe_node_attribute_raw(node, PCMK_NODE_ATTR_TERMINATE); // Value may be boolean or an epoch time if (crm_str_to_boolean(value_s, &value_i) == 1) { return (value_i != 0); } if (pcmk__scan_ll(value_s, &value, 0LL) == pcmk_rc_ok) { return (value > 0); } crm_warn("Ignoring unrecognized value '%s' for " PCMK_NODE_ATTR_TERMINATE "node attribute for %s", value_s, pe__node_name(node)); return false; } static gboolean determine_online_status_no_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { gboolean online = FALSE; const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); if (when_member <= 0) { crm_trace("Node %s is %sdown", pe__node_name(this_node), ((when_member < 0)? "presumed " : "")); } else if (when_online > 0) { if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { online = TRUE; } else { crm_debug("Node %s is not ready to run resources: %s", pe__node_name(this_node), join); } } else if (this_node->details->expected_up == FALSE) { crm_trace("Node %s controller is down: " "member@%lld online@%lld join=%s expected=%s", pe__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } else { /* mark it unclean */ pe_fence_node(scheduler, this_node, "peer is unexpectedly down", FALSE); crm_info("Node %s member@%lld online@%lld join=%s expected=%s", pe__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, "")); } return online; } /*! * \internal * \brief Check whether a node has taken too long to join controller group * * \param[in,out] scheduler Scheduler data * \param[in] node Node to check * \param[in] when_member Epoch time when node became a cluster member * \param[in] when_online Epoch time when node joined controller group * * \return true if node has been pending (on the way up) longer than * \c PCMK_OPT_NODE_PENDING_TIMEOUT, otherwise false * \note This will also update the cluster's recheck time if appropriate. */ static inline bool pending_too_long(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, long long when_member, long long when_online) { if ((scheduler->node_pending_timeout > 0) && (when_member > 0) && (when_online <= 0)) { // There is a timeout on pending nodes, and node is pending time_t timeout = when_member + scheduler->node_pending_timeout; if (get_effective_time(node->details->data_set) >= timeout) { return true; // Node has timed out } // Node is pending, but still has time pe__update_recheck_time(timeout, scheduler, "pending node timeout"); } return false; } static bool determine_online_status_fencing(pcmk_scheduler_t *scheduler, const xmlNode *node_state, pcmk_node_t *this_node) { bool termination_requested = unpack_node_terminate(this_node, node_state); const char *join = crm_element_value(node_state, PCMK__XA_JOIN); const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); long long when_member = unpack_node_member(node_state, scheduler); long long when_online = unpack_node_online(node_state); /* - PCMK__XA_JOIN ::= member|down|pending|banned - PCMK__XA_EXPECTED ::= member|down @COMPAT with entries recorded for DCs < 2.1.7 - PCMK__XA_IN_CCM ::= true|false - PCMK__XA_CRMD ::= online|offline Since crm_feature_set 3.18.0 (pacemaker-2.1.7): - PCMK__XA_IN_CCM ::= |0 Since when node has been a cluster member. A value 0 of means the node is not a cluster member. - PCMK__XA_CRMD ::= |0 Since when peer has been online in CPG. A value 0 means the peer is offline in CPG. */ crm_trace("Node %s member@%lld online@%lld join=%s expected=%s%s", pe__node_name(this_node), when_member, when_online, pcmk__s(join, ""), pcmk__s(exp_state, ""), (termination_requested? " (termination requested)" : "")); if (this_node->details->shutdown) { crm_debug("%s is shutting down", pe__node_name(this_node)); /* Slightly different criteria since we can't shut down a dead peer */ return (when_online > 0); } if (when_member < 0) { pe_fence_node(scheduler, this_node, "peer has not been seen by the cluster", FALSE); return false; } if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_none)) { pe_fence_node(scheduler, this_node, "peer failed Pacemaker membership criteria", FALSE); } else if (termination_requested) { if ((when_member <= 0) && (when_online <= 0) && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_none)) { crm_info("%s was fenced as requested", pe__node_name(this_node)); return false; } pe_fence_node(scheduler, this_node, "fencing was requested", false); } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_null_matches)) { if (pending_too_long(scheduler, this_node, when_member, when_online)) { pe_fence_node(scheduler, this_node, "peer pending timed out on joining the process group", FALSE); } else if ((when_member > 0) || (when_online > 0)) { crm_info("- %s is not ready to run resources", pe__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { crm_trace("%s is down or still coming up", pe__node_name(this_node)); } } else if (when_member <= 0) { // Consider PCMK_OPT_PRIORITY_FENCING_DELAY for lost nodes pe_fence_node(scheduler, this_node, "peer is no longer part of the cluster", TRUE); } else if (when_online <= 0) { pe_fence_node(scheduler, this_node, "peer process is no longer available", FALSE); /* Everything is running at this point, now check join state */ } else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_none)) { crm_info("%s is active", pe__node_name(this_node)); } else if (pcmk__str_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) { crm_info("%s is not ready to run resources", pe__node_name(this_node)); this_node->details->standby = TRUE; this_node->details->pending = TRUE; } else { pe_fence_node(scheduler, this_node, "peer was in an unknown state", FALSE); } return (when_member > 0); } static void determine_remote_online_status(pcmk_scheduler_t *scheduler, pcmk_node_t *this_node) { pcmk_resource_t *rsc = this_node->details->remote_rsc; pcmk_resource_t *container = NULL; pcmk_node_t *host = NULL; /* If there is a node state entry for a (former) Pacemaker Remote node * but no resource creating that node, the node's connection resource will * be NULL. Consider it an offline remote node in that case. */ if (rsc == NULL) { this_node->details->online = FALSE; goto remote_online_done; } container = rsc->container; if (container && pcmk__list_of_1(rsc->running_on)) { host = rsc->running_on->data; } /* If the resource is currently started, mark it online. */ if (rsc->role == pcmk_role_started) { crm_trace("%s node %s presumed ONLINE because connection resource is started", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = TRUE; } /* consider this node shutting down if transitioning start->stop */ if ((rsc->role == pcmk_role_started) && (rsc->next_role == pcmk_role_stopped)) { crm_trace("%s node %s shutting down because connection resource is stopping", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->shutdown = TRUE; } /* Now check all the failure conditions. */ if(container && pcmk_is_set(container->flags, pcmk_rsc_failed)) { crm_trace("Guest node %s UNCLEAN because guest resource failed", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } else if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) { crm_trace("%s node %s OFFLINE because connection resource failed", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; } else if ((rsc->role == pcmk_role_stopped) || ((container != NULL) && (container->role == pcmk_role_stopped))) { crm_trace("%s node %s OFFLINE because its resource is stopped", (container? "Guest" : "Remote"), this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = FALSE; } else if (host && (host->details->online == FALSE) && host->details->unclean) { crm_trace("Guest node %s UNCLEAN because host is unclean", this_node->details->id); this_node->details->online = FALSE; this_node->details->remote_requires_reset = TRUE; } remote_online_done: crm_trace("Remote node %s online=%s", this_node->details->id, this_node->details->online ? "TRUE" : "FALSE"); } static void determine_online_status(const xmlNode *node_state, pcmk_node_t *this_node, pcmk_scheduler_t *scheduler) { gboolean online = FALSE; const char *exp_state = crm_element_value(node_state, PCMK__XA_EXPECTED); CRM_CHECK(this_node != NULL, return); this_node->details->shutdown = FALSE; this_node->details->expected_up = FALSE; if (pe__shutdown_requested(this_node)) { this_node->details->shutdown = TRUE; } else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) { this_node->details->expected_up = TRUE; } if (this_node->details->type == node_ping) { this_node->details->unclean = FALSE; online = FALSE; /* As far as resource management is concerned, * the node is safely offline. * Anyone caught abusing this logic will be shot */ } else if (!pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { online = determine_online_status_no_fencing(scheduler, node_state, this_node); } else { online = determine_online_status_fencing(scheduler, node_state, this_node); } if (online) { this_node->details->online = TRUE; } else { /* remove node from contention */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (online && this_node->details->shutdown) { /* don't run resources here */ this_node->fixed = TRUE; // @COMPAT deprecated and unused this_node->weight = -INFINITY; } if (this_node->details->type == node_ping) { crm_info("%s is not a Pacemaker node", pe__node_name(this_node)); } else if (this_node->details->unclean) { pcmk__sched_warn("%s is unclean", pe__node_name(this_node)); } else if (this_node->details->online) { crm_info("%s is %s", pe__node_name(this_node), this_node->details->shutdown ? "shutting down" : this_node->details->pending ? "pending" : this_node->details->standby ? "standby" : this_node->details->maintenance ? "maintenance" : "online"); } else { crm_trace("%s is offline", pe__node_name(this_node)); } } /*! * \internal * \brief Find the end of a resource's name, excluding any clone suffix * * \param[in] id Resource ID to check * * \return Pointer to last character of resource's base name */ const char * pe_base_name_end(const char *id) { if (!pcmk__str_empty(id)) { const char *end = id + strlen(id) - 1; for (const char *s = end; s > id; --s) { switch (*s) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; case ':': return (s == end)? s : (s - 1); default: return end; } } return end; } return NULL; } /*! * \internal * \brief Get a resource name excluding any clone suffix * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_strip(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); char *basename = NULL; CRM_ASSERT(end); basename = strndup(last_rsc_id, end - last_rsc_id + 1); CRM_ASSERT(basename); return basename; } /*! * \internal * \brief Get the name of the first instance of a cloned resource * * \param[in] last_rsc_id Resource ID to check * * \return Pointer to newly allocated string with resource's base name plus :0 * \note It is the caller's responsibility to free() the result. * This asserts on error, so callers can assume result is not NULL. */ char * clone_zero(const char *last_rsc_id) { const char *end = pe_base_name_end(last_rsc_id); size_t base_name_len = end - last_rsc_id + 1; char *zero = NULL; CRM_ASSERT(end); zero = calloc(base_name_len + 3, sizeof(char)); CRM_ASSERT(zero); memcpy(zero, last_rsc_id, base_name_len); zero[base_name_len] = ':'; zero[base_name_len + 1] = '0'; return zero; } static pcmk_resource_t * create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE); copy_in_properties(xml_rsc, rsc_entry); crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); crm_log_xml_debug(xml_rsc, "Orphan resource"); if (pe__unpack_resource(xml_rsc, &rsc, NULL, scheduler) != pcmk_rc_ok) { return NULL; } if (xml_contains_remote_node(xml_rsc)) { pcmk_node_t *node; crm_debug("Detected orphaned remote node %s", rsc_id); node = pe_find_node(scheduler->nodes, rsc_id); if (node == NULL) { node = pe_create_node(rsc_id, rsc_id, "remote", NULL, scheduler); } link_rsc2remotenode(scheduler, rsc); if (node) { crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id); node->details->shutdown = TRUE; } } if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) { /* This orphaned rsc needs to be mapped to a container. */ crm_trace("Detected orphaned container filler %s", rsc_id); pe__set_resource_flags(rsc, pcmk_rsc_removed_filler); } pe__set_resource_flags(rsc, pcmk_rsc_removed); scheduler->resources = g_list_append(scheduler->resources, rsc); return rsc; } /*! * \internal * \brief Create orphan instance for anonymous clone resource history * * \param[in,out] parent Clone resource that orphan will be added to * \param[in] rsc_id Orphan's resource ID * \param[in] node Where orphan is active (for logging only) * \param[in,out] scheduler Scheduler data * * \return Newly added orphaned instance of \p parent */ static pcmk_resource_t * create_anonymous_orphan(pcmk_resource_t *parent, const char *rsc_id, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *top = pe__create_clone_child(parent, scheduler); // find_rsc() because we might be a cloned group pcmk_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pcmk_rsc_match_clone_only); pcmk__rsc_debug(parent, "Created orphan %s for %s: %s on %s", top->id, parent->id, rsc_id, pe__node_name(node)); return orphan; } /*! * \internal * \brief Check a node for an instance of an anonymous clone * * Return a child instance of the specified anonymous clone, in order of * preference: (1) the instance running on the specified node, if any; * (2) an inactive instance (i.e. within the total of clone-max instances); * (3) a newly created orphan (i.e. clone-max instances are already active). * * \param[in,out] scheduler Scheduler data * \param[in] node Node on which to check for instance * \param[in,out] parent Clone to check * \param[in] rsc_id Name of cloned resource in history (no instance) */ static pcmk_resource_t * find_anonymous_clone(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, pcmk_resource_t *parent, const char *rsc_id) { GList *rIter = NULL; pcmk_resource_t *rsc = NULL; pcmk_resource_t *inactive_instance = NULL; gboolean skip_inactive = FALSE; CRM_ASSERT(parent != NULL); CRM_ASSERT(pe_rsc_is_clone(parent)); CRM_ASSERT(!pcmk_is_set(parent->flags, pcmk_rsc_unique)); // Check for active (or partially active, for cloned groups) instance pcmk__rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, pe__node_name(node), parent->id); for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) { GList *locations = NULL; pcmk_resource_t *child = rIter->data; /* Check whether this instance is already known to be active or pending * anywhere, at this stage of unpacking. Because this function is called * for a resource before the resource's individual operation history * entries are unpacked, locations will generally not contain the * desired node. * * However, there are three exceptions: * (1) when child is a cloned group and we have already unpacked the * history of another member of the group on the same node; * (2) when we've already unpacked the history of another numbered * instance on the same node (which can happen if globally-unique * was flipped from true to false); and * (3) when we re-run calculations on the same scheduler data as part of * a simulation. */ child->fns->location(child, &locations, 2); if (locations) { /* We should never associate the same numbered anonymous clone * instance with multiple nodes, and clone instances can't migrate, * so there must be only one location, regardless of history. */ CRM_LOG_ASSERT(locations->next == NULL); if (((pcmk_node_t *) locations->data)->details == node->details) { /* This child instance is active on the requested node, so check * for a corresponding configured resource. We use find_rsc() * instead of child because child may be a cloned group, and we * need the particular member corresponding to rsc_id. * * If the history entry is orphaned, rsc will be NULL. */ rsc = parent->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); if (rsc) { /* If there are multiple instance history entries for an * anonymous clone in a single node's history (which can * happen if globally-unique is switched from true to * false), we want to consider the instances beyond the * first as orphans, even if there are inactive instance * numbers available. */ if (rsc->running_on) { crm_notice("Active (now-)anonymous clone %s has " "multiple (orphan) instance histories on %s", parent->id, pe__node_name(node)); skip_inactive = TRUE; rsc = NULL; } else { pcmk__rsc_trace(parent, "Resource %s, active", rsc->id); } } } g_list_free(locations); } else { pcmk__rsc_trace(parent, "Resource %s, skip inactive", child->id); if (!skip_inactive && !inactive_instance && !pcmk_is_set(child->flags, pcmk_rsc_blocked)) { // Remember one inactive instance in case we don't find active inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL, pcmk_rsc_match_clone_only); /* ... but don't use it if it was already associated with a * pending action on another node */ if (inactive_instance && inactive_instance->pending_node && (inactive_instance->pending_node->details != node->details)) { inactive_instance = NULL; } } } } if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) { pcmk__rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id); rsc = inactive_instance; } /* If the resource has "requires" set to "quorum" or "nothing", and we don't * have a clone instance for every node, we don't want to consume a valid * instance number for unclean nodes. Such instances may appear to be active * according to the history, but should be considered inactive, so we can * start an instance elsewhere. Treat such instances as orphans. * * An exception is instances running on guest nodes -- since guest node * "fencing" is actually just a resource stop, requires shouldn't apply. * * @TODO Ideally, we'd use an inactive instance number if it is not needed * for any clean instances. However, we don't know that at this point. */ if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pcmk_rsc_needs_fencing) && (!node->details->online || node->details->unclean) && !pe__is_guest_node(node) && !pe__is_universal_clone(parent, scheduler)) { rsc = NULL; } if (rsc == NULL) { rsc = create_anonymous_orphan(parent, rsc_id, node, scheduler); pcmk__rsc_trace(parent, "Resource %s, orphan", rsc->id); } return rsc; } static pcmk_resource_t * unpack_find_resource(pcmk_scheduler_t *scheduler, const pcmk_node_t *node, const char *rsc_id) { pcmk_resource_t *rsc = NULL; pcmk_resource_t *parent = NULL; crm_trace("looking for %s", rsc_id); rsc = pe_find_resource(scheduler->resources, rsc_id); if (rsc == NULL) { /* If we didn't find the resource by its name in the operation history, * check it again as a clone instance. Even when clone-max=0, we create * a single :0 orphan to match against here. */ char *clone0_id = clone_zero(rsc_id); pcmk_resource_t *clone0 = pe_find_resource(scheduler->resources, clone0_id); if (clone0 && !pcmk_is_set(clone0->flags, pcmk_rsc_unique)) { rsc = clone0; parent = uber_parent(clone0); crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id); } else { crm_trace("%s is not known as %s either (orphan)", rsc_id, clone0_id); } free(clone0_id); } else if (rsc->variant > pcmk_rsc_variant_primitive) { crm_trace("Resource history for %s is orphaned because it is no longer primitive", rsc_id); return NULL; } else { parent = uber_parent(rsc); } if (pe_rsc_is_anon_clone(parent)) { if (pe_rsc_is_bundled(parent)) { rsc = pe__find_bundle_replica(parent->parent, node); } else { char *base = clone_strip(rsc_id); rsc = find_anonymous_clone(scheduler, node, parent, base); free(base); CRM_ASSERT(rsc != NULL); } } if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei) && !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) { pcmk__str_update(&rsc->clone_name, rsc_id); pcmk__rsc_debug(rsc, "Internally renamed %s on %s to %s%s", rsc_id, pe__node_name(node), rsc->id, pcmk_is_set(rsc->flags, pcmk_rsc_removed)? " (ORPHAN)" : ""); } return rsc; } static pcmk_resource_t * process_orphan_resource(const xmlNode *rsc_entry, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { pcmk_resource_t *rsc = NULL; const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node)); rsc = create_fake_resource(rsc_id, rsc_entry, scheduler); if (rsc == NULL) { return NULL; } if (!pcmk_is_set(scheduler->flags, pcmk_sched_stop_removed_resources)) { pe__clear_resource_flags(rsc, pcmk_rsc_managed); } else { CRM_CHECK(rsc != NULL, return NULL); pcmk__rsc_trace(rsc, "Added orphan %s", rsc->id); resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", scheduler); } return rsc; } static void process_rsc_state(pcmk_resource_t *rsc, pcmk_node_t *node, enum action_fail_response on_fail) { pcmk_node_t *tmpnode = NULL; char *reason = NULL; enum action_fail_response save_on_fail = pcmk_on_fail_ignore; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s", rsc->id, role2text(rsc->role), pe__node_name(node), fail2text(on_fail)); /* process current state */ if (rsc->role != pcmk_role_unknown) { pcmk_resource_t *iter = rsc; while (iter) { if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) { pcmk_node_t *n = pe__copy_node(node); pcmk__rsc_trace(rsc, "%s%s%s known on %s", rsc->id, ((rsc->clone_name == NULL)? "" : " also known as "), ((rsc->clone_name == NULL)? "" : rsc->clone_name), pe__node_name(n)); g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n); } if (pcmk_is_set(iter->flags, pcmk_rsc_unique)) { break; } iter = iter->parent; } } /* If a managed resource is believed to be running, but node is down ... */ if ((rsc->role > pcmk_role_stopped) && node->details->online == FALSE && node->details->maintenance == FALSE && pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { gboolean should_fence = FALSE; /* If this is a guest node, fence it (regardless of whether fencing is * enabled, because guest node fencing is done by recovery of the * container resource rather than by the fencer). Mark the resource * we're processing as failed. When the guest comes back up, its * operation history in the CIB will be cleared, freeing the affected * resource to run again once we are sure we know its state. */ if (pe__is_guest_node(node)) { pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); should_fence = TRUE; } else if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) { if (pe__is_remote_node(node) && node->details->remote_rsc && !pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_failed)) { /* Setting unseen means that fencing of the remote node will * occur only if the connection resource is not going to start * somewhere. This allows connection resources on a failed * cluster node to move to another node without requiring the * remote nodes to be fenced as well. */ node->details->unseen = TRUE; reason = crm_strdup_printf("%s is active there (fencing will be" " revoked if remote connection can " "be re-established elsewhere)", rsc->id); } should_fence = TRUE; } if (should_fence) { if (reason == NULL) { reason = crm_strdup_printf("%s is thought to be active there", rsc->id); } pe_fence_node(rsc->cluster, node, reason, FALSE); } free(reason); } /* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */ save_on_fail = on_fail; if (node->details->unclean) { /* No extra processing needed * Also allows resources to be started again after a node is shot */ on_fail = pcmk_on_fail_ignore; } switch (on_fail) { case pcmk_on_fail_ignore: /* nothing to do */ break; case pcmk_on_fail_demote: pe__set_resource_flags(rsc, pcmk_rsc_failed); demote_action(rsc, node, FALSE); break; case pcmk_on_fail_fence_node: /* treat it as if it is still running * but also mark the node as unclean */ reason = crm_strdup_printf("%s failed there", rsc->id); pe_fence_node(rsc->cluster, node, reason, FALSE); free(reason); break; case pcmk_on_fail_standby_node: node->details->standby = TRUE; node->details->standby_onfail = TRUE; break; case pcmk_on_fail_block: /* is_managed == FALSE will prevent any * actions being sent for the resource */ pe__clear_resource_flags(rsc, pcmk_rsc_managed); pe__set_resource_flags(rsc, pcmk_rsc_blocked); break; case pcmk_on_fail_ban: /* make sure it comes up somewhere else * or not at all */ resource_location(rsc, node, -INFINITY, "__action_migration_auto__", rsc->cluster); break; case pcmk_on_fail_stop: pe__set_next_role(rsc, pcmk_role_stopped, "on-fail=stop"); break; case pcmk_on_fail_restart: if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); stop_action(rsc, node, FALSE); } break; case pcmk_on_fail_restart_container: pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); if (rsc->container && pe_rsc_is_bundled(rsc)) { /* A bundle's remote connection can run on a different node than * the bundle's container. We don't necessarily know where the * container is running yet, so remember it and add a stop * action for it later. */ rsc->cluster->stop_needed = g_list_prepend(rsc->cluster->stop_needed, rsc->container); } else if (rsc->container) { stop_action(rsc->container, node, FALSE); } else if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { stop_action(rsc, node, FALSE); } break; case pcmk_on_fail_reset_remote: pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); if (pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled)) { tmpnode = NULL; if (rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); } if (tmpnode && pe__is_remote_node(tmpnode) && tmpnode->details->remote_was_fenced == 0) { /* The remote connection resource failed in a way that * should result in fencing the remote node. */ pe_fence_node(rsc->cluster, tmpnode, "remote connection is unrecoverable", FALSE); } } /* require the stop action regardless if fencing is occurring or not. */ if (rsc->role > pcmk_role_stopped) { stop_action(rsc, node, FALSE); } /* if reconnect delay is in use, prevent the connection from exiting the * "STOPPED" role until the failure is cleared by the delay timeout. */ if (rsc->remote_reconnect_ms) { pe__set_next_role(rsc, pcmk_role_stopped, "remote reset"); } break; } /* ensure a remote-node connection failure forces an unclean remote-node * to be fenced. By setting unseen = FALSE, the remote-node failure will * result in a fencing operation regardless if we're going to attempt to * reconnect to the remote-node in this transition or not. */ if (pcmk_is_set(rsc->flags, pcmk_rsc_failed) && rsc->is_remote_node) { tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id); if (tmpnode && tmpnode->details->unclean) { tmpnode->details->unseen = FALSE; } } if ((rsc->role != pcmk_role_stopped) && (rsc->role != pcmk_role_unknown)) { if (pcmk_is_set(rsc->flags, pcmk_rsc_removed)) { if (pcmk_is_set(rsc->flags, pcmk_rsc_managed)) { pcmk__config_warn("Detected active orphan %s running on %s", rsc->id, pe__node_name(node)); } else { pcmk__config_warn("Resource '%s' must be stopped manually on " "%s because cluster is configured not to " "stop active orphans", rsc->id, pe__node_name(node)); } } native_add_running(rsc, node, rsc->cluster, (save_on_fail != pcmk_on_fail_ignore)); switch (on_fail) { case pcmk_on_fail_ignore: break; case pcmk_on_fail_demote: case pcmk_on_fail_block: pe__set_resource_flags(rsc, pcmk_rsc_failed); break; default: pe__set_resource_flags(rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); break; } } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) { /* Only do this for older status sections that included instance numbers * Otherwise stopped instances will appear as orphans */ pcmk__rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id); free(rsc->clone_name); rsc->clone_name = NULL; } else { GList *possible_matches = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, FALSE); GList *gIter = possible_matches; for (; gIter != NULL; gIter = gIter->next) { pcmk_action_t *stop = (pcmk_action_t *) gIter->data; pe__set_action_flags(stop, pcmk_action_optional); } g_list_free(possible_matches); } /* A successful stop after migrate_to on the migration source doesn't make * the partially migrated resource stopped on the migration target. */ if ((rsc->role == pcmk_role_stopped) && rsc->partial_migration_source && rsc->partial_migration_source->details == node->details && rsc->partial_migration_target && rsc->running_on) { rsc->role = pcmk_role_started; } } /* create active recurring operations as optional */ static void process_recurring(pcmk_node_t *node, pcmk_resource_t *rsc, int start_index, int stop_index, GList *sorted_op_list, pcmk_scheduler_t *scheduler) { int counter = -1; const char *task = NULL; const char *status = NULL; GList *gIter = sorted_op_list; CRM_ASSERT(rsc); pcmk__rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index); for (; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; guint interval_ms = 0; char *key = NULL; const char *id = ID(rsc_op); counter++; if (node->details->online == FALSE) { pcmk__rsc_trace(rsc, "Skipping %s on %s: node is offline", rsc->id, pe__node_name(node)); break; /* Need to check if there's a monitor for role="Stopped" */ } else if (start_index < stop_index && counter <= stop_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: resource is not active", id, pe__node_name(node)); continue; } else if (counter < start_index) { pcmk__rsc_trace(rsc, "Skipping %s on %s: old %d", id, pe__node_name(node), counter); continue; } crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { pcmk__rsc_trace(rsc, "Skipping %s on %s: non-recurring", id, pe__node_name(node)); continue; } status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(status, "-1", pcmk__str_casei)) { pcmk__rsc_trace(rsc, "Skipping %s on %s: status", id, pe__node_name(node)); continue; } task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); /* create the action */ key = pcmk__op_key(rsc->id, task, interval_ms); pcmk__rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node)); custom_action(rsc, key, task, node, TRUE, scheduler); } } void calculate_active_ops(const GList *sorted_op_list, int *start_index, int *stop_index) { int counter = -1; int implied_monitor_start = -1; int implied_clone_start = -1; const char *task = NULL; const char *status = NULL; *stop_index = -1; *start_index = -1; for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) { const xmlNode *rsc_op = (const xmlNode *) iter->data; counter++; task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK); status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS); if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei) && pcmk__str_eq(status, "0", pcmk__str_casei)) { *stop_index = counter; } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MIGRATE_FROM, NULL)) { *start_index = counter; } else if ((implied_monitor_start <= *stop_index) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC); if (pcmk__strcase_any_of(rc, "0", "8", NULL)) { implied_monitor_start = counter; } } else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, NULL)) { implied_clone_start = counter; } } if (*start_index == -1) { if (implied_clone_start != -1) { *start_index = implied_clone_start; } else if (implied_monitor_start != -1) { *start_index = implied_monitor_start; } } } // If resource history entry has shutdown lock, remember lock node and time static void unpack_shutdown_lock(const xmlNode *rsc_entry, pcmk_resource_t *rsc, const pcmk_node_t *node, pcmk_scheduler_t *scheduler) { time_t lock_time = 0; // When lock started (i.e. node shutdown time) if ((crm_element_value_epoch(rsc_entry, PCMK_OPT_SHUTDOWN_LOCK, &lock_time) == pcmk_ok) && (lock_time != 0)) { if ((scheduler->shutdown_lock > 0) && (get_effective_time(scheduler) > (lock_time + scheduler->shutdown_lock))) { pcmk__rsc_info(rsc, "Shutdown lock for %s on %s expired", rsc->id, pe__node_name(node)); pe__clear_resource_history(rsc, node); } else { /* @COMPAT I don't like breaking const signatures, but * rsc->lock_node should really be const -- we just can't change it * until the next API compatibility break. */ rsc->lock_node = (pcmk_node_t *) node; rsc->lock_time = lock_time; } } } /*! * \internal * \brief Unpack one lrm_resource entry from a node's CIB status * * \param[in,out] node Node whose status is being unpacked * \param[in] rsc_entry lrm_resource XML being unpacked * \param[in,out] scheduler Scheduler data * * \return Resource corresponding to the entry, or NULL if no operation history */ static pcmk_resource_t * unpack_lrm_resource(pcmk_node_t *node, const xmlNode *lrm_resource, pcmk_scheduler_t *scheduler) { GList *gIter = NULL; int stop_index = -1; int start_index = -1; enum rsc_role_e req_role = pcmk_role_unknown; const char *rsc_id = ID(lrm_resource); pcmk_resource_t *rsc = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; xmlNode *rsc_op = NULL; xmlNode *last_failure = NULL; enum action_fail_response on_fail = pcmk_on_fail_ignore; enum rsc_role_e saved_role = pcmk_role_unknown; if (rsc_id == NULL) { pcmk__config_err("Ignoring invalid " XML_LRM_TAG_RESOURCE " entry: No " XML_ATTR_ID); crm_log_xml_info(lrm_resource, "missing-id"); return NULL; } crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s", rsc_id, pe__node_name(node)); // Build a list of individual lrm_rsc_op entries, so we can sort them for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) { op_list = g_list_prepend(op_list, rsc_op); } if (!pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } } /* find the resource */ rsc = unpack_find_resource(scheduler, node, rsc_id); if (rsc == NULL) { if (op_list == NULL) { // If there are no operations, there is nothing to do return NULL; } else { rsc = process_orphan_resource(lrm_resource, node, scheduler); } } CRM_ASSERT(rsc != NULL); // Check whether the resource is "shutdown-locked" to this node if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { unpack_shutdown_lock(lrm_resource, rsc, node, scheduler); } /* process operations */ saved_role = rsc->role; rsc->role = pcmk_role_unknown; sorted_op_list = g_list_sort(op_list, sort_op_by_callid); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail); } /* create active recurring operations as optional */ calculate_active_ops(sorted_op_list, &start_index, &stop_index); process_recurring(node, rsc, start_index, stop_index, sorted_op_list, scheduler); /* no need to free the contents */ g_list_free(sorted_op_list); process_rsc_state(rsc, node, on_fail); if (get_target_role(rsc, &req_role)) { if ((rsc->next_role == pcmk_role_unknown) || (req_role < rsc->next_role)) { pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE); } else if (req_role > rsc->next_role) { pcmk__rsc_info(rsc, "%s: Not overwriting calculated next role %s" " with requested next role %s", rsc->id, role2text(rsc->next_role), role2text(req_role)); } } if (saved_role > rsc->role) { rsc->role = saved_role; } return rsc; } static void handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list, pcmk_scheduler_t *scheduler) { for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list); rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) { pcmk_resource_t *rsc; pcmk_resource_t *container; const char *rsc_id; const char *container_id; if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) { continue; } container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER); rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID); if (container_id == NULL || rsc_id == NULL) { continue; } container = pe_find_resource(scheduler->resources, container_id); if (container == NULL) { continue; } rsc = pe_find_resource(scheduler->resources, rsc_id); if ((rsc == NULL) || (rsc->container != NULL) || !pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { continue; } pcmk__rsc_trace(rsc, "Mapped container of orphaned resource %s to %s", rsc->id, container_id); rsc->container = container; container->fillers = g_list_append(container->fillers, rsc); } } /*! * \internal * \brief Unpack one node's lrm status section * * \param[in,out] node Node whose status is being unpacked * \param[in] xml CIB node state XML * \param[in,out] scheduler Scheduler data */ static void unpack_node_lrm(pcmk_node_t *node, const xmlNode *xml, pcmk_scheduler_t *scheduler) { bool found_orphaned_container_filler = false; // Drill down to lrm_resources section xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE); if (xml == NULL) { return; } xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE); if (xml == NULL) { return; } // Unpack each lrm_resource entry for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE); rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) { pcmk_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, scheduler); if ((rsc != NULL) && pcmk_is_set(rsc->flags, pcmk_rsc_removed_filler)) { found_orphaned_container_filler = true; } } /* Now that all resource state has been unpacked for this node, map any * orphaned container fillers to their container resource. */ if (found_orphaned_container_filler) { handle_orphaned_container_fillers(xml, scheduler); } } static void set_active(pcmk_resource_t *rsc) { const pcmk_resource_t *top = pe__const_top_resource(rsc, false); if (top && pcmk_is_set(top->flags, pcmk_rsc_promotable)) { rsc->role = pcmk_role_unpromoted; } else { rsc->role = pcmk_role_started; } } static void set_node_score(gpointer key, gpointer value, gpointer user_data) { pcmk_node_t *node = value; int *score = user_data; node->weight = *score; } #define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \ "/" XML_CIB_TAG_STATE #define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \ "/" XML_LRM_TAG_RESOURCES \ "/" XML_LRM_TAG_RESOURCE #define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP static xmlNode * find_lrm_op(const char *resource, const char *op, const char *node, const char *source, int target_rc, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']" SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'", NULL); /* Need to check against transition_magic too? */ if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']", NULL); } else if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) { pcmk__g_strcat(xpath, " and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']", NULL); } else { g_string_append_c(xpath, ']'); } xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); if (xml && target_rc >= 0) { int rc = PCMK_OCF_UNKNOWN_ERROR; int status = PCMK_EXEC_ERROR; crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc); crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status); if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) { return NULL; } } return xml; } static xmlNode * find_lrm_resource(const char *rsc_id, const char *node_name, pcmk_scheduler_t *scheduler) { GString *xpath = NULL; xmlNode *xml = NULL; CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL); xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']", NULL); xml = get_xpath_object((const char *) xpath->str, scheduler->input, LOG_DEBUG); g_string_free(xpath, TRUE); return xml; } /*! * \internal * \brief Check whether a resource has no completed action history on a node * * \param[in,out] rsc Resource to check * \param[in] node_name Node to check * * \return true if \p rsc_id is unknown on \p node_name, otherwise false */ static bool unknown_on_node(pcmk_resource_t *rsc, const char *node_name) { bool result = false; xmlXPathObjectPtr search; GString *xpath = g_string_sized_new(256); pcmk__g_strcat(xpath, XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']" SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']" SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']", NULL); search = xpath_search(rsc->cluster->input, (const char *) xpath->str); result = (numXpathResults(search) == 0); freeXpathObject(search); g_string_free(xpath, TRUE); return result; } /*! * \brief Check whether a probe/monitor indicating the resource was not running * on a node happened after some event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] scheduler Scheduler data * * \return true if such a monitor happened after event, false otherwise */ static bool monitor_not_running_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pcmk_scheduler_t *scheduler) { /* Any probe/monitor operation on the node indicating it was not running * there */ xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name, NULL, PCMK_OCF_NOT_RUNNING, scheduler); return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0); } /*! * \brief Check whether any non-monitor operation on a node happened after some * event * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] xml_op Event that non-monitor is being compared to * \param[in] same_node Whether the operations are on the same node * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool non_monitor_after(const char *rsc_id, const char *node_name, const xmlNode *xml_op, bool same_node, pcmk_scheduler_t *scheduler) { xmlNode *lrm_resource = NULL; lrm_resource = find_lrm_resource(rsc_id, node_name, scheduler); if (lrm_resource == NULL) { return false; } for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP); op != NULL; op = crm_next_same_xml(op)) { const char * task = NULL; if (op == xml_op) { continue; } task = crm_element_value(op, XML_LRM_ATTR_TASK); if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, NULL) && pe__is_newer_op(op, xml_op, same_node) > 0) { return true; } } return false; } /*! * \brief Check whether the resource has newer state on a node after a migration * attempt * * \param[in] rsc_id Resource being checked * \param[in] node_name Node being checked * \param[in] migrate_to Any migrate_to event that is being compared to * \param[in] migrate_from Any migrate_from event that is being compared to * \param[in,out] scheduler Scheduler data * * \return true if such a operation happened after event, false otherwise */ static bool newer_state_after_migrate(const char *rsc_id, const char *node_name, const xmlNode *migrate_to, const xmlNode *migrate_from, pcmk_scheduler_t *scheduler) { const xmlNode *xml_op = migrate_to; const char *source = NULL; const char *target = NULL; bool same_node = false; if (migrate_from) { xml_op = migrate_from; } source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE); target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET); /* It's preferred to compare to the migrate event on the same node if * existing, since call ids are more reliable. */ if (pcmk__str_eq(node_name, target, pcmk__str_casei)) { if (migrate_from) { xml_op = migrate_from; same_node = true; } else { xml_op = migrate_to; } } else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) { if (migrate_to) { xml_op = migrate_to; same_node = true; } else { xml_op = migrate_from; } } /* If there's any newer non-monitor operation on the node, or any newer * probe/monitor operation on the node indicating it was not running there, * the migration events potentially no longer matter for the node. */ return non_monitor_after(rsc_id, node_name, xml_op, same_node, scheduler) || monitor_not_running_after(rsc_id, node_name, xml_op, same_node, scheduler); } /*! * \internal * \brief Parse migration source and target node names from history entry * * \param[in] entry Resource history entry for a migration action * \param[in] source_node If not NULL, source must match this node * \param[in] target_node If not NULL, target must match this node * \param[out] source_name Where to store migration source node name * \param[out] target_name Where to store migration target node name * * \return Standard Pacemaker return code */ static int get_migration_node_names(const xmlNode *entry, const pcmk_node_t *source_node, const pcmk_node_t *target_node, const char **source_name, const char **target_name) { *source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE); *target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET); if ((*source_name == NULL) || (*target_name == NULL)) { pcmk__config_err("Ignoring resource history entry %s without " XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET, ID(entry)); return pcmk_rc_unpack_error; } if ((source_node != NULL) && !pcmk__str_eq(*source_name, source_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s", ID(entry), *source_name, pe__node_name(source_node)); return pcmk_rc_unpack_error; } if ((target_node != NULL) && !pcmk__str_eq(*target_name, target_node->details->uname, pcmk__str_casei|pcmk__str_null_matches)) { pcmk__config_err("Ignoring resource history entry %s because " XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s", ID(entry), *target_name, pe__node_name(target_node)); return pcmk_rc_unpack_error; } return pcmk_rc_ok; } /* * \internal * \brief Add a migration source to a resource's list of dangling migrations * * If the migrate_to and migrate_from actions in a live migration both * succeeded, but there is no stop on the source, the migration is considered * "dangling." Add the source to the resource's dangling migration list, which * will be used to schedule a stop on the source without affecting the target. * * \param[in,out] rsc Resource involved in migration * \param[in] node Migration source */ static void add_dangling_migration(pcmk_resource_t *rsc, const pcmk_node_t *node) { pcmk__rsc_trace(rsc, "Dangling migration of %s requires stop on %s", rsc->id, pe__node_name(node)); rsc->role = pcmk_role_stopped; rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, (gpointer) node); } /*! * \internal * \brief Update resource role etc. after a successful migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_success(struct action_history *history) { /* A complete migration sequence is: * 1. migrate_to on source node (which succeeded if we get to this function) * 2. migrate_from on target node * 3. stop on source node * * If no migrate_from has happened, the migration is considered to be * "partial". If the migrate_from succeeded but no stop has happened, the * migration is considered to be "dangling". * * If a successful migrate_to and stop have happened on the source node, we * still need to check for a partial migration, due to scenarios (easier to * produce with batch-limit=1) like: * * - A resource is migrating from node1 to node2, and a migrate_to is * initiated for it on node1. * * - node2 goes into standby mode while the migrate_to is pending, which * aborts the transition. * * - Upon completion of the migrate_to, a new transition schedules a stop * on both nodes and a start on node1. * * - If the new transition is aborted for any reason while the resource is * stopping on node1, the transition after that stop completes will see * the migrate_to and stop on the source, but it's still a partial * migration, and the resource must be stopped on node2 because it is * potentially active there due to the migrate_to. * * We also need to take into account that either node's history may be * cleared at any point in the migration process. */ int from_rc = PCMK_OCF_OK; int from_status = PCMK_EXEC_PENDING; pcmk_node_t *target_node = NULL; xmlNode *migrate_from = NULL; const char *source = NULL; const char *target = NULL; bool source_newer_op = false; bool target_newer_state = false; bool active_on_target = false; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } // Check for newer state on the source source_newer_op = non_monitor_after(history->rsc->id, source, history->xml, true, history->rsc->cluster); // Check for a migrate_from action from this source on the target migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, -1, history->rsc->cluster); if (migrate_from != NULL) { if (source_newer_op) { /* There's a newer non-monitor operation on the source and a * migrate_from on the target, so this migrate_to is irrelevant to * the resource's state. */ return; } crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc); crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status); } /* If the resource has newer state on both the source and target after the * migration events, this migrate_to is irrelevant to the resource's state. */ target_newer_state = newer_state_after_migrate(history->rsc->id, target, history->xml, migrate_from, history->rsc->cluster); if (source_newer_op && target_newer_state) { return; } /* Check for dangling migration (migrate_from succeeded but stop not done). * We know there's no stop because we already returned if the target has a * migrate_from and the source has any newer non-monitor operation. */ if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) { add_dangling_migration(history->rsc, history->node); return; } /* Without newer state, this migrate_to implies the resource is active. * (Clones are not allowed to migrate, so role can't be promoted.) */ history->rsc->role = pcmk_role_started; target_node = pe_find_node(history->rsc->cluster->nodes, target); active_on_target = !target_newer_state && (target_node != NULL) && target_node->details->online; if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target if (active_on_target) { native_add_running(history->rsc, target_node, history->rsc->cluster, TRUE); } else { // Mark resource as failed, require recovery, and prevent migration pe__set_resource_flags(history->rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable); } return; } // The migrate_from is pending, complete but erased, or to be scheduled /* If there is no history at all for the resource on an online target, then * it was likely cleaned. Just return, and we'll schedule a probe. Once we * have the probe result, it will be reflected in target_newer_state. */ if ((target_node != NULL) && target_node->details->online && unknown_on_node(history->rsc, target)) { return; } if (active_on_target) { pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, source); native_add_running(history->rsc, target_node, history->rsc->cluster, FALSE); if ((source_node != NULL) && source_node->details->online) { /* This is a partial migration: the migrate_to completed * successfully on the source, but the migrate_from has not * completed. Remember the source and target; if the newly * chosen target remains the same when we schedule actions * later, we may continue with the migration. */ history->rsc->partial_migration_target = target_node; history->rsc->partial_migration_source = source_node; } } else if (!source_newer_op) { // Mark resource as failed, require recovery, and prevent migration pe__set_resource_flags(history->rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); pe__clear_resource_flags(history->rsc, pcmk_rsc_migratable); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_to action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_to_failure(struct action_history *history) { xmlNode *target_migrate_from = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML if (get_migration_node_names(history->xml, history->node, NULL, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->role = pcmk_role_started; // Check for migrate_from on the target target_migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM, target, source, PCMK_OCF_OK, history->rsc->cluster); if (/* If the resource state is unknown on the target, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, target) /* If the resource has newer state on the target after the migration * events, this migrate_to no longer matters for the target. */ && !newer_state_after_migrate(history->rsc->id, target, history->xml, target_migrate_from, history->rsc->cluster)) { /* The resource has no newer state on the target, so assume it's still * active there. * (if it is up). */ pcmk_node_t *target_node = pe_find_node(history->rsc->cluster->nodes, target); if (target_node && target_node->details->online) { native_add_running(history->rsc, target_node, history->rsc->cluster, FALSE); } } else if (!non_monitor_after(history->rsc->id, source, history->xml, true, history->rsc->cluster)) { /* We know the resource has newer state on the target, but this * migrate_to still matters for the source as long as there's no newer * non-monitor operation there. */ // Mark node as having dangling migration so we can force a stop later history->rsc->dangling_migrations = g_list_prepend(history->rsc->dangling_migrations, (gpointer) history->node); } } /*! * \internal * \brief Update resource role etc. after a failed migrate_from action * * \param[in,out] history Parsed action result history */ static void unpack_migrate_from_failure(struct action_history *history) { xmlNode *source_migrate_to = NULL; const char *source = NULL; const char *target = NULL; // Get source and target node names from XML if (get_migration_node_names(history->xml, NULL, history->node, &source, &target) != pcmk_rc_ok) { return; } /* If a migration failed, we have to assume the resource is active. Clones * are not allowed to migrate, so role can't be promoted. */ history->rsc->role = pcmk_role_started; // Check for a migrate_to on the source source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO, source, target, PCMK_OCF_OK, history->rsc->cluster); if (/* If the resource state is unknown on the source, it will likely be * probed there. * Don't just consider it running there. We will get back here anyway in * case the probe detects it's running there. */ !unknown_on_node(history->rsc, source) /* If the resource has newer state on the source after the migration * events, this migrate_from no longer matters for the source. */ && !newer_state_after_migrate(history->rsc->id, source, source_migrate_to, history->xml, history->rsc->cluster)) { /* The resource has no newer state on the source, so assume it's still * active there (if it is up). */ pcmk_node_t *source_node = pe_find_node(history->rsc->cluster->nodes, source); if (source_node && source_node->details->online) { native_add_running(history->rsc, source_node, history->rsc->cluster, TRUE); } } } /*! * \internal * \brief Add an action to cluster's list of failed actions * * \param[in,out] history Parsed action result history */ static void record_failed_op(struct action_history *history) { if (!(history->node->details->online)) { return; } for (const xmlNode *xIter = history->rsc->cluster->failed->children; xIter != NULL; xIter = xIter->next) { const char *key = pe__xe_history_key(xIter); const char *uname = crm_element_value(xIter, XML_ATTR_UNAME); if (pcmk__str_eq(history->key, key, pcmk__str_none) && pcmk__str_eq(uname, history->node->details->uname, pcmk__str_casei)) { crm_trace("Skipping duplicate entry %s on %s", history->key, pe__node_name(history->node)); return; } } crm_trace("Adding entry for %s on %s to failed action list", history->key, pe__node_name(history->node)); crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname); crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id); add_node_copy(history->rsc->cluster->failed, history->xml); } static char * last_change_str(const xmlNode *xml_op) { time_t when; char *result = NULL; if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE, &when) == pcmk_ok) { char *when_s = pcmk__epoch2str(&when, 0); const char *p = strchr(when_s, ' '); // Skip day of week to make message shorter if ((p != NULL) && (*(++p) != '\0')) { result = strdup(p); CRM_ASSERT(result != NULL); } free(when_s); } if (result == NULL) { result = strdup("unknown time"); CRM_ASSERT(result != NULL); } return result; } /*! * \internal * \brief Compare two on-fail values * * \param[in] first One on-fail value to compare * \param[in] second The other on-fail value to compare * * \return A negative number if second is more severe than first, zero if they * are equal, or a positive number if first is more severe than second. * \note This is only needed until the action_fail_response values can be * renumbered at the next API compatibility break. */ static int cmp_on_fail(enum action_fail_response first, enum action_fail_response second) { switch (first) { case pcmk_on_fail_demote: switch (second) { case pcmk_on_fail_ignore: return 1; case pcmk_on_fail_demote: return 0; default: return -1; } break; case pcmk_on_fail_reset_remote: switch (second) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: return 1; case pcmk_on_fail_reset_remote: return 0; default: return -1; } break; case pcmk_on_fail_restart_container: switch (second) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_reset_remote: return 1; case pcmk_on_fail_restart_container: return 0; default: return -1; } break; default: break; } switch (second) { case pcmk_on_fail_demote: return (first == pcmk_on_fail_ignore)? -1 : 1; case pcmk_on_fail_reset_remote: switch (first) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: return -1; default: return 1; } break; case pcmk_on_fail_restart_container: switch (first) { case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_reset_remote: return -1; default: return 1; } break; default: break; } return first - second; } /*! * \internal * \brief Ban a resource (or its clone if an anonymous instance) from all nodes * * \param[in,out] rsc Resource to ban */ static void ban_from_all_nodes(pcmk_resource_t *rsc) { int score = -INFINITY; pcmk_resource_t *fail_rsc = rsc; if (fail_rsc->parent != NULL) { pcmk_resource_t *parent = uber_parent(fail_rsc); if (pe_rsc_is_anon_clone(parent)) { /* For anonymous clones, if an operation with on-fail=stop fails for * any instance, the entire clone must stop. */ fail_rsc = parent; } } // Ban the resource from all nodes crm_notice("%s will not be started under current conditions", fail_rsc->id); if (fail_rsc->allowed_nodes != NULL) { g_hash_table_destroy(fail_rsc->allowed_nodes); } fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes); g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score); } /*! * \internal * \brief Get configured failure handling and role after failure for an action * * \param[in,out] history Unpacked action history entry * \param[out] on_fail Where to set configured failure handling * \param[out] fail_role Where to set to role after failure */ static void unpack_failure_handling(struct action_history *history, enum action_fail_response *on_fail, enum rsc_role_e *fail_role) { xmlNode *config = pcmk__find_action_config(history->rsc, history->task, history->interval_ms, true); GHashTable *meta = pcmk__unpack_action_meta(history->rsc, history->node, history->task, history->interval_ms, config); const char *on_fail_str = g_hash_table_lookup(meta, XML_OP_ATTR_ON_FAIL); *on_fail = pcmk__parse_on_fail(history->rsc, history->task, history->interval_ms, on_fail_str); *fail_role = pcmk__role_after_failure(history->rsc, history->task, *on_fail, meta); g_hash_table_destroy(meta); } /*! * \internal * \brief Update resource role, failure handling, etc., after a failed action * * \param[in,out] history Parsed action result history * \param[in] config_on_fail Action failure handling from configuration * \param[in] fail_role Resource's role after failure of this action * \param[out] last_failure This will be set to the history XML * \param[in,out] on_fail Actual handling of action result */ static void unpack_rsc_op_failure(struct action_history *history, enum action_fail_response config_on_fail, enum rsc_role_e fail_role, xmlNode **last_failure, enum action_fail_response *on_fail) { bool is_probe = false; char *last_change_s = NULL; *last_failure = history->xml; is_probe = pcmk_xe_is_probe(history->xml); last_change_s = last_change_str(history->xml); if (!pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_symmetric_cluster) && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { crm_trace("Unexpected result (%s%s%s) was recorded for " "%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pe__node_name(history->node), last_change_s, history->exit_status, history->id); } else { pcmk__sched_warn("Unexpected result (%s%s%s) was recorded for %s of " "%s on %s at %s " CRM_XS " exit-status=%d id=%s", services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), (is_probe? "probe" : history->task), history->rsc->id, pe__node_name(history->node), last_change_s, history->exit_status, history->id); if (is_probe && (history->exit_status != PCMK_OCF_OK) && (history->exit_status != PCMK_OCF_NOT_RUNNING) && (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) { /* A failed (not just unexpected) probe result could mean the user * didn't know resources will be probed even where they can't run. */ crm_notice("If it is not possible for %s to run on %s, see " "the resource-discovery option for location constraints", history->rsc->id, pe__node_name(history->node)); } record_failed_op(history); } free(last_change_s); if (cmp_on_fail(*on_fail, config_on_fail) < 0) { pcmk__rsc_trace(history->rsc, "on-fail %s -> %s for %s", fail2text(*on_fail), fail2text(config_on_fail), history->key); *on_fail = config_on_fail; } if (strcmp(history->task, PCMK_ACTION_STOP) == 0) { resource_location(history->rsc, history->node, -INFINITY, "__stop_fail__", history->rsc->cluster); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) { unpack_migrate_to_failure(history); } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) { unpack_migrate_from_failure(history); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->role = pcmk_role_promoted; } else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) { if (config_on_fail == pcmk_on_fail_block) { history->rsc->role = pcmk_role_promoted; pe__set_next_role(history->rsc, pcmk_role_stopped, "demote with on-fail=block"); } else if (history->exit_status == PCMK_OCF_NOT_RUNNING) { history->rsc->role = pcmk_role_stopped; } else { /* Staying in the promoted role would put the scheduler and * controller into a loop. Setting the role to unpromoted is not * dangerous because the resource will be stopped as part of * recovery, and any promotion will be ordered after that stop. */ history->rsc->role = pcmk_role_unpromoted; } } if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) { /* leave stopped */ pcmk__rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id); history->rsc->role = pcmk_role_stopped; } else if (history->rsc->role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "Setting %s active", history->rsc->id); set_active(history->rsc); } pcmk__rsc_trace(history->rsc, "Resource %s: role=%s unclean=%s on_fail=%s fail_role=%s", history->rsc->id, role2text(history->rsc->role), pcmk__btoa(history->node->details->unclean), fail2text(config_on_fail), role2text(fail_role)); if ((fail_role != pcmk_role_started) && (history->rsc->next_role < fail_role)) { pe__set_next_role(history->rsc, fail_role, "failure"); } if (fail_role == pcmk_role_stopped) { ban_from_all_nodes(history->rsc); } } /*! * \internal * \brief Block a resource with a failed action if it cannot be recovered * * If resource action is a failed stop and fencing is not possible, mark the * resource as unmanaged and blocked, since recovery cannot be done. * * \param[in,out] history Parsed action history entry */ static void block_if_unrecoverable(struct action_history *history) { char *last_change_s = NULL; if (strcmp(history->task, PCMK_ACTION_STOP) != 0) { return; // All actions besides stop are always recoverable } if (pe_can_fence(history->node->details->data_set, history->node)) { return; // Failed stops are recoverable via fencing } last_change_s = last_change_str(history->xml); pcmk__sched_err("No further recovery can be attempted for %s " "because %s on %s failed (%s%s%s) at %s " CRM_XS " rc=%d id=%s", history->rsc->id, history->task, pe__node_name(history->node), services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, ""), last_change_s, history->exit_status, history->id); free(last_change_s); pe__clear_resource_flags(history->rsc, pcmk_rsc_managed); pe__set_resource_flags(history->rsc, pcmk_rsc_blocked); } /*! * \internal * \brief Update action history's execution status and why * * \param[in,out] history Parsed action history entry * \param[out] why Where to store reason for update * \param[in] value New value * \param[in] reason Description of why value was changed */ static inline void remap_because(struct action_history *history, const char **why, int value, const char *reason) { if (history->execution_status != value) { history->execution_status = value; *why = reason; } } /*! * \internal * \brief Remap informational monitor results and operation status * * For the monitor results, certain OCF codes are for providing extended information * to the user about services that aren't yet failed but not entirely healthy either. * These must be treated as the "normal" result by Pacemaker. * * For operation status, the action result can be used to determine an appropriate * status for the purposes of responding to the action. The status provided by the * executor is not directly usable since the executor does not know what was expected. * * \param[in,out] history Parsed action history entry * \param[in,out] on_fail What should be done about the result * \param[in] expired Whether result is expired * * \note If the result is remapped and the node is not shutting down or failed, * the operation will be recorded in the scheduler data's list of failed * operations to highlight it for the user. * * \note This may update the resource's current and next role. */ static void remap_operation(struct action_history *history, enum action_fail_response *on_fail, bool expired) { bool is_probe = false; int orig_exit_status = history->exit_status; int orig_exec_status = history->execution_status; const char *why = NULL; const char *task = history->task; // Remap degraded results to their successful counterparts history->exit_status = pcmk__effective_rc(history->exit_status); if (history->exit_status != orig_exit_status) { why = "degraded result"; if (!expired && (!history->node->details->shutdown || history->node->details->online)) { record_failed_op(history); } } if (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && ((history->execution_status != PCMK_EXEC_DONE) || (history->exit_status != PCMK_OCF_NOT_RUNNING))) { history->execution_status = PCMK_EXEC_DONE; history->exit_status = PCMK_OCF_NOT_RUNNING; why = "equivalent probe result"; } /* If the executor reported an execution status of anything but done or * error, consider that final. But for done or error, we know better whether * it should be treated as a failure or not, because we know the expected * result. */ switch (history->execution_status) { case PCMK_EXEC_DONE: case PCMK_EXEC_ERROR: break; // These should be treated as node-fatal case PCMK_EXEC_NO_FENCE_DEVICE: case PCMK_EXEC_NO_SECRETS: remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "node-fatal error"); goto remap_done; default: goto remap_done; } is_probe = pcmk_xe_is_probe(history->xml); if (is_probe) { task = "probe"; } if (history->expected_exit_status < 0) { /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the * expected exit status in the transition key, which (along with the * similar case of a corrupted transition key in the CIB) will be * reported to this function as -1. Pacemaker 2.0+ does not support * rolling upgrades from those versions or processing of saved CIB files * from those versions, so we do not need to care much about this case. */ remap_because(history, &why, PCMK_EXEC_ERROR, "obsolete history format"); pcmk__config_warn("Expected result not found for %s on %s " "(corrupt or obsolete CIB?)", history->key, pe__node_name(history->node)); } else if (history->exit_status == history->expected_exit_status) { remap_because(history, &why, PCMK_EXEC_DONE, "expected result"); } else { remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result"); pcmk__rsc_debug(history->rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)", history->key, pe__node_name(history->node), history->expected_exit_status, services_ocf_exitcode_str(history->expected_exit_status), history->exit_status, services_ocf_exitcode_str(history->exit_status), (pcmk__str_empty(history->exit_reason)? "" : ": "), pcmk__s(history->exit_reason, "")); } switch (history->exit_status) { case PCMK_OCF_OK: if (is_probe && (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active on %s at %s", history->rsc->id, pe__node_name(history->node), last_change_s); free(last_change_s); } break; case PCMK_OCF_NOT_RUNNING: if (is_probe || (history->expected_exit_status == history->exit_status) || !pcmk_is_set(history->rsc->flags, pcmk_rsc_managed)) { /* For probes, recurring monitors for the Stopped role, and * unmanaged resources, "not running" is not considered a * failure. */ remap_because(history, &why, PCMK_EXEC_DONE, "exit status"); history->rsc->role = pcmk_role_stopped; *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "not running"); } break; case PCMK_OCF_RUNNING_PROMOTED: if (is_probe && (history->exit_status != history->expected_exit_status)) { char *last_change_s = last_change_str(history->xml); remap_because(history, &why, PCMK_EXEC_DONE, "probe"); pcmk__rsc_info(history->rsc, "Probe found %s active and promoted on %s at %s", history->rsc->id, pe__node_name(history->node), last_change_s); free(last_change_s); } if (!expired || (history->exit_status == history->expected_exit_status)) { history->rsc->role = pcmk_role_promoted; } break; case PCMK_OCF_FAILED_PROMOTED: if (!expired) { history->rsc->role = pcmk_role_promoted; } remap_because(history, &why, PCMK_EXEC_ERROR, "exit status"); break; case PCMK_OCF_NOT_CONFIGURED: remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status"); break; case PCMK_OCF_UNIMPLEMENT_FEATURE: { guint interval_ms = 0; crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS, &interval_ms); if (interval_ms == 0) { if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); } else { remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED, "exit status"); } } break; case PCMK_OCF_NOT_INSTALLED: case PCMK_OCF_INVALID_PARAM: case PCMK_OCF_INSUFFICIENT_PRIV: if (!expired) { block_if_unrecoverable(history); } remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status"); break; default: if (history->execution_status == PCMK_EXEC_DONE) { char *last_change_s = last_change_str(history->xml); crm_info("Treating unknown exit status %d from %s of %s " "on %s at %s as failure", history->exit_status, task, history->rsc->id, pe__node_name(history->node), last_change_s); remap_because(history, &why, PCMK_EXEC_ERROR, "unknown exit status"); free(last_change_s); } break; } remap_done: if (why != NULL) { pcmk__rsc_trace(history->rsc, "Remapped %s result from [%s: %s] to [%s: %s] " "because of %s", history->key, pcmk_exec_status_str(orig_exec_status), crm_exit_str(orig_exit_status), pcmk_exec_status_str(history->execution_status), crm_exit_str(history->exit_status), why); } } // return TRUE if start or monitor last failure but parameters changed static bool should_clear_for_param_change(const xmlNode *xml_op, const char *task, pcmk_resource_t *rsc, pcmk_node_t *node) { if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) { if (pe__bundle_needs_remote_name(rsc)) { /* We haven't allocated resources yet, so we can't reliably * substitute addr parameters for the REMOTE_CONTAINER_HACK. * When that's needed, defer the check until later. */ pe__add_param_check(xml_op, rsc, node, pcmk__check_last_failure, rsc->cluster); } else { pcmk__op_digest_t *digest_data = NULL; digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster); switch (digest_data->rc) { case pcmk__digest_unknown: crm_trace("Resource %s history entry %s on %s" " has no digest to compare", rsc->id, pe__xe_history_key(xml_op), node->details->id); break; case pcmk__digest_match: break; default: return TRUE; } } } return FALSE; } // Order action after fencing of remote node, given connection rsc static void order_after_remote_fencing(pcmk_action_t *action, pcmk_resource_t *remote_conn, pcmk_scheduler_t *scheduler) { pcmk_node_t *remote_node = pe_find_node(scheduler->nodes, remote_conn->id); if (remote_node) { pcmk_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, scheduler); order_actions(fence, action, pcmk__ar_first_implies_then); } } static bool should_ignore_failure_timeout(const pcmk_resource_t *rsc, const char *task, guint interval_ms, bool is_last_failure) { /* Clearing failures of recurring monitors has special concerns. The * executor reports only changes in the monitor result, so if the * monitor is still active and still getting the same failure result, * that will go undetected after the failure is cleared. * * Also, the operation history will have the time when the recurring * monitor result changed to the given code, not the time when the * result last happened. * * @TODO We probably should clear such failures only when the failure * timeout has passed since the last occurrence of the failed result. * However we don't record that information. We could maybe approximate * that by clearing only if there is a more recent successful monitor or * stop result, but we don't even have that information at this point * since we are still unpacking the resource's operation history. * * This is especially important for remote connection resources with a * reconnect interval, so in that case, we skip clearing failures * if the remote node hasn't been fenced. */ if (rsc->remote_reconnect_ms && pcmk_is_set(rsc->cluster->flags, pcmk_sched_fencing_enabled) && (interval_ms != 0) && pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) { pcmk_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id); if (remote_node && !remote_node->details->remote_was_fenced) { if (is_last_failure) { crm_info("Waiting to clear monitor failure for remote node %s" " until fencing has occurred", rsc->id); } return TRUE; } } return FALSE; } /*! * \internal * \brief Check operation age and schedule failure clearing when appropriate * * This function has two distinct purposes. The first is to check whether an * operation history entry is expired (i.e. the resource has a failure timeout, * the entry is older than the timeout, and the resource either has no fail * count or its fail count is entirely older than the timeout). The second is to * schedule fail count clearing when appropriate (i.e. the operation is expired * and either the resource has an expired fail count or the operation is a * last_failure for a remote connection resource with a reconnect interval, * or the operation is a last_failure for a start or monitor operation and the * resource's parameters have changed since the operation). * * \param[in,out] history Parsed action result history * * \return true if operation history entry is expired, otherwise false */ static bool check_operation_expiry(struct action_history *history) { bool expired = false; bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0"); time_t last_run = 0; int unexpired_fail_count = 0; const char *clear_reason = NULL; if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) { pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not expired: " "Not Installed does not expire", history->id, pe__node_name(history->node)); return false; // "Not installed" must always be cleared manually } if ((history->rsc->failure_timeout > 0) && (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE, &last_run) == 0)) { // Resource has a failure-timeout, and history entry has a timestamp time_t now = get_effective_time(history->rsc->cluster); time_t last_failure = 0; // Is this particular operation history older than the failure timeout? if ((now >= (last_run + history->rsc->failure_timeout)) && !should_ignore_failure_timeout(history->rsc, history->task, history->interval_ms, is_last_failure)) { expired = true; } // Does the resource as a whole have an unexpired fail count? unexpired_fail_count = pe_get_failcount(history->node, history->rsc, &last_failure, pcmk__fc_effective, history->xml); // Update scheduler recheck time according to *last* failure crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds" " last-failure@%lld", history->id, (long long) last_run, (expired? "" : "not "), (long long) now, unexpired_fail_count, history->rsc->failure_timeout, (long long) last_failure); last_failure += history->rsc->failure_timeout + 1; if (unexpired_fail_count && (now < last_failure)) { pe__update_recheck_time(last_failure, history->rsc->cluster, "fail count expiration"); } } if (expired) { if (pe_get_failcount(history->node, history->rsc, NULL, pcmk__fc_default, history->xml)) { // There is a fail count ignoring timeout if (unexpired_fail_count == 0) { // There is no fail count considering timeout clear_reason = "it expired"; } else { /* This operation is old, but there is an unexpired fail count. * In a properly functioning cluster, this should only be * possible if this operation is not a failure (otherwise the * fail count should be expired too), so this is really just a * failsafe. */ pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Unexpired fail count", history->id, pe__node_name(history->node)); expired = false; } } else if (is_last_failure && (history->rsc->remote_reconnect_ms != 0)) { /* Clear any expired last failure when reconnect interval is set, * even if there is no fail count. */ clear_reason = "reconnect interval is set"; } } if (!expired && is_last_failure && should_clear_for_param_change(history->xml, history->task, history->rsc, history->node)) { clear_reason = "resource parameters have changed"; } if (clear_reason != NULL) { pcmk_action_t *clear_op = NULL; // Schedule clearing of the fail count clear_op = pe__clear_failcount(history->rsc, history->node, clear_reason, history->rsc->cluster); if (pcmk_is_set(history->rsc->cluster->flags, pcmk_sched_fencing_enabled) && (history->rsc->remote_reconnect_ms != 0)) { /* If we're clearing a remote connection due to a reconnect * interval, we want to wait until any scheduled fencing * completes. * * We could limit this to remote_node->details->unclean, but at * this point, that's always true (it won't be reliable until * after unpack_node_history() is done). */ crm_info("Clearing %s failure will wait until any scheduled " "fencing of %s completes", history->task, history->rsc->id); order_after_remote_fencing(clear_op, history->rsc, history->rsc->cluster); } } if (expired && (history->interval_ms == 0) && pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { switch (history->exit_status) { case PCMK_OCF_OK: case PCMK_OCF_NOT_RUNNING: case PCMK_OCF_RUNNING_PROMOTED: case PCMK_OCF_DEGRADED: case PCMK_OCF_DEGRADED_PROMOTED: // Don't expire probes that return these values pcmk__rsc_trace(history->rsc, "Resource history entry %s on %s is not " "expired: Probe result", history->id, pe__node_name(history->node)); expired = false; break; } } return expired; } int pe__target_rc_from_xml(const xmlNode *xml_op) { int target_rc = 0; const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY); if (key == NULL) { return -1; } decode_transition_key(key, NULL, NULL, NULL, &target_rc); return target_rc; } /*! * \internal * \brief Update a resource's state for an action result * * \param[in,out] history Parsed action history entry * \param[in] exit_status Exit status to base new state on * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void update_resource_state(struct action_history *history, int exit_status, const xmlNode *last_failure, enum action_fail_response *on_fail) { bool clear_past_failure = false; if ((exit_status == PCMK_OCF_NOT_INSTALLED) || (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml))) { history->rsc->role = pcmk_role_stopped; } else if (exit_status == PCMK_OCF_NOT_RUNNING) { clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) { if ((last_failure != NULL) && pcmk__str_eq(history->key, pe__xe_history_key(last_failure), pcmk__str_none)) { clear_past_failure = true; } if (history->rsc->role < pcmk_role_started) { set_active(history->rsc); } } else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) { history->rsc->role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) { history->rsc->role = pcmk_role_stopped; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE, pcmk__str_none)) { history->rsc->role = pcmk_role_promoted; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE, pcmk__str_none)) { if (*on_fail == pcmk_on_fail_demote) { // Demote clears an error only if on-fail=demote clear_past_failure = true; } history->rsc->role = pcmk_role_unpromoted; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_none)) { history->rsc->role = pcmk_role_started; clear_past_failure = true; } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO, pcmk__str_none)) { unpack_migrate_to_success(history); } else if (history->rsc->role < pcmk_role_started) { pcmk__rsc_trace(history->rsc, "%s active on %s", history->rsc->id, pe__node_name(history->node)); set_active(history->rsc); } if (!clear_past_failure) { return; } switch (*on_fail) { case pcmk_on_fail_stop: case pcmk_on_fail_ban: case pcmk_on_fail_standby_node: case pcmk_on_fail_fence_node: pcmk__rsc_trace(history->rsc, "%s (%s) is not cleared by a completed %s", history->rsc->id, fail2text(*on_fail), history->task); break; case pcmk_on_fail_block: case pcmk_on_fail_ignore: case pcmk_on_fail_demote: case pcmk_on_fail_restart: case pcmk_on_fail_restart_container: *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures"); break; case pcmk_on_fail_reset_remote: if (history->rsc->remote_reconnect_ms == 0) { /* With no reconnect interval, the connection is allowed to * start again after the remote node is fenced and * completely stopped. (With a reconnect interval, we wait * for the failure to be cleared entirely before attempting * to reconnect.) */ *on_fail = pcmk_on_fail_ignore; pe__set_next_role(history->rsc, pcmk_role_unknown, "clear past failures and reset remote"); } break; } } /*! * \internal * \brief Check whether a given history entry matters for resource state * * \param[in] history Parsed action history entry * * \return true if action can affect resource state, otherwise false */ static inline bool can_affect_state(struct action_history *history) { #if 0 /* @COMPAT It might be better to parse only actions we know we're interested * in, rather than exclude a couple we don't. However that would be a * behavioral change that should be done at a major or minor series release. * Currently, unknown operations can affect whether a resource is considered * active and/or failed. */ return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR, PCMK_ACTION_START, PCMK_ACTION_STOP, PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM, "asyncmon", NULL); #else return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY, PCMK_ACTION_META_DATA, NULL); #endif } /*! * \internal * \brief Unpack execution/exit status and exit reason from a history entry * * \param[in,out] history Action history entry to unpack * * \return Standard Pacemaker return code */ static int unpack_action_result(struct action_history *history) { if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS, &(history->execution_status)) < 0) || (history->execution_status < PCMK_EXEC_PENDING) || (history->execution_status > PCMK_EXEC_MAX) || (history->execution_status == PCMK_EXEC_CANCELLED)) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " XML_LRM_ATTR_OPSTATUS " '%s'", history->id, history->rsc->id, pe__node_name(history->node), pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS), "")); return pcmk_rc_unpack_error; } if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC, &(history->exit_status)) < 0) || (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) { #if 0 /* @COMPAT We should ignore malformed entries, but since that would * change behavior, it should be done at a major or minor series * release. */ pcmk__config_err("Ignoring resource history entry %s for %s on %s " "with invalid " XML_LRM_ATTR_RC " '%s'", history->id, history->rsc->id, pe__node_name(history->node), pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC), "")); return pcmk_rc_unpack_error; #else history->exit_status = CRM_EX_ERROR; #endif } history->exit_reason = crm_element_value(history->xml, XML_LRM_ATTR_EXIT_REASON); return pcmk_rc_ok; } /*! * \internal * \brief Process an action history entry whose result expired * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * * \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the * entry needs no further processing) */ static int process_expired_result(struct action_history *history, int orig_exit_status) { if (!pe_rsc_is_bundled(history->rsc) && pcmk_xe_mask_probe_failure(history->xml) && (orig_exit_status != history->expected_exit_status)) { if (history->rsc->role <= pcmk_role_stopped) { history->rsc->role = pcmk_role_unknown; } crm_trace("Ignoring resource history entry %s for probe of %s on %s: " "Masked failure expired", history->id, history->rsc->id, pe__node_name(history->node)); return pcmk_rc_ok; } if (history->exit_status == history->expected_exit_status) { return pcmk_rc_undetermined; // Only failures expire } if (history->interval_ms == 0) { crm_notice("Ignoring resource history entry %s for %s of %s on %s: " "Expired failure", history->id, history->task, history->rsc->id, pe__node_name(history->node)); return pcmk_rc_ok; } if (history->node->details->online && !history->node->details->unclean) { /* Reschedule the recurring action. schedule_cancel() won't work at * this stage, so as a hacky workaround, forcibly change the restart * digest so pcmk__check_action_config() does what we want later. * * @TODO We should skip this if there is a newer successful monitor. * Also, this causes rescheduling only if the history entry * has an op-digest (which the expire-non-blocked-failure * scheduler regression test doesn't, but that may not be a * realistic scenario in production). */ crm_notice("Rescheduling %s-interval %s of %s on %s " "after failure expired", pcmk__readable_interval(history->interval_ms), history->task, history->rsc->id, pe__node_name(history->node)); crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout"); return pcmk_rc_ok; } return pcmk_rc_undetermined; } /*! * \internal * \brief Process a masked probe failure * * \param[in,out] history Parsed action history entry * \param[in] orig_exit_status Action exit status before remapping * \param[in] last_failure Resource's last_failure entry, if known * \param[in,out] on_fail Resource's current failure handling */ static void mask_probe_failure(struct action_history *history, int orig_exit_status, const xmlNode *last_failure, enum action_fail_response *on_fail) { pcmk_resource_t *ban_rsc = history->rsc; if (!pcmk_is_set(history->rsc->flags, pcmk_rsc_unique)) { ban_rsc = uber_parent(history->rsc); } crm_notice("Treating probe result '%s' for %s on %s as 'not running'", services_ocf_exitcode_str(orig_exit_status), history->rsc->id, pe__node_name(history->node)); update_resource_state(history, history->expected_exit_status, last_failure, on_fail); crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname); record_failed_op(history); resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure", history->rsc->cluster); } /*! * \internal Check whether a given failure is for a given pending action * * \param[in] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known * * \return true if \p last_failure is failure of pending action in \p history, * otherwise false * \note Both \p history and \p last_failure must come from the same * lrm_resource block, as node and resource are assumed to be the same. */ static bool failure_is_newer(const struct action_history *history, const xmlNode *last_failure) { guint failure_interval_ms = 0U; long long failure_change = 0LL; long long this_change = 0LL; if (last_failure == NULL) { return false; // Resource has no last_failure entry } if (!pcmk__str_eq(history->task, crm_element_value(last_failure, XML_LRM_ATTR_TASK), pcmk__str_none)) { return false; // last_failure is for different action } if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS, &failure_interval_ms) != pcmk_ok) || (history->interval_ms != failure_interval_ms)) { return false; // last_failure is for action with different interval } if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE), &this_change, 0LL) != pcmk_rc_ok) || (pcmk__scan_ll(crm_element_value(last_failure, XML_RSC_OP_LAST_CHANGE), &failure_change, 0LL) != pcmk_rc_ok) || (failure_change < this_change)) { return false; // Failure is not known to be newer } return true; } /*! * \internal * \brief Update a resource's role etc. for a pending action * * \param[in,out] history Parsed history entry for pending action * \param[in] last_failure Resource's last_failure entry, if known */ static void process_pending_action(struct action_history *history, const xmlNode *last_failure) { /* For recurring monitors, a failure is recorded only in RSC_last_failure_0, * and there might be a RSC_monitor_INTERVAL entry with the last successful * or pending result. * * If last_failure contains the failure of the pending recurring monitor * we're processing here, and is newer, the action is no longer pending. * (Pending results have call ID -1, which sorts last, so the last failure * if any should be known.) */ if (failure_is_newer(history, last_failure)) { return; } if (strcmp(history->task, PCMK_ACTION_START) == 0) { pe__set_resource_flags(history->rsc, pcmk_rsc_start_pending); set_active(history->rsc); } else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) { history->rsc->role = pcmk_role_promoted; } else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) && history->node->details->unclean) { /* A migrate_to action is pending on a unclean source, so force a stop * on the target. */ const char *migrate_target = NULL; pcmk_node_t *target = NULL; migrate_target = crm_element_value(history->xml, XML_LRM_ATTR_MIGRATE_TARGET); target = pe_find_node(history->rsc->cluster->nodes, migrate_target); if (target != NULL) { stop_action(history->rsc, target, FALSE); } } if (history->rsc->pending_task != NULL) { /* There should never be multiple pending actions, but as a failsafe, * just remember the first one processed for display purposes. */ return; } if (pcmk_is_probe(history->task, history->interval_ms)) { /* Pending probes are currently never displayed, even if pending * operations are requested. If we ever want to change that, * enable the below and the corresponding part of * native.c:native_pending_task(). */ #if 0 history->rsc->pending_task = strdup("probe"); history->rsc->pending_node = history->node; #endif } else { history->rsc->pending_task = strdup(history->task); history->rsc->pending_node = history->node; } } static void unpack_rsc_op(pcmk_resource_t *rsc, pcmk_node_t *node, xmlNode *xml_op, xmlNode **last_failure, enum action_fail_response *on_fail) { int old_rc = 0; bool expired = false; pcmk_resource_t *parent = rsc; enum rsc_role_e fail_role = pcmk_role_unknown; enum action_fail_response failure_strategy = pcmk_on_fail_restart; struct action_history history = { .rsc = rsc, .node = node, .xml = xml_op, .execution_status = PCMK_EXEC_UNKNOWN, }; CRM_CHECK(rsc && node && xml_op, return); history.id = ID(xml_op); if (history.id == NULL) { pcmk__config_err("Ignoring resource history entry for %s on %s " "without ID", rsc->id, pe__node_name(node)); return; } // Task and interval history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK); if (history.task == NULL) { pcmk__config_err("Ignoring resource history entry %s for %s on %s " "without " XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node)); return; } crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &(history.interval_ms)); if (!can_affect_state(&history)) { pcmk__rsc_trace(rsc, "Ignoring resource history entry %s for %s on %s " "with irrelevant action '%s'", history.id, rsc->id, pe__node_name(node), history.task); return; } if (unpack_action_result(&history) != pcmk_rc_ok) { return; // Error already logged } history.expected_exit_status = pe__target_rc_from_xml(xml_op); history.key = pe__xe_history_key(xml_op); crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id)); pcmk__rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)", history.id, history.task, history.call_id, pe__node_name(node), pcmk_exec_status_str(history.execution_status), crm_exit_str(history.exit_status)); if (node->details->unclean) { pcmk__rsc_trace(rsc, "%s is running on %s, which is unclean (further action " "depends on value of stop's on-fail attribute)", rsc->id, pe__node_name(node)); } expired = check_operation_expiry(&history); old_rc = history.exit_status; remap_operation(&history, on_fail, expired); if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) { goto done; } if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) { mask_probe_failure(&history, old_rc, *last_failure, on_fail); goto done; } if (!pcmk_is_set(rsc->flags, pcmk_rsc_unique)) { parent = uber_parent(rsc); } switch (history.execution_status) { case PCMK_EXEC_PENDING: process_pending_action(&history, *last_failure); goto done; case PCMK_EXEC_DONE: update_resource_state(&history, history.exit_status, *last_failure, on_fail); goto done; case PCMK_EXEC_NOT_INSTALLED: unpack_failure_handling(&history, &failure_strategy, &fail_role); if (failure_strategy == pcmk_on_fail_ignore) { crm_warn("Cannot ignore failed %s of %s on %s: " "Resource agent doesn't exist " CRM_XS " status=%d rc=%d id=%s", history.task, rsc->id, pe__node_name(node), history.execution_status, history.exit_status, history.id); /* Also for printing it as "FAILED" by marking it as * pcmk_rsc_failed later */ *on_fail = pcmk_on_fail_ban; } resource_location(parent, node, -INFINITY, "hard-error", rsc->cluster); unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); goto done; case PCMK_EXEC_NOT_CONNECTED: if (pe__is_guest_or_remote_node(node) && pcmk_is_set(node->details->remote_rsc->flags, pcmk_rsc_managed)) { /* We should never get into a situation where a managed remote * connection resource is considered OK but a resource action * behind the connection gets a "not connected" status. But as a * fail-safe in case a bug or unusual circumstances do lead to * that, ensure the remote connection is considered failed. */ pe__set_resource_flags(node->details->remote_rsc, pcmk_rsc_failed|pcmk_rsc_stop_if_failed); } break; // Not done, do error handling case PCMK_EXEC_ERROR: case PCMK_EXEC_ERROR_HARD: case PCMK_EXEC_ERROR_FATAL: case PCMK_EXEC_TIMEOUT: case PCMK_EXEC_NOT_SUPPORTED: case PCMK_EXEC_INVALID: break; // Not done, do error handling default: // No other value should be possible at this point break; } unpack_failure_handling(&history, &failure_strategy, &fail_role); if ((failure_strategy == pcmk_on_fail_ignore) || ((failure_strategy == pcmk_on_fail_restart_container) && (strcmp(history.task, PCMK_ACTION_STOP) == 0))) { char *last_change_s = last_change_str(xml_op); crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded " CRM_XS " %s", history.task, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node), last_change_s, history.id); free(last_change_s); update_resource_state(&history, history.expected_exit_status, *last_failure, on_fail); crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname); pe__set_resource_flags(rsc, pcmk_rsc_ignore_failure); record_failed_op(&history); if ((failure_strategy == pcmk_on_fail_restart_container) && cmp_on_fail(*on_fail, pcmk_on_fail_restart) <= 0) { *on_fail = failure_strategy; } } else { unpack_rsc_op_failure(&history, failure_strategy, fail_role, last_failure, on_fail); if (history.execution_status == PCMK_EXEC_ERROR_HARD) { uint8_t log_level = LOG_ERR; if (history.exit_status == PCMK_OCF_NOT_INSTALLED) { log_level = LOG_NOTICE; } do_crm_log(log_level, "Preventing %s from restarting on %s because " "of hard failure (%s%s%s) " CRM_XS " %s", parent->id, pe__node_name(node), services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, node, -INFINITY, "hard-error", rsc->cluster); } else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) { pcmk__sched_err("Preventing %s from restarting anywhere because " "of fatal failure (%s%s%s) " CRM_XS " %s", parent->id, services_ocf_exitcode_str(history.exit_status), (pcmk__str_empty(history.exit_reason)? "" : ": "), pcmk__s(history.exit_reason, ""), history.id); resource_location(parent, NULL, -INFINITY, "fatal-error", rsc->cluster); } } done: pcmk__rsc_trace(rsc, "%s role on %s after %s is %s (next %s)", rsc->id, pe__node_name(node), history.id, role2text(rsc->role), role2text(rsc->next_role)); } static void add_node_attrs(const xmlNode *xml_obj, pcmk_node_t *node, bool overwrite, pcmk_scheduler_t *scheduler) { const char *cluster_name = NULL; pe_rule_eval_data_t rule_data = { .node_hash = NULL, .role = pcmk_role_unknown, .now = scheduler->now, .match_data = NULL, .rsc_data = NULL, .op_data = NULL }; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_UNAME), strdup(node->details->uname)); g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID), strdup(node->details->id)); if (pcmk__str_eq(node->details->id, scheduler->dc_uuid, pcmk__str_casei)) { scheduler->dc_node = node; node->details->is_dc = TRUE; g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE)); } else { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE)); } cluster_name = g_hash_table_lookup(scheduler->config_hash, PCMK_OPT_CLUSTER_NAME); if (cluster_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME), strdup(cluster_name)); } pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data, node->details->attrs, NULL, overwrite, scheduler); pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data, node->details->utilization, NULL, FALSE, scheduler); if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) { const char *site_name = pe_node_attribute_raw(node, "site-name"); if (site_name) { g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(site_name)); } else if (cluster_name) { /* Default to cluster-name if unset */ g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_SITE_NAME), strdup(cluster_name)); } } } static GList * extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter) { int counter = -1; int stop_index = -1; int start_index = -1; xmlNode *rsc_op = NULL; GList *gIter = NULL; GList *op_list = NULL; GList *sorted_op_list = NULL; /* extract operations */ op_list = NULL; sorted_op_list = NULL; for (rsc_op = pcmk__xe_first_child(rsc_entry); rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) { if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, pcmk__str_none)) { crm_xml_add(rsc_op, "resource", rsc); crm_xml_add(rsc_op, XML_ATTR_UNAME, node); op_list = g_list_prepend(op_list, rsc_op); } } if (op_list == NULL) { /* if there are no operations, there is nothing to do */ return NULL; } sorted_op_list = g_list_sort(op_list, sort_op_by_callid); /* create active recurring operations as optional */ if (active_filter == FALSE) { return sorted_op_list; } op_list = NULL; calculate_active_ops(sorted_op_list, &start_index, &stop_index); for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) { xmlNode *rsc_op = (xmlNode *) gIter->data; counter++; if (start_index < stop_index) { crm_trace("Skipping %s: not active", ID(rsc_entry)); break; } else if (counter < start_index) { crm_trace("Skipping %s: old", ID(rsc_op)); continue; } op_list = g_list_append(op_list, rsc_op); } g_list_free(sorted_op_list); return op_list; } GList * find_operations(const char *rsc, const char *node, gboolean active_filter, pcmk_scheduler_t *scheduler) { GList *output = NULL; GList *intermediate = NULL; xmlNode *tmp = NULL; xmlNode *status = find_xml_node(scheduler->input, XML_CIB_TAG_STATUS, TRUE); pcmk_node_t *this_node = NULL; xmlNode *node_state = NULL; for (node_state = pcmk__xe_first_child(status); node_state != NULL; node_state = pcmk__xe_next(node_state)) { if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) { const char *uname = crm_element_value(node_state, XML_ATTR_UNAME); if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) { continue; } this_node = pe_find_node(scheduler->nodes, uname); if(this_node == NULL) { CRM_LOG_ASSERT(this_node != NULL); continue; } else if (pe__is_guest_or_remote_node(this_node)) { determine_remote_online_status(scheduler, this_node); } else { determine_online_status(node_state, this_node, scheduler); } if (this_node->details->online || pcmk_is_set(scheduler->flags, pcmk_sched_fencing_enabled)) { /* offline nodes run no resources... * unless stonith is enabled in which case we need to * make sure rsc start events happen after the stonith */ xmlNode *lrm_rsc = NULL; tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE); tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE); for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL; lrm_rsc = pcmk__xe_next(lrm_rsc)) { if (pcmk__str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, pcmk__str_none)) { const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID); if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) { continue; } intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter); output = g_list_concat(output, intermediate); } } } } } return output; }