diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 3c59192a68..b0fb58f602 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -1,836 +1,836 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include qb_ipcs_service_t *ipcs = NULL; #if SUPPORT_COROSYNC extern gboolean crm_connect_corosync(crm_cluster_t * cluster); #endif void crm_shutdown(int nsig); gboolean crm_read_options(gpointer user_data); gboolean fsa_has_quorum = FALSE; crm_trigger_t *fsa_source = NULL; crm_trigger_t *config_read = NULL; bool no_quorum_suicide_escalation = FALSE; bool controld_shutdown_lock_enabled = false; /* A_HA_CONNECT */ void do_ha_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { gboolean registered = FALSE; static crm_cluster_t *cluster = NULL; if (cluster == NULL) { cluster = calloc(1, sizeof(crm_cluster_t)); } if (action & A_HA_DISCONNECT) { crm_cluster_disconnect(cluster); crm_info("Disconnected from the cluster"); controld_set_fsa_input_flags(R_HA_DISCONNECTED); } if (action & A_HA_CONNECT) { crm_set_status_callback(&peer_update_callback); crm_set_autoreap(FALSE); if (is_corosync_cluster()) { #if SUPPORT_COROSYNC registered = crm_connect_corosync(cluster); #endif } if (registered == TRUE) { controld_election_init(cluster->uname); fsa_our_uname = cluster->uname; fsa_our_uuid = cluster->uuid; if(cluster->uuid == NULL) { crm_err("Could not obtain local uuid"); registered = FALSE; } } if (registered == FALSE) { controld_set_fsa_input_flags(R_HA_DISCONNECTED); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); return; } populate_cib_nodes(node_update_none, __func__); controld_clear_fsa_input_flags(R_HA_DISCONNECTED); crm_info("Connected to the cluster"); } if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) { crm_err("Unexpected action %s in %s", fsa_action2string(action), __func__); } } /* A_SHUTDOWN */ void do_shutdown(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { /* just in case */ controld_set_fsa_input_flags(R_SHUTDOWN); controld_disconnect_fencer(FALSE); } /* A_SHUTDOWN_REQ */ void do_shutdown_req(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { xmlNode *msg = NULL; controld_set_fsa_input_flags(R_SHUTDOWN); //controld_set_fsa_input_flags(R_STAYDOWN); crm_info("Sending shutdown request to all peers (DC is %s)", (fsa_our_dc? fsa_our_dc : "not set")); msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL); if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } free_xml(msg); } extern char *max_generation_from; extern xmlNode *max_generation_xml; extern GHashTable *resource_history; extern GHashTable *voted; void crmd_fast_exit(crm_exit_t exit_code) { if (pcmk_is_set(fsa_input_register, R_STAYDOWN)) { crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d", exit_code, CRM_EX_FATAL); exit_code = CRM_EX_FATAL; } else if ((exit_code == CRM_EX_OK) && pcmk_is_set(fsa_input_register, R_IN_RECOVERY)) { crm_err("Could not recover from internal error"); exit_code = CRM_EX_ERROR; } crm_exit(exit_code); } crm_exit_t crmd_exit(crm_exit_t exit_code) { GList *gIter = NULL; GMainLoop *mloop = crmd_mainloop; static bool in_progress = FALSE; if (in_progress && (exit_code == CRM_EX_OK)) { crm_debug("Exit is already in progress"); return exit_code; } else if(in_progress) { crm_notice("Error during shutdown process, exiting now with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } in_progress = TRUE; crm_trace("Preparing to exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); /* Suppress secondary errors resulting from us disconnecting everything */ controld_set_fsa_input_flags(R_HA_DISCONNECTED); /* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */ if(ipcs) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; } controld_close_attrd_ipc(); - pe_subsystem_free(); + controld_shutdown_schedulerd_ipc(); controld_disconnect_fencer(TRUE); if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) { crm_debug("No mainloop detected"); exit_code = CRM_EX_ERROR; } /* On an error, just get out. * * Otherwise, make the effort to have mainloop exit gracefully so * that it (mostly) cleans up after itself and valgrind has less * to report on - allowing real errors stand out */ if (exit_code != CRM_EX_OK) { crm_notice("Forcing immediate exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); crm_write_blackbox(SIGTRAP, NULL); crmd_fast_exit(exit_code); } /* Clean up as much memory as possible for valgrind */ for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) { fsa_data_t *fsa_data = gIter->data; crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]", fsa_input2string(fsa_data->fsa_input), fsa_state2string(fsa_state), fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin); delete_fsa_input(fsa_data); } controld_clear_fsa_input_flags(R_MEMBERSHIP); g_list_free(fsa_message_queue); fsa_message_queue = NULL; metadata_cache_fini(); controld_election_fini(); /* Tear down the CIB manager connection, but don't free it yet -- it could * be used when we drain the mainloop later. */ controld_disconnect_cib_manager(); verify_stopped(fsa_state, LOG_WARNING); controld_clear_fsa_input_flags(R_LRM_CONNECTED); lrm_state_destroy_all(); /* This basically will not work, since mainloop has a reference to it */ mainloop_destroy_trigger(fsa_source); fsa_source = NULL; mainloop_destroy_trigger(config_read); config_read = NULL; mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL; pcmk__client_cleanup(); crm_peer_destroy(); controld_free_fsa_timers(); te_cleanup_stonith_history_sync(NULL, TRUE); controld_free_sched_timer(); free(fsa_our_dc_version); fsa_our_dc_version = NULL; free(fsa_our_uname); fsa_our_uname = NULL; free(fsa_our_uuid); fsa_our_uuid = NULL; free(fsa_our_dc); fsa_our_dc = NULL; free(fsa_cluster_name); fsa_cluster_name = NULL; free(te_uuid); te_uuid = NULL; free(failed_stop_offset); failed_stop_offset = NULL; free(failed_start_offset); failed_start_offset = NULL; free(max_generation_from); max_generation_from = NULL; free_xml(max_generation_xml); max_generation_xml = NULL; mainloop_destroy_signal(SIGPIPE); mainloop_destroy_signal(SIGUSR1); mainloop_destroy_signal(SIGTERM); mainloop_destroy_signal(SIGTRAP); /* leave SIGCHLD engaged as we might still want to drain some service-actions */ if (mloop) { GMainContext *ctx = g_main_loop_get_context(crmd_mainloop); /* Don't re-enter this block */ crmd_mainloop = NULL; /* no signals on final draining anymore */ mainloop_destroy_signal(SIGCHLD); crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); { int lpc = 0; while((g_main_context_pending(ctx) && lpc < 10)) { lpc++; crm_trace("Iteration %d", lpc); g_main_context_dispatch(ctx); } } crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx)); g_main_loop_quit(mloop); /* Won't do anything yet, since we're inside it now */ g_main_loop_unref(mloop); } else { mainloop_destroy_signal(SIGCHLD); } cib_delete(fsa_cib_conn); fsa_cib_conn = NULL; throttle_fini(); /* Graceful */ crm_trace("Done preparing for exit with status %d (%s)", exit_code, crm_exit_str(exit_code)); return exit_code; } /* A_EXIT_0, A_EXIT_1 */ void do_exit(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_exit_t exit_code = CRM_EX_OK; int log_level = LOG_INFO; const char *exit_type = "gracefully"; if (action & A_EXIT_1) { log_level = LOG_ERR; exit_type = "forcefully"; exit_code = CRM_EX_ERROR; } verify_stopped(cur_state, LOG_ERR); do_crm_log(log_level, "Performing %s - %s exiting the controller", fsa_action2string(action), exit_type); crm_info("[%s] stopped (%d)", crm_system_name, exit_code); crmd_exit(exit_code); } static void sigpipe_ignore(int nsig) { return; } /* A_STARTUP */ void do_startup(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_debug("Registering Signal Handlers"); mainloop_add_signal(SIGTERM, crm_shutdown); mainloop_add_signal(SIGPIPE, sigpipe_ignore); fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL); config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL); transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL); crm_debug("Creating CIB manager and executor objects"); fsa_cib_conn = cib_new(); lrm_state_init_local(); if (controld_init_fsa_timers() == FALSE) { register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } } // \return libqb error code (0 on success, -errno on error) static int32_t accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } // \return libqb error code (0 on success, -errno on error) static int32_t dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); if (msg == NULL) { pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_PROTOCOL); return 0; } pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_INDETERMINATE); CRM_ASSERT(client->user != NULL); pcmk__update_acl_user(msg, F_CRM_USER, client->user); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from client %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } trigger_fsa(); free_xml(msg); return 0; } static int32_t crmd_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client) { crm_trace("Disconnecting %sregistered client %s (%p/%p)", (client->userdata? "" : "un"), pcmk__client_name(client), c, client); free(client->userdata); pcmk__free_client(client); trigger_fsa(); } return 0; } static void crmd_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); crmd_ipc_closed(c); } /* A_STOP */ void do_stop(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { crm_trace("Closing IPC server"); mainloop_del_ipc_server(ipcs); ipcs = NULL; register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } /* A_STARTED */ void do_started(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = accept_controller_client, .connection_created = NULL, .msg_process = dispatch_controller_ipc, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; if (cur_state != S_STARTING) { crm_err("Start cancelled... %s", fsa_state2string(cur_state)); return; } else if (!pcmk_is_set(fsa_input_register, R_MEMBERSHIP)) { crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) { crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) { crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(fsa_input_register, R_READ_CONFIG)) { crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG); crmd_fsa_stall(TRUE); return; } else if (!pcmk_is_set(fsa_input_register, R_PEER_DATA)) { crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA); crmd_fsa_stall(TRUE); return; } crm_debug("Init server comms"); ipcs = pcmk__serve_controld_ipc(&crmd_callbacks); if (ipcs == NULL) { crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); } else { crm_notice("Pacemaker controller successfully started and accepting connections"); } controld_trigger_fencer_connect(); controld_clear_fsa_input_flags(R_STARTING); register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL); } /* A_RECOVER */ void do_recover(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { controld_set_fsa_input_flags(R_IN_RECOVERY); crm_warn("Fast-tracking shutdown in response to errors"); register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); } static pcmk__cluster_option_t crmd_opts[] = { /* name, old name, type, allowed values, * default value, validator, * short description, * long description */ { "dc-version", NULL, "string", NULL, "none", NULL, "Pacemaker version on cluster node elected Designated Controller (DC)", "Includes a hash which identifies the exact changeset the code was " "built from. Used for diagnostic purposes." }, { "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, "The messaging stack on which Pacemaker is currently running", "Used for informational and diagnostic purposes." }, { "cluster-name", NULL, "string", NULL, NULL, NULL, "An arbitrary name for the cluster", "This optional value is mostly for users' convenience as desired " "in administration, but may also be used in Pacemaker " "configuration rules via the #cluster-name node attribute, and " "by higher-level tools and resource agents." }, { XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", NULL, "20s", pcmk__valid_interval_spec, "How long to wait for a response from other nodes during start-up", "The optimal value will depend on the speed and load of your network " "and the type of switches used." }, { XML_CONFIG_ATTR_RECHECK, NULL, "time", "Zero disables polling, while positive values are an interval in seconds" "(unless other units are specified, for example \"5min\")", "15min", pcmk__valid_interval_spec, "Polling interval to recheck cluster state and evaluate rules " "with date specifications", "Pacemaker is primarily event-driven, and looks ahead to know when to " "recheck cluster state for failure timeouts and most time-based " "rules. However, it will also recheck the cluster after this " "amount of inactivity, to evaluate rules with date specifications " "and serve as a fail-safe for certain types of scheduler bugs." }, { "load-threshold", NULL, "percentage", NULL, "80%", pcmk__valid_utilization, "Maximum amount of system load that should be used by cluster nodes", "The cluster will slow down its recovery process when the amount of " "system resources used (currently CPU) approaches this limit", }, { "node-action-limit", NULL, "integer", NULL, "0", pcmk__valid_number, "Maximum number of jobs that can be scheduled per node " "(defaults to 2x cores)" }, { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, "How a cluster node should react if notified of its own fencing", "A cluster node may receive notification of its own fencing if fencing " "is misconfigured, or if fabric fencing is in use that doesn't cut " "cluster communication. Allowed values are \"stop\" to attempt to " "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt " "to immediately reboot the local node, falling back to stop on failure." }, { XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, "2min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", "Declare an election failed if it is not decided within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug." }, { XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, "20min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", "Exit immediately if shutdown does not complete within this much " "time. If you need to adjust this value, it probably indicates " "the presence of a bug." }, { "join-integration-timeout", "crmd-integration-timeout", "time", NULL, "3min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", "If you need to adjust this value, it probably indicates " "the presence of a bug." }, { "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, "30min", pcmk__valid_interval_spec, "*** Advanced Use Only ***", "If you need to adjust this value, it probably indicates " "the presence of a bug." }, { "transition-delay", "crmd-transition-delay", "time", NULL, "0s", pcmk__valid_interval_spec, "*** Advanced Use Only *** Enabling this option will slow down " "cluster recovery under all conditions", "Delay cluster recovery for this much time to allow for additional " "events to occur. Useful if your configuration is sensitive to " "the order in which ping updates arrive." }, { "stonith-watchdog-timeout", NULL, "time", NULL, "0", controld_verify_stonith_watchdog_timeout, "How long to wait before we can assume nodes are safely down " "when watchdog-based self-fencing via SBD is in use", "If nonzero, along with `have-watchdog=true` automatically set by the " "cluster, when fencing is required, watchdog-based self-fencing " "will be performed via SBD without requiring a fencing resource " "explicitly configured. " "If `stonith-watchdog-timeout` is set to a positive value, unseen " "nodes are assumed to self-fence within this much time. +WARNING:+ " "It must be ensured that this value is larger than the " "`SBD_WATCHDOG_TIMEOUT` environment variable on all nodes. " "Pacemaker verifies the settings individually on all nodes and " "prevents startup or shuts down if configured wrongly on the fly. " "It's strongly recommended that `SBD_WATCHDOG_TIMEOUT` is set to " "the same value on all nodes. " "If `stonith-watchdog-timeout` is set to a negative value, and " "`SBD_WATCHDOG_TIMEOUT` is set, twice that value will be used. " "+WARNING:+ In this case, it's essential (currently not verified by " "Pacemaker) that `SBD_WATCHDOG_TIMEOUT` is set to the same value on " "all nodes." }, { "stonith-max-attempts", NULL, "integer", NULL, "10", pcmk__valid_positive_number, "How many times fencing can fail before it will no longer be " "immediately re-attempted on a target" }, // Already documented in libpe_status (other values must be kept identical) { "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, NULL, NULL }, { XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, "false", pcmk__valid_boolean, NULL, NULL }, }; void crmd_metadata(void) { pcmk__print_option_metadata("pacemaker-controld", "Pacemaker controller options", "Cluster options used by Pacemaker's " "controller (formerly called crmd)", crmd_opts, PCMK__NELEM(crmd_opts)); } static void verify_crmd_options(GHashTable * options) { pcmk__validate_cluster_options(options, crmd_opts, PCMK__NELEM(crmd_opts)); } static const char * crmd_pref(GHashTable * options, const char *name) { return pcmk__cluster_option(options, crmd_opts, PCMK__NELEM(crmd_opts), name); } static void config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { const char *value = NULL; GHashTable *config_hash = NULL; crm_time_t *now = crm_time_new(NULL); xmlNode *crmconfig = NULL; xmlNode *alerts = NULL; if (rc != pcmk_ok) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc)); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); controld_set_fsa_input_flags(R_STAYDOWN); } goto bail; } crmconfig = output; if ((crmconfig) && (crm_element_name(crmconfig)) && (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) { crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG); } if (!crmconfig) { fsa_data_t *msg_data = NULL; crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed"); register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL); goto bail; } crm_debug("Call %d : Parsing CIB options", call_id); config_hash = pcmk__strkey_table(free, free); pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); verify_crmd_options(config_hash); value = crmd_pref(config_hash, XML_CONFIG_ATTR_DC_DEADTIME); election_trigger->period_ms = crm_parse_interval_spec(value); value = crmd_pref(config_hash, "node-action-limit"); /* Also checks migration-limit */ throttle_update_job_max(value); value = crmd_pref(config_hash, "load-threshold"); if(value) { throttle_set_load_target(strtof(value, NULL) / 100.0); } value = crmd_pref(config_hash, "no-quorum-policy"); if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { no_quorum_suicide_escalation = TRUE; } set_fence_reaction(crmd_pref(config_hash, XML_CONFIG_ATTR_FENCE_REACTION)); value = crmd_pref(config_hash,"stonith-max-attempts"); update_stonith_max_attempts(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value); crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums", shutdown_escalation_timer->period_ms); value = crmd_pref(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL); controld_set_election_period(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_RECHECK); recheck_interval_ms = crm_parse_interval_spec(value); crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms); value = crmd_pref(config_hash, "transition-delay"); transition_timer->period_ms = crm_parse_interval_spec(value); value = crmd_pref(config_hash, "join-integration-timeout"); integration_timer->period_ms = crm_parse_interval_spec(value); value = crmd_pref(config_hash, "join-finalization-timeout"); finalization_timer->period_ms = crm_parse_interval_spec(value); value = crmd_pref(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK); controld_shutdown_lock_enabled = crm_is_true(value); free(fsa_cluster_name); fsa_cluster_name = NULL; value = g_hash_table_lookup(config_hash, "cluster-name"); if (value) { fsa_cluster_name = strdup(value); } alerts = first_named_child(output, XML_CIB_TAG_ALERTS); crmd_unpack_alerts(alerts); controld_set_fsa_input_flags(R_READ_CONFIG); crm_trace("Triggering FSA: %s", __func__); mainloop_set_trigger(fsa_source); g_hash_table_destroy(config_hash); bail: crm_time_free(now); } gboolean crm_read_options(gpointer user_data) { int call_id = fsa_cib_conn->cmds->query(fsa_cib_conn, "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS, NULL, cib_xpath | cib_scope_local); fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback); crm_trace("Querying the CIB... call %d", call_id); return TRUE; } /* A_READCONFIG */ void do_read_config(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { throttle_init(); mainloop_set_trigger(config_read); } void crm_shutdown(int nsig) { if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) { crmd_exit(CRM_EX_OK); return; } if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Escalating shutdown"); register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL); return; } controld_set_fsa_input_flags(R_SHUTDOWN); register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL); if (shutdown_escalation_timer->period_ms == 0) { const char *value = crmd_pref(NULL, XML_CONFIG_ATTR_FORCE_QUIT); shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value); } crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums", shutdown_escalation_timer->period_ms); controld_start_timer(shutdown_escalation_timer); } diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index a0129bd514..67c376a426 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -1,853 +1,854 @@ /* * Copyright 2012-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include GHashTable *lrm_state_table = NULL; extern GHashTable *proxy_table; int lrmd_internal_proxy_send(lrmd_t * lrmd, xmlNode *msg); void lrmd_internal_set_proxy_callback(lrmd_t * lrmd, void *userdata, void (*callback)(lrmd_t *lrmd, void *userdata, xmlNode *msg)); static void free_rsc_info(gpointer value) { lrmd_rsc_info_t *rsc_info = value; lrmd_free_rsc_info(rsc_info); } static void free_deletion_op(gpointer value) { struct pending_deletion_op_s *op = value; free(op->rsc); delete_ha_msg_input(op->input); free(op); } static void free_recurring_op(gpointer value) { active_op_t *op = value; free(op->user_data); free(op->rsc_id); free(op->op_type); free(op->op_key); if (op->params) { g_hash_table_destroy(op->params); } free(op); } static gboolean fail_pending_op(gpointer key, gpointer value, gpointer user_data) { lrmd_event_data_t event = { 0, }; lrm_state_t *lrm_state = user_data; active_op_t *op = value; crm_trace("Pre-emptively failing " PCMK__OP_FMT " on %s (call=%s, %s)", op->rsc_id, op->op_type, op->interval_ms, lrm_state->node_name, (char*)key, op->user_data); event.type = lrmd_event_exec_complete; event.rsc_id = op->rsc_id; event.op_type = op->op_type; event.user_data = op->user_data; event.timeout = 0; event.interval_ms = op->interval_ms; lrmd__set_result(&event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_NOT_CONNECTED, "Action was pending when executor connection was dropped"); event.t_run = (unsigned int) op->start_time; event.t_rcchange = (unsigned int) op->start_time; event.call_id = op->call_id; event.remote_nodename = lrm_state->node_name; event.params = op->params; process_lrm_event(lrm_state, &event, op, NULL); + lrmd__reset_result(&event); return TRUE; } gboolean lrm_state_is_local(lrm_state_t *lrm_state) { if (lrm_state == NULL || fsa_our_uname == NULL) { return FALSE; } if (strcmp(lrm_state->node_name, fsa_our_uname) != 0) { return FALSE; } return TRUE; } lrm_state_t * lrm_state_create(const char *node_name) { lrm_state_t *state = NULL; if (!node_name) { crm_err("No node name given for lrm state object"); return NULL; } state = calloc(1, sizeof(lrm_state_t)); if (!state) { return NULL; } state->node_name = strdup(node_name); state->rsc_info_cache = pcmk__strkey_table(NULL, free_rsc_info); state->deletion_ops = pcmk__strkey_table(free, free_deletion_op); state->pending_ops = pcmk__strkey_table(free, free_recurring_op); state->resource_history = pcmk__strkey_table(NULL, history_free); state->metadata_cache = metadata_cache_new(); g_hash_table_insert(lrm_state_table, (char *)state->node_name, state); return state; } void lrm_state_destroy(const char *node_name) { g_hash_table_remove(lrm_state_table, node_name); } static gboolean remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data) { remote_proxy_t *proxy = value; const char *node_name = user_data; if (pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return TRUE; } return FALSE; } static void internal_lrm_state_destroy(gpointer data) { lrm_state_t *lrm_state = data; if (!lrm_state) { return; } crm_trace("Destroying proxy table %s with %d members", lrm_state->node_name, g_hash_table_size(proxy_table)); g_hash_table_foreach_remove(proxy_table, remote_proxy_remove_by_node, (char *) lrm_state->node_name); remote_ra_cleanup(lrm_state); lrmd_api_delete(lrm_state->conn); if (lrm_state->rsc_info_cache) { crm_trace("Destroying rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_destroy(lrm_state->rsc_info_cache); } if (lrm_state->resource_history) { crm_trace("Destroying history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_destroy(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Destroying deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_destroy(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { crm_trace("Destroying pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); g_hash_table_destroy(lrm_state->pending_ops); } metadata_cache_free(lrm_state->metadata_cache); free((char *)lrm_state->node_name); free(lrm_state); } void lrm_state_reset_tables(lrm_state_t * lrm_state, gboolean reset_metadata) { if (lrm_state->resource_history) { crm_trace("Re-setting history op cache with %d members", g_hash_table_size(lrm_state->resource_history)); g_hash_table_remove_all(lrm_state->resource_history); } if (lrm_state->deletion_ops) { crm_trace("Re-setting deletion op cache with %d members", g_hash_table_size(lrm_state->deletion_ops)); g_hash_table_remove_all(lrm_state->deletion_ops); } if (lrm_state->pending_ops) { crm_trace("Re-setting pending op cache with %d members", g_hash_table_size(lrm_state->pending_ops)); g_hash_table_remove_all(lrm_state->pending_ops); } if (lrm_state->rsc_info_cache) { crm_trace("Re-setting rsc info cache with %d members", g_hash_table_size(lrm_state->rsc_info_cache)); g_hash_table_remove_all(lrm_state->rsc_info_cache); } if (reset_metadata) { metadata_cache_reset(lrm_state->metadata_cache); } } gboolean lrm_state_init_local(void) { if (lrm_state_table) { return TRUE; } lrm_state_table = pcmk__strikey_table(NULL, internal_lrm_state_destroy); if (!lrm_state_table) { return FALSE; } proxy_table = pcmk__strikey_table(NULL, remote_proxy_free); if (!proxy_table) { g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; return FALSE; } return TRUE; } void lrm_state_destroy_all(void) { if (lrm_state_table) { crm_trace("Destroying state table with %d members", g_hash_table_size(lrm_state_table)); g_hash_table_destroy(lrm_state_table); lrm_state_table = NULL; } if(proxy_table) { crm_trace("Destroying proxy table with %d members", g_hash_table_size(proxy_table)); g_hash_table_destroy(proxy_table); proxy_table = NULL; } } lrm_state_t * lrm_state_find(const char *node_name) { if (!node_name) { return NULL; } return g_hash_table_lookup(lrm_state_table, node_name); } lrm_state_t * lrm_state_find_or_create(const char *node_name) { lrm_state_t *lrm_state; lrm_state = g_hash_table_lookup(lrm_state_table, node_name); if (!lrm_state) { lrm_state = lrm_state_create(node_name); } return lrm_state; } GList * lrm_state_get_list(void) { return g_hash_table_get_values(lrm_state_table); } static remote_proxy_t * find_connected_proxy_by_node(const char * node_name) { GHashTableIter gIter; remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return NULL); g_hash_table_iter_init(&gIter, proxy_table); while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) &proxy)) { if (proxy->source && pcmk__str_eq(node_name, proxy->node_name, pcmk__str_casei)) { return proxy; } } return NULL; } static void remote_proxy_disconnect_by_node(const char * node_name) { remote_proxy_t *proxy = NULL; CRM_CHECK(proxy_table != NULL, return); while ((proxy = find_connected_proxy_by_node(node_name)) != NULL) { /* mainloop_del_ipc_client() eventually calls remote_proxy_disconnected() * , which removes the entry from proxy_table. * Do not do this in a g_hash_table_iter_next() loop. */ if (proxy->source) { mainloop_del_ipc_client(proxy->source); } } return; } void lrm_state_disconnect_only(lrm_state_t * lrm_state) { int removed = 0; if (!lrm_state->conn) { return; } crm_trace("Disconnecting %s", lrm_state->node_name); remote_proxy_disconnect_by_node(lrm_state->node_name); ((lrmd_t *) lrm_state->conn)->cmds->disconnect(lrm_state->conn); if (!pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { removed = g_hash_table_foreach_remove(lrm_state->pending_ops, fail_pending_op, lrm_state); crm_trace("Synthesized %d operation failures for %s", removed, lrm_state->node_name); } } void lrm_state_disconnect(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return; } lrm_state_disconnect_only(lrm_state); lrmd_api_delete(lrm_state->conn); lrm_state->conn = NULL; } int lrm_state_is_connected(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return FALSE; } return ((lrmd_t *) lrm_state->conn)->cmds->is_connected(lrm_state->conn); } int lrm_state_poke_connection(lrm_state_t * lrm_state) { if (!lrm_state->conn) { return -ENOTCONN; } return ((lrmd_t *) lrm_state->conn)->cmds->poke_connection(lrm_state->conn); } // \return Standard Pacemaker return code int controld_connect_local_executor(lrm_state_t *lrm_state) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, NULL, NULL, 0); if (rc != pcmk_rc_ok) { return rc; } api->cmds->set_callback(api, lrm_op_callback); lrm_state->conn = api; } rc = ((lrmd_t *) lrm_state->conn)->cmds->connect(lrm_state->conn, CRM_SYSTEM_CRMD, NULL); rc = pcmk_legacy2rc(rc); if (rc == pcmk_rc_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; } return rc; } static remote_proxy_t * crmd_remote_proxy_new(lrmd_t *lrmd, const char *node_name, const char *session_id, const char *channel) { struct ipc_client_callbacks proxy_callbacks = { .dispatch = remote_proxy_dispatch, .destroy = remote_proxy_disconnected }; remote_proxy_t *proxy = remote_proxy_new(lrmd, &proxy_callbacks, node_name, session_id, channel); return proxy; } gboolean crmd_is_proxy_session(const char *session) { return g_hash_table_lookup(proxy_table, session) ? TRUE : FALSE; } void crmd_proxy_send(const char *session, xmlNode *msg) { remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); lrm_state_t *lrm_state = NULL; if (!proxy) { return; } crm_log_xml_trace(msg, "to-proxy"); lrm_state = lrm_state_find(proxy->node_name); if (lrm_state) { crm_trace("Sending event to %.8s on %s", proxy->session_id, proxy->node_name); remote_proxy_relay_event(proxy, msg); } } static void crmd_proxy_dispatch(const char *session, xmlNode *msg) { crm_trace("Processing proxied IPC message from session %s", session); crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, session); if (controld_authorize_ipc_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } trigger_fsa(); } static void remote_config_check(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { if (rc != pcmk_ok) { crm_err("Query resulted in an error: %s", pcmk_strerror(rc)); if (rc == -EACCES || rc == -pcmk_err_schema_validation) { crm_err("The cluster is mis-configured - shutting down and staying down"); } } else { lrmd_t * lrmd = (lrmd_t *)user_data; crm_time_t *now = crm_time_new(NULL); GHashTable *config_hash = pcmk__strkey_table(free, free); crm_debug("Call %d : Parsing CIB options", call_id); pe_unpack_nvpairs(output, output, XML_CIB_TAG_PROPSET, NULL, config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); /* Now send it to the remote peer */ lrmd__validate_remote_settings(lrmd, config_hash); g_hash_table_destroy(config_hash); crm_time_free(now); } } static void crmd_remote_proxy_cb(lrmd_t *lrmd, void *userdata, xmlNode *msg) { lrm_state_t *lrm_state = userdata; const char *session = crm_element_value(msg, F_LRMD_IPC_SESSION); remote_proxy_t *proxy = g_hash_table_lookup(proxy_table, session); const char *op = crm_element_value(msg, F_LRMD_IPC_OP); if (pcmk__str_eq(op, LRMD_IPC_OP_NEW, pcmk__str_casei)) { const char *channel = crm_element_value(msg, F_LRMD_IPC_IPC_SERVER); proxy = crmd_remote_proxy_new(lrmd, lrm_state->node_name, session, channel); if (!remote_ra_controlling_guest(lrm_state)) { if (proxy != NULL) { /* Look up stonith-watchdog-timeout and send to the remote peer for validation */ int rc = fsa_cib_conn->cmds->query(fsa_cib_conn, XML_CIB_TAG_CRMCONFIG, NULL, cib_scope_local); fsa_cib_conn->cmds->register_callback_full(fsa_cib_conn, rc, 10, FALSE, lrmd, "remote_config_check", remote_config_check, NULL); } } else { crm_debug("Skipping remote_config_check for guest-nodes"); } } else if (pcmk__str_eq(op, LRMD_IPC_OP_SHUTDOWN_REQ, pcmk__str_casei)) { char *now_s = NULL; crm_notice("%s requested shutdown of its remote connection", lrm_state->node_name); if (!remote_ra_is_in_maintenance(lrm_state)) { now_s = pcmk__ttoa(time(NULL)); update_attrd(lrm_state->node_name, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, TRUE); free(now_s); remote_proxy_ack_shutdown(lrmd); crm_warn("Reconnection attempts to %s may result in failures that must be cleared", lrm_state->node_name); } else { remote_proxy_nack_shutdown(lrmd); crm_notice("Remote resource for %s is not managed so no ordered shutdown happening", lrm_state->node_name); } return; } else if (pcmk__str_eq(op, LRMD_IPC_OP_REQUEST, pcmk__str_casei) && proxy && proxy->is_local) { /* This is for the controller, which we are, so don't try * to send to ourselves over IPC -- do it directly. */ int flags = 0; xmlNode *request = get_message_xml(msg, F_LRMD_IPC_MSG); CRM_CHECK(request != NULL, return); CRM_CHECK(lrm_state->node_name, return); crm_xml_add(request, XML_ACL_TAG_ROLE, "pacemaker-remote"); pcmk__update_acl_user(request, F_LRMD_IPC_USER, lrm_state->node_name); /* Pacemaker Remote nodes don't know their own names (as known to the * cluster). When getting a node info request with no name or ID, add * the name, so we don't return info for ourselves instead of the * Pacemaker Remote node. */ if (pcmk__str_eq(crm_element_value(request, F_CRM_TASK), CRM_OP_NODE_INFO, pcmk__str_casei)) { int node_id = 0; crm_element_value_int(request, XML_ATTR_ID, &node_id); if ((node_id <= 0) && (crm_element_value(request, XML_ATTR_UNAME) == NULL)) { crm_xml_add(request, XML_ATTR_UNAME, lrm_state->node_name); } } crmd_proxy_dispatch(session, request); crm_element_value_int(msg, F_LRMD_IPC_MSG_FLAGS, &flags); if (flags & crm_ipc_client_response) { int msg_id = 0; xmlNode *op_reply = create_xml_node(NULL, "ack"); crm_xml_add(op_reply, "function", __func__); crm_xml_add_int(op_reply, "line", __LINE__); crm_element_value_int(msg, F_LRMD_IPC_MSG_ID, &msg_id); remote_proxy_relay_response(proxy, op_reply, msg_id); free_xml(op_reply); } } else { remote_proxy_cb(lrmd, lrm_state->node_name, msg); } } // \return Standard Pacemaker return code int controld_connect_remote_executor(lrm_state_t *lrm_state, const char *server, int port, int timeout_ms) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_t *api = NULL; rc = lrmd__new(&api, lrm_state->node_name, server, port); if (rc != pcmk_rc_ok) { crm_warn("Pacemaker Remote connection to %s:%s failed: %s " CRM_XS " rc=%d", server, port, pcmk_rc_str(rc), rc); return rc; } lrm_state->conn = api; api->cmds->set_callback(api, remote_lrm_op_callback); lrmd_internal_set_proxy_callback(api, lrm_state, crmd_remote_proxy_cb); } crm_trace("Initiating remote connection to %s:%d with timeout %dms", server, port, timeout_ms); rc = ((lrmd_t *) lrm_state->conn)->cmds->connect_async(lrm_state->conn, lrm_state->node_name, timeout_ms); if (rc == pcmk_ok) { lrm_state->num_lrm_register_fails = 0; } else { lrm_state->num_lrm_register_fails++; // Ignored for remote connections } return pcmk_legacy2rc(rc); } int lrm_state_get_metadata(lrm_state_t * lrm_state, const char *class, const char *provider, const char *agent, char **output, enum lrmd_call_options options) { lrmd_key_value_t *params = NULL; if (!lrm_state->conn) { return -ENOTCONN; } /* Add the node name to the environment, as is done with normal resource * action calls. Meta-data calls shouldn't need it, but some agents are * written with an ocf_local_nodename call at the beginning regardless of * action. Without the environment variable, the agent would try to contact * the controller to get the node name -- but the controller would be * blocking on the synchronous meta-data call. * * At this point, we have to assume that agents are unlikely to make other * calls that require the controller, such as crm_node --quorum or * --cluster-id. * * @TODO Make meta-data calls asynchronous. (This will be part of a larger * project to make meta-data calls via the executor rather than directly.) */ params = lrmd_key_value_add(params, CRM_META "_" XML_LRM_ATTR_TARGET, lrm_state->node_name); return ((lrmd_t *) lrm_state->conn)->cmds->get_metadata_params(lrm_state->conn, class, provider, agent, output, options, params); } int lrm_state_cancel(lrm_state_t *lrm_state, const char *rsc_id, const char *action, guint interval_ms) { if (!lrm_state->conn) { return -ENOTCONN; } /* Figure out a way to make this async? * NOTICE: Currently it's synced and directly acknowledged in do_lrm_invoke(). */ if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_cancel(lrm_state, rsc_id, action, interval_ms); } return ((lrmd_t *) lrm_state->conn)->cmds->cancel(lrm_state->conn, rsc_id, action, interval_ms); } lrmd_rsc_info_t * lrm_state_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { lrmd_rsc_info_t *rsc = NULL; if (!lrm_state->conn) { return NULL; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { return remote_ra_get_rsc_info(lrm_state, rsc_id); } rsc = g_hash_table_lookup(lrm_state->rsc_info_cache, rsc_id); if (rsc == NULL) { /* only contact the lrmd if we don't already have a cached rsc info */ rsc = ((lrmd_t *) lrm_state->conn)->cmds->get_rsc_info(lrm_state->conn, rsc_id, options); if (rsc == NULL) { return NULL; } /* cache the result */ g_hash_table_insert(lrm_state->rsc_info_cache, rsc->id, rsc); } return lrmd_copy_rsc_info(rsc); } /*! * \internal * \brief Initiate a resource agent action * * \param[in] lrm_state Executor state object * \param[in] rsc_id ID of resource for action * \param[in] action Action to execute * \param[in] userdata String to copy and pass to execution callback * \param[in] interval_ms Action interval (in milliseconds) * \param[in] timeout_ms Action timeout (in milliseconds) * \param[in] start_delay_ms Delay (in milliseconds) before initiating action * \param[in] params Resource parameters * \param[out] call_id Where to store call ID on success * * \return Standard Pacemaker return code * \note This takes ownership of \p params, which should not be used or freed * after calling this function. */ int controld_execute_resource_agent(lrm_state_t *lrm_state, const char *rsc_id, const char *action, const char *userdata, guint interval_ms, int timeout_ms, int start_delay_ms, lrmd_key_value_t *params, int *call_id) { int rc = pcmk_rc_ok; if (lrm_state->conn == NULL) { lrmd_key_value_freeall(params); rc = ENOTCONN; } else if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { rc = controld_execute_remote_agent(lrm_state, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, params, call_id); } else { rc = ((lrmd_t *) lrm_state->conn)->cmds->exec(lrm_state->conn, rsc_id, action, userdata, interval_ms, timeout_ms, start_delay_ms, lrmd_opt_notify_changes_only, params); if (rc < 0) { rc = pcmk_legacy2rc(rc); } else { *call_id = rc; rc = pcmk_rc_ok; } } return rc; } int lrm_state_register_rsc(lrm_state_t * lrm_state, const char *rsc_id, const char *class, const char *provider, const char *agent, enum lrmd_call_options options) { lrmd_t *conn = (lrmd_t *) lrm_state->conn; if (conn == NULL) { return -ENOTCONN; } if (is_remote_lrmd_ra(agent, provider, NULL)) { return lrm_state_find_or_create(rsc_id)? pcmk_ok : -EINVAL; } /* @TODO Implement an asynchronous version of this (currently a blocking * call to the lrmd). */ return conn->cmds->register_rsc(lrm_state->conn, rsc_id, class, provider, agent, options); } int lrm_state_unregister_rsc(lrm_state_t * lrm_state, const char *rsc_id, enum lrmd_call_options options) { if (!lrm_state->conn) { return -ENOTCONN; } if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) { lrm_state_destroy(rsc_id); return pcmk_ok; } g_hash_table_remove(lrm_state->rsc_info_cache, rsc_id); /* @TODO Optimize this ... this function is a blocking round trip from * client to daemon. The controld_execd_state.c code path that uses this * function should always treat it as an async operation. The executor API * should make an async version available. */ return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options); } /* * Functions for sending alerts via local executor connection */ static GList *crmd_alert_list = NULL; void crmd_unpack_alerts(xmlNode *alerts) { pe_free_alert_list(crmd_alert_list); crmd_alert_list = pe_unpack_alerts(alerts); } void crmd_alert_node_event(crm_node_t *node) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node->uname, node->id, node->state); } void crmd_alert_fencing_op(stonith_event_t * e) { char *desc; lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)", e->action, e->target, (e->executioner? e->executioner : ""), e->client_origin, e->origin, pcmk_strerror(e->result), e->id); lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, e->target, e->operation, desc, e->result); free(desc); } void crmd_alert_resource_op(const char *node, lrmd_event_data_t * op) { lrm_state_t *lrm_state; if (crmd_alert_list == NULL) { return; } lrm_state = lrm_state_find(fsa_our_uname); if (lrm_state == NULL) { return; } lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node, op); } diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c index 2444da7930..ee665f801d 100644 --- a/daemons/controld/controld_schedulerd.c +++ b/daemons/controld/controld_schedulerd.c @@ -1,480 +1,484 @@ /* - * Copyright 2004-2020 the Pacemaker project contributors + * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include /* pid_t, sleep, ssize_t */ #include #include #include #include #include #include +#include +#include #include -static mainloop_io_t *pe_subsystem = NULL; +static void handle_disconnect(void); + +static pcmk_ipc_api_t *schedulerd_api = NULL; /*! * \internal * \brief Close any scheduler connection and free associated memory */ void -pe_subsystem_free(void) +controld_shutdown_schedulerd_ipc(void) { controld_clear_fsa_input_flags(R_PE_REQUIRED); - if (pe_subsystem) { - controld_expect_sched_reply(NULL); - mainloop_del_ipc_client(pe_subsystem); - pe_subsystem = NULL; - controld_clear_fsa_input_flags(R_PE_CONNECTED); - } + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); + + pcmk_free_ipc_api(schedulerd_api); + schedulerd_api = NULL; } /*! * \internal * \brief Save CIB query result to file, raising FSA error * * \param[in] msg Ignored * \param[in] call_id Call ID of CIB query * \param[in] rc Return code of CIB query * \param[in] output Result of CIB query * \param[in] user_data Unique identifier for filename (will be freed) * * \note This is intended to be called after a scheduler connection fails. */ static void save_cib_contents(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data) { char *id = user_data; register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); CRM_CHECK(id != NULL, return); if (rc == pcmk_ok) { char *filename = crm_strdup_printf(PE_STATE_DIR "/pe-core-%s.bz2", id); if (write_xml_file(output, filename, TRUE) < 0) { crm_err("Could not save Cluster Information Base to %s after scheduler crash", filename); } else { crm_notice("Saved Cluster Information Base to %s after scheduler crash", filename); } free(filename); } } /*! * \internal * \brief Respond to scheduler connection failure - * - * \param[in] user_data Ignored */ static void -pe_ipc_destroy(gpointer user_data) +handle_disconnect(void) { // If we aren't connected to the scheduler, we can't expect a reply controld_expect_sched_reply(NULL); if (pcmk_is_set(fsa_input_register, R_PE_REQUIRED)) { int rc = pcmk_ok; char *uuid_str = crm_generate_uuid(); crm_crit("Connection to the scheduler failed " CRM_XS " uuid=%s", uuid_str); /* * The scheduler died... * * Save the current CIB so that we have a chance of * figuring out what killed it. * * Delay raising the I_ERROR until the query below completes or * 5s is up, whichever comes first. * */ rc = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); fsa_register_cib_callback(rc, FALSE, uuid_str, save_cib_contents); } else { crm_info("Connection to the scheduler released"); } controld_clear_fsa_input_flags(R_PE_CONNECTED); - pe_subsystem = NULL; mainloop_set_trigger(fsa_source); return; } -/*! - * \internal - * \brief Handle message from scheduler connection - * - * \param[in] buffer XML message (will be freed) - * \param[in] length Ignored - * \param[in] userdata Ignored - * - * \return 0 - */ -static int -pe_ipc_dispatch(const char *buffer, ssize_t length, gpointer userdata) +static void +handle_reply(pcmk_schedulerd_api_reply_t *reply) { - xmlNode *msg = string2xml(buffer); + const char *msg_ref = NULL; - if (msg) { - route_message(C_IPC_MESSAGE, msg); + if (!AM_I_DC) { + return; + } + + msg_ref = reply->data.graph.reference; + + if (msg_ref == NULL) { + crm_err("%s - Ignoring calculation with no reference", CRM_OP_PECALC); + + } else if (pcmk__str_eq(msg_ref, fsa_pe_ref, pcmk__str_none)) { + ha_msg_input_t fsa_input; + xmlNode *crm_data_node; + + controld_stop_sched_timer(); + + /* do_te_invoke (which will eventually process the fsa_input we are constructing + * here) requires that fsa_input.xml be non-NULL. That will only happen if + * copy_ha_msg_input (which is called by register_fsa_input_adv) sees the + * fsa_input.msg that it is expecting. The scheduler's IPC dispatch function + * gave us the values we need, we just need to put them into XML. + * + * The name of the top level element here is irrelevant. Nothing checks it. + */ + fsa_input.msg = create_xml_node(NULL, "dummy-reply"); + crm_xml_add(fsa_input.msg, XML_ATTR_REFERENCE, msg_ref); + crm_xml_add(fsa_input.msg, F_CRM_TGRAPH_INPUT, reply->data.graph.input); + + crm_data_node = create_xml_node(fsa_input.msg, F_CRM_DATA); + add_node_copy(crm_data_node, reply->data.graph.tgraph); + register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input); + + free_xml(fsa_input.msg); + + } else { + crm_info("%s calculation %s is obsolete", CRM_OP_PECALC, msg_ref); } - free_xml(msg); - return 0; } -/*! - * \internal - * \brief Make new connection to scheduler - * - * \return TRUE on success, FALSE otherwise - */ -static bool -pe_subsystem_new(void) +static void +scheduler_event_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, + crm_exit_t status, void *event_data, void *user_data) { - struct ipc_client_callbacks pe_callbacks = { - .dispatch = pe_ipc_dispatch, - .destroy = pe_ipc_destroy - }; - static bool retry_one = TRUE; + pcmk_schedulerd_api_reply_t *reply = event_data; - controld_set_fsa_input_flags(R_PE_REQUIRED); -retry: - pe_subsystem = mainloop_add_ipc_client(CRM_SYSTEM_PENGINE, - G_PRIORITY_DEFAULT, - 5 * 1024 * 1024 /* 5MB */, - NULL, &pe_callbacks); - if (pe_subsystem == NULL) { - crm_debug("Could not connect to scheduler : %s(%d)", pcmk_rc_str(errno), errno); - if (errno == EAGAIN && retry_one) { - /* In rare cases, a SIGTERM may be received and the connection may fail when the cluster shuts down. */ - /* At this time, the connection will be retried only once. */ - crm_debug("Scheduler connection attempt."); - retry_one = FALSE; - goto retry; - } - return FALSE; + switch (event_type) { + case pcmk_ipc_event_disconnect: + handle_disconnect(); + break; + + case pcmk_ipc_event_reply: + handle_reply(reply); + break; + + default: + break; } - controld_set_fsa_input_flags(R_PE_CONNECTED); - return TRUE; } -/*! - * \internal - * \brief Send an XML message to the scheduler - * - * \param[in] cmd XML message to send - * - * \return pcmk_ok on success, -errno otherwise - */ -static int -pe_subsystem_send(xmlNode *cmd) +static bool +new_schedulerd_ipc_connection(void) { - if (pe_subsystem) { - int sent = crm_ipc_send(mainloop_get_ipc_client(pe_subsystem), cmd, - 0, 0, NULL); - - if (sent == 0) { - sent = -ENODATA; - } else if (sent > 0) { - sent = pcmk_ok; + int rc; + + controld_set_fsa_input_flags(R_PE_REQUIRED); + + if (schedulerd_api == NULL) { + rc = pcmk_new_ipc_api(&schedulerd_api, pcmk_ipc_schedulerd); + + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; } - return sent; } - return -ENOTCONN; + + pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); + + rc = pcmk_connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to the scheduler: %s", pcmk_rc_str(rc)); + return false; + } + + controld_set_fsa_input_flags(R_PE_CONNECTED); + return true; } static void do_pe_invoke_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *user_data); /* A_PE_START, A_PE_STOP, O_PE_RESTART */ void do_pe_control(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (action & A_PE_STOP) { - pe_subsystem_free(); + controld_clear_fsa_input_flags(R_PE_REQUIRED); + pcmk_disconnect_ipc(schedulerd_api); + handle_disconnect(); } if ((action & A_PE_START) && !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) { if (cur_state == S_STOPPING) { crm_info("Ignoring request to connect to scheduler while shutting down"); - } else if (!pe_subsystem_new()) { + } else if (!new_schedulerd_ipc_connection()) { crm_warn("Could not connect to scheduler"); register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL); } } } int fsa_pe_query = 0; char *fsa_pe_ref = NULL; static mainloop_timer_t *controld_sched_timer = NULL; // @TODO Make this a configurable cluster option if there's demand for it #define SCHED_TIMEOUT_MS (120000) /*! * \internal * \brief Handle a timeout waiting for scheduler reply * * \param[in] user_data Ignored * * \return FALSE (indicating that timer should not be restarted) */ static gboolean controld_sched_timeout(gpointer user_data) { if (AM_I_DC) { /* If this node is the DC but can't communicate with the scheduler, just * exit (and likely get fenced) so this node doesn't interfere with any * further DC elections. * * @TODO We could try something less drastic first, like disconnecting * and reconnecting to the scheduler, but something is likely going * seriously wrong, so perhaps it's better to just fail as quickly as * possible. */ crmd_exit(CRM_EX_FATAL); } return FALSE; } void controld_stop_sched_timer(void) { if (controld_sched_timer && fsa_pe_ref) { crm_trace("Stopping timer for scheduler reply %s", fsa_pe_ref); } mainloop_timer_stop(controld_sched_timer); } /*! * \internal * \brief Set the scheduler request currently being waited on * - * \param[in] msg Request to expect reply to (or NULL for none) + * \param[in] ref Request to expect reply to (or NULL for none) */ void -controld_expect_sched_reply(xmlNode *msg) +controld_expect_sched_reply(char *ref) { - char *ref = NULL; - - if (msg) { - ref = crm_element_value_copy(msg, XML_ATTR_REFERENCE); - CRM_ASSERT(ref != NULL); - + if (ref) { if (controld_sched_timer == NULL) { controld_sched_timer = mainloop_timer_add("scheduler_reply_timer", SCHED_TIMEOUT_MS, FALSE, controld_sched_timeout, NULL); } mainloop_timer_start(controld_sched_timer); } else { controld_stop_sched_timer(); } free(fsa_pe_ref); fsa_pe_ref = ref; } /*! * \internal * \brief Free the scheduler reply timer */ void controld_free_sched_timer(void) { if (controld_sched_timer != NULL) { mainloop_timer_del(controld_sched_timer); controld_sched_timer = NULL; } } /* A_PE_INVOKE */ void do_pe_invoke(long long action, enum crmd_fsa_cause cause, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { if (AM_I_DC == FALSE) { crm_err("Not invoking scheduler because not DC: %s", fsa_action2string(action)); return; } if (!pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) { if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) { crm_err("Cannot shut down gracefully without the scheduler"); register_fsa_input_before(C_FSA_INTERNAL, I_TERMINATE, NULL); } else { crm_info("Waiting for the scheduler to connect"); crmd_fsa_stall(FALSE); controld_set_fsa_action_flags(A_PE_START); trigger_fsa(); } return; } if (cur_state != S_POLICY_ENGINE) { crm_notice("Not invoking scheduler because in state %s", fsa_state2string(cur_state)); return; } if (!pcmk_is_set(fsa_input_register, R_HAVE_CIB)) { crm_err("Attempted to invoke scheduler without consistent Cluster Information Base!"); /* start the join from scratch */ register_fsa_input_before(C_FSA_INTERNAL, I_ELECTION, NULL); return; } fsa_pe_query = fsa_cib_conn->cmds->query(fsa_cib_conn, NULL, NULL, cib_scope_local); crm_debug("Query %d: Requesting the current CIB: %s", fsa_pe_query, fsa_state2string(fsa_state)); controld_expect_sched_reply(NULL); fsa_register_cib_callback(fsa_pe_query, FALSE, NULL, do_pe_invoke_callback); } static void force_local_option(xmlNode *xml, const char *attr_name, const char *attr_value) { int max = 0; int lpc = 0; char *xpath_string = NULL; xmlXPathObjectPtr xpathObj = NULL; xpath_string = crm_strdup_printf("%.128s//%s//nvpair[@name='%.128s']", get_object_path(XML_CIB_TAG_CRMCONFIG), XML_CIB_TAG_PROPSET, attr_name); xpathObj = xpath_search(xml, xpath_string); max = numXpathResults(xpathObj); free(xpath_string); for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); crm_trace("Forcing %s/%s = %s", ID(match), attr_name, attr_value); crm_xml_add(match, XML_NVPAIR_ATTR_VALUE, attr_value); } if(max == 0) { xmlNode *configuration = NULL; xmlNode *crm_config = NULL; xmlNode *cluster_property_set = NULL; crm_trace("Creating %s-%s for %s=%s", CIB_OPTIONS_FIRST, attr_name, attr_name, attr_value); configuration = pcmk__xe_match(xml, XML_CIB_TAG_CONFIGURATION, NULL, NULL); if (configuration == NULL) { configuration = create_xml_node(xml, XML_CIB_TAG_CONFIGURATION); } crm_config = pcmk__xe_match(configuration, XML_CIB_TAG_CRMCONFIG, NULL, NULL); if (crm_config == NULL) { crm_config = create_xml_node(configuration, XML_CIB_TAG_CRMCONFIG); } cluster_property_set = pcmk__xe_match(crm_config, XML_CIB_TAG_PROPSET, NULL, NULL); if (cluster_property_set == NULL) { cluster_property_set = create_xml_node(crm_config, XML_CIB_TAG_PROPSET); crm_xml_add(cluster_property_set, XML_ATTR_ID, CIB_OPTIONS_FIRST); } xml = create_xml_node(cluster_property_set, XML_CIB_TAG_NVPAIR); crm_xml_set_id(xml, "%s-%s", CIB_OPTIONS_FIRST, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_NAME, attr_name); crm_xml_add(xml, XML_NVPAIR_ATTR_VALUE, attr_value); } freeXpathObject(xpathObj); } static void do_pe_invoke_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { - xmlNode *cmd = NULL; + char *ref = NULL; pid_t watchdog = pcmk__locate_sbd(); if (rc != pcmk_ok) { crm_err("Could not retrieve the Cluster Information Base: %s " CRM_XS " rc=%d call=%d", pcmk_strerror(rc), rc, call_id); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); return; } else if (call_id != fsa_pe_query) { crm_trace("Skipping superseded CIB query: %d (current=%d)", call_id, fsa_pe_query); return; } else if (!AM_I_DC || !pcmk_is_set(fsa_input_register, R_PE_CONNECTED)) { crm_debug("No need to invoke the scheduler anymore"); return; } else if (fsa_state != S_POLICY_ENGINE) { crm_debug("Discarding scheduler request in state: %s", fsa_state2string(fsa_state)); return; /* this callback counts as 1 */ } else if (num_cib_op_callbacks() > 1) { crm_debug("Re-asking for the CIB: %d other peer updates still pending", (num_cib_op_callbacks() - 1)); sleep(1); controld_set_fsa_action_flags(A_PE_INVOKE); trigger_fsa(); return; } CRM_LOG_ASSERT(output != NULL); /* Refresh the remote node cache and the known node cache when the * scheduler is invoked */ pcmk__refresh_node_caches_from_cib(output); crm_xml_add(output, XML_ATTR_DC_UUID, fsa_our_uuid); crm_xml_add_int(output, XML_ATTR_HAVE_QUORUM, fsa_has_quorum); force_local_option(output, XML_ATTR_HAVE_WATCHDOG, pcmk__btoa(watchdog)); if (ever_had_quorum && crm_have_quorum == FALSE) { crm_xml_add_int(output, XML_ATTR_QUORUM_PANIC, 1); } - cmd = create_request(CRM_OP_PECALC, output, NULL, CRM_SYSTEM_PENGINE, CRM_SYSTEM_DC, NULL); + rc = pcmk_rc2legacy(pcmk_schedulerd_api_graph(schedulerd_api, output, &ref)); - rc = pe_subsystem_send(cmd); if (rc < 0) { crm_err("Could not contact the scheduler: %s " CRM_XS " rc=%d", pcmk_strerror(rc), rc); register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__); } else { - controld_expect_sched_reply(cmd); + CRM_ASSERT(ref != NULL); + controld_expect_sched_reply(ref); crm_debug("Invoking the scheduler: query=%d, ref=%s, seq=%llu, quorate=%d", fsa_pe_query, fsa_pe_ref, crm_peer_seq, fsa_has_quorum); } - free_xml(cmd); } diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h index 4d8275c5f8..a695194a20 100644 --- a/daemons/controld/controld_utils.h +++ b/daemons/controld/controld_utils.h @@ -1,127 +1,127 @@ /* * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMD_UTILS__H # define CRMD_UTILS__H # include # include # include // CIB_OP_MODIFY # include // fsa_cib_conn # include # define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" # define fsa_cib_update(section, data, options, call_id, user_name) \ if(fsa_cib_conn != NULL) { \ call_id = cib_internal_op( \ fsa_cib_conn, CIB_OP_MODIFY, NULL, section, data, \ NULL, options, user_name); \ \ } else { \ crm_err("No CIB manager connection available"); \ } static inline void fsa_cib_anon_update(const char *section, xmlNode *data) { if (fsa_cib_conn == NULL) { crm_err("No CIB connection available"); } else { int opts = cib_scope_local | cib_quorum_override | cib_can_create; fsa_cib_conn->cmds->modify(fsa_cib_conn, section, data, opts); } } static inline void fsa_cib_anon_update_discard_reply(const char *section, xmlNode *data) { if (fsa_cib_conn == NULL) { crm_err("No CIB connection available"); } else { int opts = cib_scope_local | cib_quorum_override | cib_can_create | cib_discard_reply; fsa_cib_conn->cmds->modify(fsa_cib_conn, section, data, opts); } } extern gboolean fsa_has_quorum; extern bool controld_shutdown_lock_enabled; extern int last_peer_update; extern int last_resource_update; enum node_update_flags { node_update_none = 0x0000, node_update_quick = 0x0001, node_update_cluster = 0x0010, node_update_peer = 0x0020, node_update_join = 0x0040, node_update_expected = 0x0100, node_update_all = node_update_cluster|node_update_peer|node_update_join|node_update_expected, }; crm_exit_t crmd_exit(crm_exit_t exit_code); _Noreturn void crmd_fast_exit(crm_exit_t exit_code); -void pe_subsystem_free(void); +void controld_shutdown_schedulerd_ipc(void); void controld_stop_sched_timer(void); void controld_free_sched_timer(void); -void controld_expect_sched_reply(xmlNode *msg); +void controld_expect_sched_reply(char *ref); void fsa_dump_actions(uint64_t action, const char *text); void fsa_dump_inputs(int log_level, const char *text, long long input_register); gboolean update_dc(xmlNode * msg); void crm_update_peer_join(const char *source, crm_node_t * node, enum crm_join_phase phase); xmlNode *create_node_state_update(crm_node_t *node, int flags, xmlNode *parent, const char *source); void populate_cib_nodes(enum node_update_flags flags, const char *source); void crm_update_quorum(gboolean quorum, gboolean force_update); void controld_close_attrd_ipc(void); void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node); void update_attrd_remote_node_removed(const char *host, const char *user_name); void update_attrd_clear_failures(const char *host, const char *rsc, const char *op, const char *interval_spec, gboolean is_remote_node); int crmd_join_phase_count(enum crm_join_phase phase); void crmd_join_phase_log(int level); void crmd_peer_down(crm_node_t *peer, bool full); unsigned int cib_op_timeout(void); bool feature_set_compatible(const char *dc_version, const char *join_version); bool controld_action_is_recordable(const char *action); // Subsections of node_state enum controld_section_e { controld_section_lrm, controld_section_lrm_unlocked, controld_section_attrs, controld_section_all, controld_section_all_unlocked }; void controld_delete_node_state(const char *uname, enum controld_section_e section, int options); int controld_delete_resource_history(const char *rsc_id, const char *node, const char *user_name, int call_options); const char *get_node_id(xmlNode *lrm_rsc_op); /* Convenience macro for registering a CIB callback * (assumes that data can be freed with free()) */ # define fsa_register_cib_callback(id, flag, data, fn) do { \ CRM_ASSERT(fsa_cib_conn); \ fsa_cib_conn->cmds->register_callback_full( \ fsa_cib_conn, id, cib_op_timeout(), \ flag, data, #fn, fn, free); \ } while(0) #endif diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 843c18ecc2..3e7a4fdaea 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -1,1673 +1,1673 @@ /* * Copyright 2009-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include // PRIu32, PRIx32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include char *stonith_our_uname = NULL; long stonith_watchdog_timeout_ms = 0; GList *stonith_watchdog_targets = NULL; static GMainLoop *mainloop = NULL; gboolean stand_alone = FALSE; static gboolean no_cib_connect = FALSE; static gboolean stonith_shutdown_flag = FALSE; static qb_ipcs_service_t *ipcs = NULL; static xmlNode *local_cib = NULL; static pe_working_set_t *fenced_data_set = NULL; static cib_t *cib_api = NULL; static pcmk__output_t *out = NULL; pcmk__supported_format_t formats[] = { PCMK__SUPPORTED_FORMAT_LOG, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, { NULL, NULL, NULL } }; static void stonith_shutdown(int nsig); static void stonith_cleanup(void); static int32_t st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { if (stonith_shutdown_flag) { crm_info("Ignoring new client [%d] during shutdown", pcmk__client_pid(c)); return -EPERM; } if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; int call_options = 0; xmlNode *request = NULL; pcmk__client_t *c = pcmk__find_client(qbc); const char *op = NULL; if (c == NULL) { crm_info("Invalid client: %p", qbc); return 0; } request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack", CRM_EX_PROTOCOL); return 0; } op = crm_element_value(request, F_CRM_TASK); if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { crm_xml_add(request, F_TYPE, T_STONITH_NG); crm_xml_add(request, F_STONITH_OPERATION, op); crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); send_cluster_message(NULL, crm_msg_stonith_ng, request, FALSE); free_xml(request); return 0; } if (c->name == NULL) { const char *value = crm_element_value(request, F_STONITH_CLIENTNAME); if (value == NULL) { value = "unknown"; } c->name = crm_strdup_printf("%s.%u", value, c->pid); } crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); crm_trace("Flags 0x%08" PRIx32 "/0x%08x for command %" PRIu32 " from client %s", flags, call_options, id, pcmk__client_name(c)); if (pcmk_is_set(call_options, st_opt_sync_call)) { CRM_ASSERT(flags & crm_ipc_client_response); CRM_LOG_ASSERT(c->request_id == 0); /* This means the client has two synchronous events in-flight */ c->request_id = id; /* Reply only to the last one */ } crm_xml_add(request, F_STONITH_CLIENTID, c->id); crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); stonith_command(c, id, flags, request, NULL); free_xml(request); return 0; } /* Error code means? */ static int32_t st_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p closed", c); pcmk__free_client(client); /* 0 means: yes, go ahead and destroy the connection */ return 0; } static void st_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p destroyed", c); st_ipc_closed(c); } static void stonith_peer_callback(xmlNode * msg, void *private_data) { const char *remote_peer = crm_element_value(msg, F_ORIG); const char *op = crm_element_value(msg, F_STONITH_OPERATION); if (pcmk__str_eq(op, "poke", pcmk__str_none)) { return; } crm_log_xml_trace(msg, "Peer[inbound]"); stonith_command(NULL, 0, 0, msg, remote_peer); } #if SUPPORT_COROSYNC static void stonith_peer_ais_callback(cpg_handle_t handle, const struct cpg_name *groupName, uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len) { uint32_t kind = 0; xmlNode *xml = NULL; const char *from = NULL; char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from); if(data == NULL) { return; } if (kind == crm_class_cluster) { xml = string2xml(data); if (xml == NULL) { crm_err("Invalid XML: '%.120s'", data); free(data); return; } crm_xml_add(xml, F_ORIG, from); /* crm_xml_add_int(xml, F_SEQ, wrapper->id); */ stonith_peer_callback(xml, NULL); } free_xml(xml); free(data); return; } static void stonith_peer_cs_destroy(gpointer user_data) { crm_crit("Lost connection to cluster layer, shutting down"); stonith_shutdown(0); } #endif void do_local_reply(xmlNode * notify_src, const char *client_id, gboolean sync_reply, gboolean from_peer) { /* send callback to originating child */ pcmk__client_t *client_obj = NULL; int local_rc = pcmk_rc_ok; crm_trace("Sending response"); client_obj = pcmk__find_client_by_id(client_id); crm_trace("Sending callback to request originator"); if (client_obj == NULL) { local_rc = EPROTO; crm_trace("No client to sent the response to. F_STONITH_CLIENTID not set."); } else { int rid = 0; if (sync_reply) { CRM_LOG_ASSERT(client_obj->request_id); rid = client_obj->request_id; client_obj->request_id = 0; crm_trace("Sending response %d to client %s%s", rid, pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } else { crm_trace("Sending an event to client %s%s", pcmk__client_name(client_obj), (from_peer? " (originator of delegated request)" : "")); } local_rc = pcmk__ipc_send_xml(client_obj, rid, notify_src, (sync_reply? crm_ipc_flags_none : crm_ipc_server_event)); } if ((local_rc != pcmk_rc_ok) && (client_obj != NULL)) { crm_warn("%s reply to client %s failed: %s", (sync_reply? "Synchronous" : "Asynchronous"), pcmk__client_name(client_obj), pcmk_rc_str(local_rc)); } } uint64_t get_stonith_flag(const char *name) { if (pcmk__str_eq(name, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) { return st_callback_notify_fence; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_ADD, pcmk__str_casei)) { return st_callback_device_add; } else if (pcmk__str_eq(name, STONITH_OP_DEVICE_DEL, pcmk__str_casei)) { return st_callback_device_del; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY, pcmk__str_casei)) { return st_callback_notify_history; } else if (pcmk__str_eq(name, T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk__str_casei)) { return st_callback_notify_history_synced; } return st_callback_unknown; } static void stonith_notify_client(gpointer key, gpointer value, gpointer user_data) { xmlNode *update_msg = user_data; pcmk__client_t *client = value; const char *type = NULL; CRM_CHECK(client != NULL, return); CRM_CHECK(update_msg != NULL, return); type = crm_element_value(update_msg, F_SUBTYPE); CRM_CHECK(type != NULL, crm_log_xml_err(update_msg, "notify"); return); if (client->ipcs == NULL) { crm_trace("Skipping client with NULL channel"); return; } if (pcmk_is_set(client->flags, get_stonith_flag(type))) { int rc = pcmk__ipc_send_xml(client, 0, update_msg, crm_ipc_server_event|crm_ipc_server_error); if (rc != pcmk_rc_ok) { crm_warn("%s notification of client %s failed: %s " CRM_XS " id=%.8s rc=%d", type, pcmk__client_name(client), pcmk_rc_str(rc), client->id, rc); } else { crm_trace("Sent %s notification to client %s", type, pcmk__client_name(client)); } } } void do_stonith_async_timeout_update(const char *client_id, const char *call_id, int timeout) { pcmk__client_t *client = NULL; xmlNode *notify_data = NULL; if (!timeout || !call_id || !client_id) { return; } client = pcmk__find_client_by_id(client_id); if (!client) { return; } notify_data = create_xml_node(NULL, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_TYPE, T_STONITH_TIMEOUT_VALUE); crm_xml_add(notify_data, F_STONITH_CALLID, call_id); crm_xml_add_int(notify_data, F_STONITH_TIMEOUT, timeout); crm_trace("timeout update is %d for client %s and call id %s", timeout, client_id, call_id); if (client) { pcmk__ipc_send_xml(client, 0, notify_data, crm_ipc_server_event); } free_xml(notify_data); } void do_stonith_notify(int options, const char *type, int result, xmlNode * data) { /* TODO: Standardize the contents of data */ xmlNode *update_msg = create_xml_node(NULL, "notify"); CRM_CHECK(type != NULL,;); crm_xml_add(update_msg, F_TYPE, T_STONITH_NOTIFY); crm_xml_add(update_msg, F_SUBTYPE, type); crm_xml_add(update_msg, F_STONITH_OPERATION, type); crm_xml_add_int(update_msg, F_STONITH_RC, result); if (data != NULL) { add_message_xml(update_msg, F_STONITH_CALLDATA, data); } crm_trace("Notifying clients"); pcmk__foreach_ipc_client(stonith_notify_client, update_msg); free_xml(update_msg); crm_trace("Notify complete"); } static void do_stonith_notify_config(int options, const char *op, int rc, const char *desc, int active) { xmlNode *notify_data = create_xml_node(NULL, op); CRM_CHECK(notify_data != NULL, return); crm_xml_add(notify_data, F_STONITH_DEVICE, desc); crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); do_stonith_notify(options, op, rc, notify_data); free_xml(notify_data); } void do_stonith_notify_device(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); } void do_stonith_notify_level(int options, const char *op, int rc, const char *desc) { do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); } static void topology_remove_helper(const char *node, int level) { int rc; char *desc = NULL; xmlNode *data = create_xml_node(NULL, XML_TAG_FENCING_LEVEL); crm_xml_add(data, F_STONITH_ORIGIN, __func__); crm_xml_add_int(data, XML_ATTR_STONITH_INDEX, level); crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); rc = stonith_level_remove(data, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); free_xml(data); free(desc); } static void remove_cib_device(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if(match != NULL) { standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); } if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } rsc_id = crm_element_value(match, XML_ATTR_ID); stonith_device_remove(rsc_id, TRUE); } } static void handle_topology_change(xmlNode *match, bool remove) { int rc; char *desc = NULL; CRM_CHECK(match != NULL, return); crm_trace("Updating %s", ID(match)); if(remove) { int index = 0; char *key = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); topology_remove_helper(key, index); free(key); } rc = stonith_level_register(match, &desc); do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); free(desc); } static void remove_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); CRM_LOG_ASSERT(match != NULL); if (match && crm_element_value(match, XML_DIFF_MARKER)) { /* Deletion */ int index = 0; char *target = stonith_level_key(match, -1); crm_element_value_int(match, XML_ATTR_STONITH_INDEX, &index); if (target == NULL) { crm_err("Invalid fencing target in element %s", ID(match)); } else if (index <= 0) { crm_err("Invalid level for %s in element %s", target, ID(match)); } else { topology_remove_helper(target, index); } /* } else { Deal with modifications during the 'addition' stage */ } } } static void register_fencing_topology(xmlXPathObjectPtr xpathObj) { int max = numXpathResults(xpathObj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpathObj, lpc); handle_topology_change(match, TRUE); } } /* Fencing */ static void fencing_topology_init(void) { xmlXPathObjectPtr xpathObj = NULL; const char *xpath = "//" XML_TAG_FENCING_LEVEL; crm_trace("Full topology refresh"); free_topology_list(); init_topology_list(); /* Grab everything */ xpathObj = xpath_search(local_cib, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } #define rsc_name(x) x->clone_name?x->clone_name:x->id /*! * \internal * \brief Check whether our uname is in a resource's allowed node list * * \param[in] rsc Resource to check * * \return Pointer to node object if found, NULL otherwise */ static pe_node_t * our_node_allowed_for(pe_resource_t *rsc) { GHashTableIter iter; pe_node_t *node = NULL; if (rsc && stonith_our_uname) { g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { if (node && strcmp(node->details->uname, stonith_our_uname) == 0) { break; } node = NULL; } } return node; } static void watchdog_device_update(void) { if (stonith_watchdog_timeout_ms > 0) { if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && !stonith_watchdog_targets) { /* getting here watchdog-fencing enabled, no device there yet and reason isn't stonith_watchdog_targets preventing that */ int rc; xmlNode *xml; xml = create_device_registration_xml( STONITH_WATCHDOG_ID, st_namespace_internal, STONITH_WATCHDOG_AGENT, NULL, /* stonith_device_register will add our own name as PCMK_STONITH_HOST_LIST param so we can skip that here */ NULL); rc = stonith_device_register(xml, NULL, TRUE); free_xml(xml); if (rc != pcmk_ok) { crm_crit("Cannot register watchdog pseudo fence agent"); crm_exit(CRM_EX_FATAL); } } } else { /* be silent if no device - todo parameter to stonith_device_remove */ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); } } } static void update_stonith_watchdog_timeout_ms(xmlNode *cib) { xmlNode *stonith_enabled_xml = NULL; const char *stonith_enabled_s = NULL; long timeout_ms = 0; stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", cib, LOG_NEVER); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s == NULL || crm_is_true(stonith_enabled_s)) { xmlNode *stonith_watchdog_xml = NULL; const char *value = NULL; stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", cib, LOG_NEVER); if (stonith_watchdog_xml) { value = crm_element_value(stonith_watchdog_xml, XML_NVPAIR_ATTR_VALUE); } if (value) { timeout_ms = crm_get_msec(value); } if (timeout_ms < 0) { timeout_ms = pcmk__auto_watchdog_timeout(); } } stonith_watchdog_timeout_ms = timeout_ms; } /*! * \internal * \brief If a resource or any of its children are STONITH devices, update their * definitions given a cluster working set. * * \param[in] rsc Resource to check * \param[in] data_set Cluster working set with device information */ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) { pe_node_t *node = NULL; const char *value = NULL; const char *rclass = NULL; pe_node_t *parent = NULL; /* If this is a complex resource, check children rather than this resource itself. */ if(rsc->children) { GList *gIter = NULL; for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) { cib_device_update(gIter->data, data_set); if(pe_rsc_is_clone(rsc)) { crm_trace("Only processing one copy of the clone %s", rsc->id); break; } } return; } /* We only care about STONITH resources. */ rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { return; } /* If this STONITH resource is disabled, remove it. */ if (pe__resource_is_disabled(rsc)) { crm_info("Device %s has been disabled", rsc->id); return; } /* if watchdog-fencing is disabled handle any watchdog-fence resource as if it was disabled */ if ((stonith_watchdog_timeout_ms <= 0) && pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { crm_info("Watchdog-fencing disabled thus handling " "device %s as disabled", rsc->id); return; } /* Check whether our node is allowed for this resource (and its parent if in a group) */ node = our_node_allowed_for(rsc); if (rsc->parent && (rsc->parent->variant == pe_group)) { parent = our_node_allowed_for(rsc->parent); } if(node == NULL) { /* Our node is disallowed, so remove the device */ GHashTableIter iter; crm_info("Device %s has been disabled on %s: unknown", rsc->id, stonith_our_uname); g_hash_table_iter_init(&iter, rsc->allowed_nodes); while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) { crm_trace("Available: %s = %d", node->details->uname, node->weight); } return; } else if(node->weight < 0 || (parent && parent->weight < 0)) { /* Our node (or its group) is disallowed by score, so remove the device */ char *score = score2char((node->weight < 0) ? node->weight : parent->weight); crm_info("Device %s has been disabled on %s: score=%s", rsc->id, stonith_our_uname, score); free(score); return; } else { /* Our node is allowed, so update the device information */ int rc; xmlNode *data; GHashTable *rsc_params = NULL; GHashTableIter gIter; stonith_key_value_t *params = NULL; const char *name = NULL; const char *agent = crm_element_value(rsc->xml, XML_EXPR_ATTR_TYPE); const char *rsc_provides = NULL; crm_debug("Device %s is allowed on %s: score=%d", rsc->id, stonith_our_uname, node->weight); rsc_params = pe_rsc_params(rsc, node, data_set); get_meta_attributes(rsc->meta, rsc, node, data_set); rsc_provides = g_hash_table_lookup(rsc->meta, PCMK_STONITH_PROVIDES); g_hash_table_iter_init(&gIter, rsc_params); while (g_hash_table_iter_next(&gIter, (gpointer *) & name, (gpointer *) & value)) { if (!name || !value) { continue; } params = stonith_key_value_add(params, name, value); crm_trace(" %s=%s", name, value); } data = create_device_registration_xml(rsc_name(rsc), st_namespace_any, agent, params, rsc_provides); stonith_key_value_freeall(params, 1, 1); rc = stonith_device_register(data, NULL, TRUE); CRM_ASSERT(rc == pcmk_ok); free_xml(data); } } /*! * \internal * \brief Update all STONITH device definitions based on current CIB */ static void cib_devices_update(void) { GHashTableIter iter; stonith_device_t *device = NULL; crm_info("Updating devices to version %s.%s.%s", crm_element_value(local_cib, XML_ATTR_GENERATION_ADMIN), crm_element_value(local_cib, XML_ATTR_GENERATION), crm_element_value(local_cib, XML_ATTR_NUMUPDATES)); CRM_ASSERT(fenced_data_set != NULL); fenced_data_set->input = local_cib; fenced_data_set->now = crm_time_new(NULL); fenced_data_set->localhost = stonith_our_uname; pe__set_working_set_flags(fenced_data_set, pe_flag_quick_location); cluster_status(fenced_data_set); pcmk__schedule_actions(fenced_data_set, NULL, NULL); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->cib_registered) { device->dirty = TRUE; } } /* have list repopulated if cib has a watchdog-fencing-resource TODO: keep a cached list for queries happening while we are refreshing */ g_list_free_full(stonith_watchdog_targets, free); stonith_watchdog_targets = NULL; g_list_foreach(fenced_data_set->resources, (GFunc) cib_device_update, fenced_data_set); g_hash_table_iter_init(&iter, device_list); while (g_hash_table_iter_next(&iter, NULL, (void **)&device)) { if (device->dirty) { g_hash_table_iter_remove(&iter); } } fenced_data_set->input = NULL; // Wasn't a copy, so don't let API free it pe_reset_working_set(fenced_data_set); } static void update_cib_stonith_devices_v2(const char *event, xmlNode * msg) { xmlNode *change = NULL; char *reason = NULL; bool needs_update = FALSE; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); const char *shortpath = NULL; if ((op == NULL) || (strcmp(op, "move") == 0) || strstr(xpath, "/"XML_CIB_TAG_STATUS)) { continue; } else if (pcmk__str_eq(op, "delete", pcmk__str_casei) && strstr(xpath, "/"XML_CIB_TAG_RESOURCE)) { const char *rsc_id = NULL; char *search = NULL; char *mutable = NULL; if (strstr(xpath, XML_TAG_ATTR_SETS) || strstr(xpath, XML_TAG_META_SETS)) { needs_update = TRUE; reason = strdup("(meta) attribute deleted from resource"); break; } mutable = strdup(xpath); rsc_id = strstr(mutable, "primitive[@id=\'"); if (rsc_id != NULL) { rsc_id += strlen("primitive[@id=\'"); search = strchr(rsc_id, '\''); } if (search != NULL) { *search = 0; stonith_device_remove(rsc_id, TRUE); /* watchdog_device_update called afterwards to fall back to implicit definition if needed */ } else { crm_warn("Ignoring malformed CIB update (resource deletion)"); } free(mutable); } else if (strstr(xpath, "/"XML_CIB_TAG_RESOURCES) || strstr(xpath, "/"XML_CIB_TAG_CONSTRAINTS) || strstr(xpath, "/"XML_CIB_TAG_RSCCONFIG)) { shortpath = strrchr(xpath, '/'); CRM_ASSERT(shortpath); reason = crm_strdup_printf("%s %s", op, shortpath+1); needs_update = TRUE; break; } } if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } else { crm_trace("No updates for device list found in CIB"); } free(reason); } static void update_cib_stonith_devices_v1(const char *event, xmlNode * msg) { const char *reason = "none"; gboolean needs_update = FALSE; xmlXPathObjectPtr xpath_obj = NULL; /* process new constraints */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_CONS_TAG_RSC_LOCATION); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; /* Safest and simplest to always recompute */ needs_update = TRUE; reason = "new location constraint"; for (lpc = 0; lpc < max; lpc++) { xmlNode *match = getXpathResult(xpath_obj, lpc); crm_log_xml_trace(match, "new constraint"); } } freeXpathObject(xpath_obj); /* process deletions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { remove_cib_device(xpath_obj); } freeXpathObject(xpath_obj); /* process additions */ xpath_obj = xpath_search(msg, "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_RESOURCE); if (numXpathResults(xpath_obj) > 0) { int max = numXpathResults(xpath_obj), lpc = 0; for (lpc = 0; lpc < max; lpc++) { const char *rsc_id = NULL; const char *standard = NULL; xmlNode *match = getXpathResult(xpath_obj, lpc); rsc_id = crm_element_value(match, XML_ATTR_ID); standard = crm_element_value(match, XML_AGENT_ATTR_CLASS); if (!pcmk__str_eq(standard, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { continue; } crm_trace("Fencing resource %s was added or modified", rsc_id); reason = "new resource"; needs_update = TRUE; } } freeXpathObject(xpath_obj); if(needs_update) { crm_info("Updating device list from CIB: %s", reason); cib_devices_update(); } } static void update_cib_stonith_devices(const char *event, xmlNode * msg) { int format = 1; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); switch(format) { case 1: update_cib_stonith_devices_v1(event, msg); break; case 2: update_cib_stonith_devices_v2(event, msg); break; default: crm_warn("Unknown patch format: %d", format); } } /* Needs to hold node name + attribute name + attribute value + 75 */ #define XPATH_MAX 512 /*! * \internal * \brief Check whether a node has a specific attribute name/value * * \param[in] node Name of node to check * \param[in] name Name of an attribute to look for * \param[in] value The value the named attribute needs to be set to in order to be considered a match * * \return TRUE if the locally cached CIB has the specified node attribute */ gboolean node_has_attr(const char *node, const char *name, const char *value) { char xpath[XPATH_MAX]; xmlNode *match; int n; CRM_CHECK(local_cib != NULL, return FALSE); /* Search for the node's attributes in the CIB. While the schema allows * multiple sets of instance attributes, and allows instance attributes to * use id-ref to reference values elsewhere, that is intended for resources, * so we ignore that here. */ n = snprintf(xpath, XPATH_MAX, "//" XML_CIB_TAG_NODES "/" XML_CIB_TAG_NODE "[@uname='%s']/" XML_TAG_ATTR_SETS "/" XML_CIB_TAG_NVPAIR "[@name='%s' and @value='%s']", node, name, value); match = get_xpath_object(xpath, local_cib, LOG_NEVER); CRM_CHECK(n < XPATH_MAX, return FALSE); return (match != NULL); } /*! * \internal * \brief Check whether a node does watchdog-fencing * * \param[in] node Name of node to check * * \return TRUE if node found in stonith_watchdog_targets * or stonith_watchdog_targets is empty indicating * all nodes are doing watchdog-fencing */ gboolean node_does_watchdog_fencing(const char *node) { return ((stonith_watchdog_targets == NULL) || pcmk__str_in_list(node, stonith_watchdog_targets, pcmk__str_casei)); } static void update_fencing_topology(const char *event, xmlNode * msg) { int format = 1; const char *xpath; xmlXPathObjectPtr xpathObj = NULL; xmlNode *patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); CRM_ASSERT(patchset); crm_element_value_int(patchset, "format", &format); if(format == 1) { /* Process deletions (only) */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); remove_fencing_topology(xpathObj); freeXpathObject(xpathObj); /* Process additions and changes */ xpath = "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_TAG_FENCING_LEVEL; xpathObj = xpath_search(msg, xpath); register_fencing_topology(xpathObj); freeXpathObject(xpathObj); } else if(format == 2) { xmlNode *change = NULL; int add[] = { 0, 0, 0 }; int del[] = { 0, 0, 0 }; xml_patch_versions(patchset, add, del); for (change = pcmk__xml_first_child(patchset); change != NULL; change = pcmk__xml_next(change)) { const char *op = crm_element_value(change, XML_DIFF_OP); const char *xpath = crm_element_value(change, XML_DIFF_PATH); if(op == NULL) { continue; } else if(strstr(xpath, "/" XML_TAG_FENCING_LEVEL) != NULL) { /* Change to a specific entry */ crm_trace("Handling %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); if(strcmp(op, "move") == 0) { continue; } else if(strcmp(op, "create") == 0) { handle_topology_change(change->children, FALSE); } else if(strcmp(op, "modify") == 0) { xmlNode *match = first_named_child(change, XML_DIFF_RESULT); if(match) { handle_topology_change(match->children, TRUE); } } else if(strcmp(op, "delete") == 0) { /* Nuclear option, all we have is the path and an id... not enough to remove a specific entry */ crm_info("Re-initializing fencing topology after %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else if (strstr(xpath, "/" XML_TAG_FENCING_TOPOLOGY) != NULL) { /* Change to the topology in general */ crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } else if (strstr(xpath, "/" XML_CIB_TAG_CONFIGURATION)) { /* Changes to the whole config section, possibly including the topology as a whild */ if(first_named_child(change, XML_TAG_FENCING_TOPOLOGY) == NULL) { crm_trace("Nothing for us in %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); } else if(strcmp(op, "delete") == 0 || strcmp(op, "create") == 0) { crm_info("Re-initializing fencing topology after top-level %s operation %d.%d.%d for %s.", op, add[0], add[1], add[2], xpath); fencing_topology_init(); return; } } else { crm_trace("Nothing for us in %s operation %d.%d.%d for %s", op, add[0], add[1], add[2], xpath); } } } else { crm_warn("Unknown patch format: %d", format); } } static bool have_cib_devices = FALSE; static void update_cib_cache_cb(const char *event, xmlNode * msg) { int rc = pcmk_ok; xmlNode *stonith_enabled_xml = NULL; const char *stonith_enabled_s = NULL; static gboolean stonith_enabled_saved = TRUE; long timeout_ms_saved = stonith_watchdog_timeout_ms; gboolean need_full_refresh = FALSE; if(!have_cib_devices) { crm_trace("Skipping updates until we get a full dump"); return; } else if(msg == NULL) { crm_trace("Missing %s update", event); return; } /* Maintain a local copy of the CIB so that we have full access * to device definitions, location constraints, and node attributes */ if (local_cib != NULL) { int rc = pcmk_ok; xmlNode *patchset = NULL; crm_element_value_int(msg, F_CIB_RC, &rc); if (rc != pcmk_ok) { return; } patchset = get_message_xml(msg, F_CIB_UPDATE_RESULT); xml_log_patchset(LOG_TRACE, "Config update", patchset); rc = xml_apply_patchset(local_cib, patchset, TRUE); switch (rc) { case pcmk_ok: case -pcmk_err_old_data: break; case -pcmk_err_diff_resync: case -pcmk_err_diff_failed: crm_notice("[%s] Patch aborted: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; break; default: crm_warn("[%s] ABORTED: %s (%d)", event, pcmk_strerror(rc), rc); free_xml(local_cib); local_cib = NULL; } } if (local_cib == NULL) { crm_trace("Re-requesting full CIB"); rc = cib_api->cmds->query(cib_api, NULL, &local_cib, cib_scope_local | cib_sync_call); if(rc != pcmk_ok) { crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); return; } CRM_ASSERT(local_cib != NULL); stonith_enabled_saved = FALSE; /* Trigger a full refresh below */ } pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", local_cib, LOG_NEVER); if (stonith_enabled_xml) { stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); } if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { crm_trace("Ignoring CIB updates while fencing is disabled"); stonith_enabled_saved = FALSE; } else if (stonith_enabled_saved == FALSE) { crm_info("Updating fencing device and topology lists " "now that fencing is enabled"); stonith_enabled_saved = TRUE; need_full_refresh = TRUE; } else { if (timeout_ms_saved != stonith_watchdog_timeout_ms) { need_full_refresh = TRUE; } else { update_fencing_topology(event, msg); update_cib_stonith_devices(event, msg); watchdog_device_update(); } } if (need_full_refresh) { fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } } static void init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) { crm_info("Updating device list from CIB"); have_cib_devices = TRUE; local_cib = copy_xml(output); pcmk__refresh_node_caches_from_cib(local_cib); update_stonith_watchdog_timeout_ms(local_cib); fencing_topology_init(); cib_devices_update(); watchdog_device_update(); } static void stonith_shutdown(int nsig) { crm_info("Terminating with %d clients", pcmk__ipc_client_count()); stonith_shutdown_flag = TRUE; if (mainloop != NULL && g_main_loop_is_running(mainloop)) { g_main_loop_quit(mainloop); } else { stonith_cleanup(); crm_exit(CRM_EX_OK); } } static void cib_connection_destroy(gpointer user_data) { if (stonith_shutdown_flag) { crm_info("Connection to the CIB manager closed"); return; } else { crm_crit("Lost connection to the CIB manager, shutting down"); } if (cib_api) { cib_api->cmds->signoff(cib_api); } stonith_shutdown(0); } static void stonith_cleanup(void) { if (cib_api) { cib_api->cmds->del_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb); cib_api->cmds->signoff(cib_api); } if (ipcs) { qb_ipcs_destroy(ipcs); } crm_peer_destroy(); pcmk__client_cleanup(); free_stonith_remote_op_list(); free_topology_list(); free_device_list(); free_metadata_cache(); free(stonith_our_uname); stonith_our_uname = NULL; free_xml(local_cib); local_cib = NULL; } static pcmk__cli_option_t long_options[] = { // long option, argument type, storage, short option, description, flags { "stand-alone", no_argument, 0, 's', NULL, pcmk__option_default }, { "stand-alone-w-cpg", no_argument, 0, 'c', NULL, pcmk__option_default }, { "logfile", required_argument, 0, 'l', NULL, pcmk__option_default }, { "verbose", no_argument, 0, 'V', NULL, pcmk__option_default }, { "version", no_argument, 0, '$', NULL, pcmk__option_default }, { "help", no_argument, 0, '?', NULL, pcmk__option_default }, { 0, 0, 0, 0 } }; static void setup_cib(void) { int rc, retries = 0; cib_api = cib_new(); if (cib_api == NULL) { crm_err("No connection to the CIB manager"); return; } do { sleep(retries); rc = cib_api->cmds->signon(cib_api, CRM_SYSTEM_STONITHD, cib_command); } while (rc == -ENOTCONN && ++retries < 5); if (rc != pcmk_ok) { crm_err("Could not connect to the CIB manager: %s (%d)", pcmk_strerror(rc), rc); } else if (pcmk_ok != cib_api->cmds->add_notify_callback(cib_api, T_CIB_DIFF_NOTIFY, update_cib_cache_cb)) { crm_err("Could not set CIB notification callback"); } else { rc = cib_api->cmds->query(cib_api, NULL, NULL, cib_scope_local); cib_api->cmds->register_callback(cib_api, rc, 120, FALSE, NULL, "init_cib_cache_cb", init_cib_cache_cb); cib_api->cmds->set_connection_dnotify(cib_api, cib_connection_destroy); crm_info("Watching for fencing topology changes"); } } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy }; /*! * \internal * \brief Callback for peer status changes * * \param[in] type What changed * \param[in] node What peer had the change * \param[in] data Previous value of what changed */ static void st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) { if ((type != crm_status_processes) && !pcmk_is_set(node->flags, crm_remote_node)) { /* * This is a hack until we can send to a nodeid and/or we fix node name lookups * These messages are ignored in stonith_peer_callback() */ xmlNode *query = create_xml_node(NULL, "stonith_command"); crm_xml_add(query, F_XML_TAGNAME, "stonith_command"); crm_xml_add(query, F_TYPE, T_STONITH_NG); crm_xml_add(query, F_STONITH_OPERATION, "poke"); crm_debug("Broadcasting our uname because of node %u", node->id); send_cluster_message(NULL, crm_msg_stonith_ng, query, FALSE); free_xml(query); } } int main(int argc, char **argv) { int flag; int lpc = 0; int argerr = 0; int option_index = 0; crm_cluster_t *cluster = NULL; const char *actions[] = { "reboot", "off", "on", "list", "monitor", "status" }; crm_ipc_t *old_instance = NULL; int rc = pcmk_rc_ok; crm_log_preinit(NULL, argc, argv); pcmk__set_cli_options(NULL, "[options]", long_options, "daemon for executing fencing devices in a " "Pacemaker cluster"); while (1) { flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); if (flag == -1) { break; } switch (flag) { case 'V': crm_bump_log_level(argc, argv); break; case 'l': { int rc = pcmk__add_logfile(optarg); if (rc != pcmk_rc_ok) { /* Logging has not yet been initialized, so stderr is * the only way to get information out */ fprintf(stderr, "Logging to %s is disabled: %s\n", optarg, pcmk_rc_str(rc)); } } break; case 's': stand_alone = TRUE; break; case 'c': stand_alone = FALSE; no_cib_connect = TRUE; break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); break; default: ++argerr; break; } } if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) { printf("\n"); printf("\n"); printf(" 1.0\n"); printf(" Instance attributes available for all \"stonith\"-class resources" " and used by Pacemaker's fence daemon, formerly known as stonithd\n"); printf(" Instance attributes available for all \"stonith\"-class resources\n"); printf(" \n"); #if 0 // priority is not implemented yet printf(" \n"); printf(" Devices that are not in a topology " "are tried in order of highest to lowest integer priority\n"); printf(" \n"); printf(" \n"); #endif printf(" \n", PCMK_STONITH_HOST_ARGUMENT); printf (" Advanced use only: An alternate parameter to supply instead of 'port'\n"); printf (" Some devices do not support the standard 'port' parameter or may provide additional ones.\n" "Use this to specify an alternate, device-specific, parameter that should indicate the machine to be fenced.\n" "A value of 'none' can be used to tell the cluster not to supply any additional parameters.\n" " \n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_MAP); printf (" A mapping of host names to ports numbers for devices that do not support host names.\n"); printf (" Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_LIST); printf(" A list of machines controlled by " "this device (Optional unless %s=static-list).\n", PCMK_STONITH_HOST_CHECK); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_HOST_CHECK); printf (" How to determine which machines are controlled by the device.\n"); printf(" Allowed values: dynamic-list " "(query the device via the 'list' command), static-list " "(check the " PCMK_STONITH_HOST_LIST " attribute), status " "(query the device via the 'status' command), none (assume " "every device can fence every machine)\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_DELAY_MAX); printf(" Enable a delay of no more than the " "time specified before executing fencing actions. Pacemaker " "derives the overall delay by taking the value of " PCMK_STONITH_DELAY_BASE " and adding a random delay value such " "that the sum is kept below this maximum.\n"); printf(" This prevents double fencing when " "using slow devices such as sbd.\nUse this to enable a random " "delay for fencing actions.\nThe overall delay is derived from " "this random delay value adding a static delay so that the sum " "is kept below the maximum delay.\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_DELAY_BASE); printf(" Enable a base delay for " "fencing actions and specify base delay value.\n"); printf(" This prevents double fencing when " "different delays are configured on the nodes.\nUse this to " "enable a static delay for fencing actions.\nThe overall delay " "is derived from a random delay value adding this static delay " "so that the sum is kept below the maximum delay.\nSet to eg. " "node1:1s;node2:5 to set different value per node.\n"); printf(" \n"); printf(" \n"); printf(" \n", PCMK_STONITH_ACTION_LIMIT); printf (" The maximum number of actions can be performed in parallel on this device\n"); printf (" Cluster property concurrent-fencing=true needs to be configured first.\n" "Then use this to specify the maximum number of actions can be performed in parallel on this device. -1 is unlimited.\n"); printf(" \n"); printf(" \n"); for (lpc = 0; lpc < PCMK__NELEM(actions); lpc++) { printf(" \n", actions[lpc]); printf (" Advanced use only: An alternate command to run instead of '%s'\n", actions[lpc]); printf (" Some devices do not support the standard commands or may provide additional ones.\n" "Use this to specify an alternate, device-specific, command that implements the '%s' action.\n", actions[lpc]); printf(" \n", actions[lpc]); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: Specify an alternate timeout to use for %s actions instead of stonith-timeout\n", actions[lpc]); printf (" Some devices need much more/less time to complete than normal.\n" "Use this to specify an alternate, device-specific, timeout for '%s' actions.\n", actions[lpc]); printf(" \n"); printf(" \n"); printf(" \n", actions[lpc]); printf (" Advanced use only: The maximum number of times to retry the '%s' command within the timeout period\n", actions[lpc]); printf(" Some devices do not support multiple connections." " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." " Use this option to alter the number of times Pacemaker retries '%s' actions before giving up." "\n", actions[lpc]); printf(" \n"); printf(" \n"); } printf(" \n"); printf("\n"); return CRM_EX_OK; } if (optind != argc) { ++argerr; } if (argerr) { pcmk__cli_help('?', CRM_EX_USAGE); } crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker fencer"); old_instance = crm_ipc_new("stonith-ng", 0); if (crm_ipc_connect(old_instance)) { /* IPC end-point already up */ crm_ipc_close(old_instance); crm_ipc_destroy(old_instance); crm_err("pacemaker-fenced is already active, aborting startup"); crm_exit(CRM_EX_OK); } else { /* not up or not authentic, we'll proceed either way */ crm_ipc_destroy(old_instance); old_instance = NULL; } mainloop_add_signal(SIGTERM, stonith_shutdown); crm_peer_init(); fenced_data_set = pe_new_working_set(); CRM_ASSERT(fenced_data_set != NULL); pe__set_working_set_flags(fenced_data_set, pe_flag_no_counts|pe_flag_no_compat); pe__set_working_set_flags(fenced_data_set, pe_flag_show_utilization); cluster = calloc(1, sizeof(crm_cluster_t)); CRM_ASSERT(cluster != NULL); if (stand_alone == FALSE) { if (is_corosync_cluster()) { #if SUPPORT_COROSYNC cluster->destroy = stonith_peer_cs_destroy; cluster->cpg.cpg_deliver_fn = stonith_peer_ais_callback; cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; #endif } crm_set_status_callback(&st_peer_update_callback); if (crm_cluster_connect(cluster) == FALSE) { crm_crit("Cannot sign in to the cluster... terminating"); crm_exit(CRM_EX_FATAL); } stonith_our_uname = strdup(cluster->uname); if (no_cib_connect == FALSE) { setup_cib(); } } else { stonith_our_uname = strdup("localhost"); } init_device_list(); init_topology_list(); pcmk__serve_fenced_ipc(&ipcs, &ipc_callbacks); pcmk__register_formats(NULL, formats); rc = pcmk__output_new(&out, "log", NULL, argv); if ((rc != pcmk_rc_ok) || (out == NULL)) { crm_err("Can't log resource details due to internal error: %s\n", pcmk_rc_str(rc)); crm_exit(CRM_EX_FATAL); } pe__register_messages(out); pcmk__register_lib_messages(out); pcmk__output_set_log_level(out, LOG_TRACE); fenced_data_set->priv = out; /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker fencer successfully started and accepting connections"); g_main_loop_run(mainloop); stonith_cleanup(); free(cluster->uuid); free(cluster->uname); free(cluster); pe_free_working_set(fenced_data_set); - pcmk__unregister_formats(); out->finish(out, CRM_EX_OK, true, NULL); pcmk__output_free(out); + pcmk__unregister_formats(); crm_exit(CRM_EX_OK); } diff --git a/daemons/schedulerd/Makefile.am b/daemons/schedulerd/Makefile.am index 87c84aa809..57e819bd98 100644 --- a/daemons/schedulerd/Makefile.am +++ b/daemons/schedulerd/Makefile.am @@ -1,50 +1,53 @@ # # Copyright 2004-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk include $(top_srcdir)/mk/man.mk AM_CPPFLAGS += -I$(top_builddir) -I$(top_srcdir) halibdir = $(CRM_DAEMON_DIR) ## binary progs halib_PROGRAMS = pacemaker-schedulerd if BUILD_XML_HELP man7_MANS = pacemaker-schedulerd.7 endif ## SOURCES +noinst_HEADERS = pacemaker-schedulerd.h + pacemaker_schedulerd_CFLAGS = $(CFLAGS_HARDENED_EXE) pacemaker_schedulerd_LDFLAGS = $(LDFLAGS_HARDENED_EXE) pacemaker_schedulerd_LDADD = $(top_builddir)/lib/common/libcrmcommon.la \ $(top_builddir)/lib/pengine/libpe_status.la \ $(top_builddir)/lib/pacemaker/libpacemaker.la # libcib for get_object_root() pacemaker_schedulerd_SOURCES = pacemaker-schedulerd.c +pacemaker_schedulerd_SOURCES += schedulerd_messages.c install-exec-local: $(INSTALL) -d -m 750 $(DESTDIR)/$(PE_STATE_DIR) -chown $(CRM_DAEMON_USER):$(CRM_DAEMON_GROUP) $(DESTDIR)/$(PE_STATE_DIR) if BUILD_LEGACY_LINKS install-exec-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine && $(LN_S) pacemaker-schedulerd pengine uninstall-hook: cd $(DESTDIR)$(CRM_DAEMON_DIR) && rm -f pengine endif uninstall-local: -rmdir $(DESTDIR)/$(PE_STATE_DIR) CLEANFILES = $(man7_MANS) diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c index b93ada7e5a..e44abeb60e 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -1,411 +1,186 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include #include #include #include -#include -#include -#include #include #include -#include - -#include +#include #include #include #include #include -#include -#define OPTARGS "hVc" +#include "pacemaker-schedulerd.h" + +#define SUMMARY "pacemaker-schedulerd - daemon for calculating a Pacemaker cluster's response to events" + +struct { + gchar **remainder; +} options; + +pe_working_set_t *sched_data_set = NULL; +pcmk__output_t *logger_out = NULL; +pcmk__output_t *out = NULL; static GMainLoop *mainloop = NULL; static qb_ipcs_service_t *ipcs = NULL; -static pe_working_set_t *sched_data_set = NULL; -static pcmk__output_t *out = NULL; +static crm_exit_t exit_code = CRM_EX_OK; pcmk__supported_format_t formats[] = { - PCMK__SUPPORTED_FORMAT_LOG, PCMK__SUPPORTED_FORMAT_NONE, PCMK__SUPPORTED_FORMAT_TEXT, + PCMK__SUPPORTED_FORMAT_XML, { NULL, NULL, NULL } }; void pengine_shutdown(int nsig); -static void -init_working_set(void) -{ - crm_config_error = FALSE; - crm_config_warning = FALSE; - - was_processing_error = FALSE; - was_processing_warning = FALSE; - - if (sched_data_set == NULL) { - sched_data_set = pe_new_working_set(); - CRM_ASSERT(sched_data_set != NULL); - pe__set_working_set_flags(sched_data_set, - pe_flag_no_counts|pe_flag_no_compat); - pe__set_working_set_flags(sched_data_set, - pe_flag_show_utilization); - sched_data_set->priv = out; - } else { - pe_reset_working_set(sched_data_set); - } -} - -static void -handle_pecalc_op(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender) -{ - static struct series_s { - const char *name; - const char *param; - - /* Maximum number of inputs of this kind to save to disk. - * If -1, save all; if 0, save none. - */ - int wrap; - } series[] = { - { "pe-error", "pe-error-series-max", -1 }, - { "pe-warn", "pe-warn-series-max", 5000 }, - { "pe-input", "pe-input-series-max", 4000 }, - }; - static char *last_digest = NULL; - static char *filename = NULL; - - unsigned int seq; - int series_id = 0; - int series_wrap = 0; - char *digest = NULL; - const char *value = NULL; - time_t execution_date = time(NULL); - xmlNode *converted = NULL; - xmlNode *reply = NULL; - bool is_repoke = false; - bool process = true; - - init_working_set(); - - digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, - CRM_FEATURE_SET); - converted = copy_xml(xml_data); - if (!cli_config_update(&converted, NULL, TRUE)) { - sched_data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); - crm_xml_add_int(sched_data_set->graph, "transition_id", 0); - crm_xml_add_int(sched_data_set->graph, "cluster-delay", 0); - process = false; - free(digest); - - } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) { - is_repoke = true; - free(digest); - - } else { - free(last_digest); - last_digest = digest; - } - - if (process) { - pcmk__schedule_actions(sched_data_set, converted, NULL); - } - - // Get appropriate index into series[] array - if (was_processing_error) { - series_id = 0; - } else if (was_processing_warning) { - series_id = 1; - } else { - series_id = 2; - } - - value = pe_pref(sched_data_set->config_hash, series[series_id].param); - if ((value == NULL) - || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { - series_wrap = series[series_id].wrap; - } - - if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name, - &seq) != pcmk_rc_ok) { - // @TODO maybe handle errors better ... - seq = 0; - } - crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", - series[series_id].name, series_wrap, seq, value); - - sched_data_set->input = NULL; - reply = create_reply(msg, sched_data_set->graph); - CRM_ASSERT(reply != NULL); - - if (series_wrap == 0) { // Don't save any inputs of this kind - free(filename); - filename = NULL; - - } else if (!is_repoke) { // Input changed, save to disk - free(filename); - filename = pcmk__series_filename(PE_STATE_DIR, - series[series_id].name, seq, true); - } - - crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); - crm_xml_add_int(reply, "graph-errors", was_processing_error); - crm_xml_add_int(reply, "graph-warnings", was_processing_warning); - crm_xml_add_int(reply, "config-errors", crm_config_error); - crm_xml_add_int(reply, "config-warnings", crm_config_warning); - - if (pcmk__ipc_send_xml(sender, 0, reply, - crm_ipc_server_event) != pcmk_rc_ok) { - int graph_file_fd = 0; - char *graph_file = NULL; - umask(S_IWGRP | S_IWOTH | S_IROTH); - - graph_file = crm_strdup_printf("%s/pengine.graph.XXXXXX", - PE_STATE_DIR); - graph_file_fd = mkstemp(graph_file); - - crm_err("Couldn't send transition graph to peer, writing to %s instead", - graph_file); - - crm_xml_add(reply, F_CRM_TGRAPH, graph_file); - write_xml_fd(sched_data_set->graph, graph_file, graph_file_fd, FALSE); - - free(graph_file); - free_xml(first_named_child(reply, F_CRM_DATA)); - CRM_ASSERT(pcmk__ipc_send_xml(sender, 0, reply, - crm_ipc_server_event) == pcmk_rc_ok); - } - - free_xml(reply); - pcmk__log_transition_summary(filename); - - if (series_wrap == 0) { - crm_debug("Not saving input to disk (disabled by configuration)"); - - } else if (is_repoke) { - crm_info("Input has not changed since last time, not saving to disk"); - - } else { - unlink(filename); - crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date); - write_xml_file(xml_data, filename, TRUE); - pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name, - ++seq, series_wrap); - } - - free_xml(converted); -} - -static gboolean -process_pe_message(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender) -{ - const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); - const char *op = crm_element_value(msg, F_CRM_TASK); - const char *ref = crm_element_value(msg, F_CRM_REFERENCE); - - crm_trace("Processing %s op (ref=%s)...", op, ref); - - if (op == NULL) { - /* error */ - - } else if (strcasecmp(op, CRM_OP_HELLO) == 0) { - /* ignore */ - - } else if (pcmk__str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE, pcmk__str_casei)) { - /* ignore */ - - } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) { - crm_trace("Bad sys-to %s", crm_str(sys_to)); - return FALSE; - - } else if (strcasecmp(op, CRM_OP_PECALC) == 0) { - handle_pecalc_op(msg, xml_data, sender); - } - - return TRUE; -} - -static int32_t -pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) -{ - crm_trace("Connection %p", c); - if (pcmk__new_client(c, uid, gid) == NULL) { - return -EIO; - } - return 0; -} - -gboolean process_pe_message(xmlNode *msg, xmlNode *xml_data, - pcmk__client_t *sender); - -static int32_t -pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) -{ - uint32_t id = 0; - uint32_t flags = 0; - pcmk__client_t *c = pcmk__find_client(qbc); - xmlNode *msg = pcmk__client_data2xml(c, data, &id, &flags); +static GOptionContext * +build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; - pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_INDETERMINATE); - if (msg != NULL) { - xmlNode *data_xml = get_message_xml(msg, F_CRM_DATA); + GOptionEntry extra_prog_entries[] = { + { G_OPTION_REMAINING, 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING_ARRAY, &options.remainder, + NULL, + NULL }, - process_pe_message(msg, data_xml, c); - free_xml(msg); - } - return 0; -} - -/* Error code means? */ -static int32_t -pe_ipc_closed(qb_ipcs_connection_t * c) -{ - pcmk__client_t *client = pcmk__find_client(c); + { NULL } + }; - if (client == NULL) { - return 0; - } - crm_trace("Connection %p", c); - pcmk__free_client(client); - return 0; + context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); + pcmk__add_main_args(context, extra_prog_entries); + return context; } -static void -pe_ipc_destroy(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); - pe_ipc_closed(c); -} - -struct qb_ipcs_service_handlers ipc_callbacks = { - .connection_accept = pe_ipc_accept, - .connection_created = NULL, - .msg_process = pe_ipc_dispatch, - .connection_closed = pe_ipc_closed, - .connection_destroyed = pe_ipc_destroy -}; - -static pcmk__cli_option_t long_options[] = { - // long option, argument type, storage, short option, description, flags - { - "help", no_argument, NULL, '?', - "\tThis text", pcmk__option_default - }, - { - "verbose", no_argument, NULL, 'V', - "\tIncrease debug output", pcmk__option_default - }, - { 0, 0, 0, 0 } -}; - int main(int argc, char **argv) { - int flag; - int index = 0; - int argerr = 0; + GError *error = NULL; int rc = pcmk_rc_ok; - crm_log_preinit(NULL, argc, argv); - pcmk__set_cli_options(NULL, "[options]", long_options, - "daemon for calculating a Pacemaker cluster's " - "response to events"); + GOptionGroup *output_group = NULL; + pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); + gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); + GOptionContext *context = build_arg_context(args, &output_group); + crm_log_preinit(NULL, argc, argv); mainloop_add_signal(SIGTERM, pengine_shutdown); - while (1) { - flag = pcmk__next_cli_option(argc, argv, &index, NULL); - if (flag == -1) - break; - - switch (flag) { - case 'V': - crm_bump_log_level(argc, argv); - break; - case 'h': /* Help message */ - pcmk__cli_help('?', CRM_EX_OK); - break; - default: - ++argerr; - break; - } + pcmk__register_formats(NULL, formats); + if (!g_option_context_parse_strv(context, &processed_args, &error)) { + exit_code = CRM_EX_USAGE; + goto done; } - if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) { - pe_metadata(); - return CRM_EX_OK; + rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); + if ((rc != pcmk_rc_ok) || (out == NULL)) { + exit_code = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", + args->output_ty, pcmk_rc_str(rc)); + goto done; } - if (optind > argc) { - ++argerr; + pe__register_messages(out); + pcmk__register_lib_messages(out); + + if (options.remainder) { + if (g_strv_length(options.remainder) == 1 && + pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) { + pe_metadata(); + goto done; + } else { + exit_code = CRM_EX_USAGE; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Unsupported extra command line parameters"); + goto done; + } } - if (argerr) { - pcmk__cli_help('?', CRM_EX_USAGE); + if (args->version) { + out->version(out, false); + goto done; } + pcmk__cli_init_logging("pacemaker-schedulerd", args->verbosity); crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); crm_notice("Starting Pacemaker scheduler"); if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { crm_err("Terminating due to bad permissions on " PE_STATE_DIR); - fprintf(stderr, - "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); - fflush(stderr); - return CRM_EX_FATAL; + exit_code = CRM_EX_FATAL; + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "ERROR: Bad permissions on %s (see logs for details)", PE_STATE_DIR); + goto done; } - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks); + ipcs = pcmk__serve_schedulerd_ipc(&ipc_callbacks); if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); - crm_exit(CRM_EX_FATAL); + g_set_error(&error, PCMK__EXITC_ERROR, exit_code, + "Failed to create pacemaker-schedulerd server: exiting and inhibiting respawn"); + exit_code = CRM_EX_FATAL; + goto done; } - pcmk__register_formats(NULL, formats); - rc = pcmk__output_new(&out, "log", NULL, argv); - if ((rc != pcmk_rc_ok) || (out == NULL)) { - crm_err("Can't log resource details due to internal error: %s\n", - pcmk_rc_str(rc)); - crm_exit(CRM_EX_FATAL); + logger_out = pcmk__new_logger(); + if (logger_out == NULL) { + exit_code = CRM_EX_FATAL; + goto done; } - pe__register_messages(out); - pcmk__register_lib_messages(out); - - pcmk__output_set_log_level(out, LOG_TRACE); + pcmk__output_set_log_level(logger_out, LOG_TRACE); /* Create the mainloop and run it... */ mainloop = g_main_loop_new(NULL, FALSE); crm_notice("Pacemaker scheduler successfully started and accepting connections"); g_main_loop_run(mainloop); + +done: + g_strfreev(options.remainder); + g_strfreev(processed_args); + pcmk__free_arg_context(context); + + pcmk__output_and_clear_error(error, out); pengine_shutdown(0); } void pengine_shutdown(int nsig) { - mainloop_del_ipc_server(ipcs); - ipcs = NULL; + if (ipcs != NULL) { + crm_trace("Closing IPC server"); + mainloop_del_ipc_server(ipcs); + ipcs = NULL; + } - pe_free_working_set(sched_data_set); - sched_data_set = NULL; + if (sched_data_set != NULL) { + pe_free_working_set(sched_data_set); + sched_data_set = NULL; + } + + if (logger_out != NULL) { + logger_out->finish(logger_out, exit_code, true, NULL); + pcmk__output_free(logger_out); + logger_out = NULL; + } - pcmk__unregister_formats(); if (out != NULL) { - out->finish(out, CRM_EX_OK, true, NULL); + out->finish(out, exit_code, true, NULL); pcmk__output_free(out); out = NULL; } - crm_exit(CRM_EX_OK); + pcmk__unregister_formats(); + crm_exit(exit_code); } diff --git a/daemons/schedulerd/pacemaker-schedulerd.h b/daemons/schedulerd/pacemaker-schedulerd.h new file mode 100644 index 0000000000..757e848b29 --- /dev/null +++ b/daemons/schedulerd/pacemaker-schedulerd.h @@ -0,0 +1,21 @@ +/* + * Copyright 2004-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#ifndef PCMK__PACEMAKER_SCHEDULERD__H +#define PCMK__PACEMAKER_SCHEDULERD__H + +#include +#include + +extern pe_working_set_t *sched_data_set; +extern pcmk__output_t *logger_out; +extern pcmk__output_t *out; +extern struct qb_ipcs_service_handlers ipc_callbacks; + +#endif diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/schedulerd_messages.c similarity index 66% copy from daemons/schedulerd/pacemaker-schedulerd.c copy to daemons/schedulerd/schedulerd_messages.c index b93ada7e5a..6adc6facd5 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/schedulerd_messages.c @@ -1,411 +1,265 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU General Public License version 2 * or later (GPLv2+) WITHOUT ANY WARRANTY. */ #include -#include -#include +#include +#include + #include #include #include #include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#define OPTARGS "hVc" - -static GMainLoop *mainloop = NULL; -static qb_ipcs_service_t *ipcs = NULL; -static pe_working_set_t *sched_data_set = NULL; -static pcmk__output_t *out = NULL; - -pcmk__supported_format_t formats[] = { - PCMK__SUPPORTED_FORMAT_LOG, - PCMK__SUPPORTED_FORMAT_NONE, - PCMK__SUPPORTED_FORMAT_TEXT, - { NULL, NULL, NULL } -}; - -void pengine_shutdown(int nsig); +#include "pacemaker-schedulerd.h" static void init_working_set(void) { crm_config_error = FALSE; crm_config_warning = FALSE; was_processing_error = FALSE; was_processing_warning = FALSE; if (sched_data_set == NULL) { sched_data_set = pe_new_working_set(); CRM_ASSERT(sched_data_set != NULL); pe__set_working_set_flags(sched_data_set, pe_flag_no_counts|pe_flag_no_compat); pe__set_working_set_flags(sched_data_set, pe_flag_show_utilization); - sched_data_set->priv = out; + sched_data_set->priv = logger_out; } else { pe_reset_working_set(sched_data_set); } } static void handle_pecalc_op(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender) { static struct series_s { const char *name; const char *param; /* Maximum number of inputs of this kind to save to disk. * If -1, save all; if 0, save none. */ int wrap; } series[] = { { "pe-error", "pe-error-series-max", -1 }, { "pe-warn", "pe-warn-series-max", 5000 }, { "pe-input", "pe-input-series-max", 4000 }, }; static char *last_digest = NULL; static char *filename = NULL; unsigned int seq; int series_id = 0; int series_wrap = 0; char *digest = NULL; const char *value = NULL; time_t execution_date = time(NULL); xmlNode *converted = NULL; xmlNode *reply = NULL; bool is_repoke = false; bool process = true; init_working_set(); digest = calculate_xml_versioned_digest(xml_data, FALSE, FALSE, CRM_FEATURE_SET); converted = copy_xml(xml_data); if (!cli_config_update(&converted, NULL, TRUE)) { sched_data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH); crm_xml_add_int(sched_data_set->graph, "transition_id", 0); crm_xml_add_int(sched_data_set->graph, "cluster-delay", 0); process = false; free(digest); } else if (pcmk__str_eq(digest, last_digest, pcmk__str_casei)) { is_repoke = true; free(digest); } else { free(last_digest); last_digest = digest; } if (process) { pcmk__schedule_actions(sched_data_set, converted, NULL); } // Get appropriate index into series[] array if (was_processing_error) { series_id = 0; } else if (was_processing_warning) { series_id = 1; } else { series_id = 2; } value = pe_pref(sched_data_set->config_hash, series[series_id].param); if ((value == NULL) || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { series_wrap = series[series_id].wrap; } if (pcmk__read_series_sequence(PE_STATE_DIR, series[series_id].name, &seq) != pcmk_rc_ok) { // @TODO maybe handle errors better ... seq = 0; } crm_trace("Series %s: wrap=%d, seq=%u, pref=%s", series[series_id].name, series_wrap, seq, value); sched_data_set->input = NULL; reply = create_reply(msg, sched_data_set->graph); CRM_ASSERT(reply != NULL); if (series_wrap == 0) { // Don't save any inputs of this kind free(filename); filename = NULL; } else if (!is_repoke) { // Input changed, save to disk free(filename); filename = pcmk__series_filename(PE_STATE_DIR, series[series_id].name, seq, true); } crm_xml_add(reply, F_CRM_TGRAPH_INPUT, filename); crm_xml_add_int(reply, "graph-errors", was_processing_error); crm_xml_add_int(reply, "graph-warnings", was_processing_warning); crm_xml_add_int(reply, "config-errors", crm_config_error); crm_xml_add_int(reply, "config-warnings", crm_config_warning); if (pcmk__ipc_send_xml(sender, 0, reply, crm_ipc_server_event) != pcmk_rc_ok) { int graph_file_fd = 0; char *graph_file = NULL; umask(S_IWGRP | S_IWOTH | S_IROTH); graph_file = crm_strdup_printf("%s/pengine.graph.XXXXXX", PE_STATE_DIR); graph_file_fd = mkstemp(graph_file); crm_err("Couldn't send transition graph to peer, writing to %s instead", graph_file); crm_xml_add(reply, F_CRM_TGRAPH, graph_file); write_xml_fd(sched_data_set->graph, graph_file, graph_file_fd, FALSE); free(graph_file); free_xml(first_named_child(reply, F_CRM_DATA)); CRM_ASSERT(pcmk__ipc_send_xml(sender, 0, reply, crm_ipc_server_event) == pcmk_rc_ok); } free_xml(reply); pcmk__log_transition_summary(filename); if (series_wrap == 0) { crm_debug("Not saving input to disk (disabled by configuration)"); } else if (is_repoke) { crm_info("Input has not changed since last time, not saving to disk"); } else { unlink(filename); crm_xml_add_ll(xml_data, "execution-date", (long long) execution_date); write_xml_file(xml_data, filename, TRUE); pcmk__write_series_sequence(PE_STATE_DIR, series[series_id].name, ++seq, series_wrap); } free_xml(converted); } -static gboolean +static void process_pe_message(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender) { const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO); const char *op = crm_element_value(msg, F_CRM_TASK); const char *ref = crm_element_value(msg, F_CRM_REFERENCE); crm_trace("Processing %s op (ref=%s)...", op, ref); if (op == NULL) { /* error */ } else if (strcasecmp(op, CRM_OP_HELLO) == 0) { /* ignore */ } else if (pcmk__str_eq(crm_element_value(msg, F_CRM_MSG_TYPE), XML_ATTR_RESPONSE, pcmk__str_casei)) { /* ignore */ } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_PENGINE) != 0) { crm_trace("Bad sys-to %s", crm_str(sys_to)); - return FALSE; } else if (strcasecmp(op, CRM_OP_PECALC) == 0) { handle_pecalc_op(msg, xml_data, sender); } - - return TRUE; } static int32_t pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) { crm_trace("Connection %p", c); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } -gboolean process_pe_message(xmlNode *msg, xmlNode *xml_data, - pcmk__client_t *sender); - static int32_t pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *c = pcmk__find_client(qbc); xmlNode *msg = pcmk__client_data2xml(c, data, &id, &flags); pcmk__ipc_send_ack(c, id, flags, "ack", CRM_EX_INDETERMINATE); if (msg != NULL) { xmlNode *data_xml = get_message_xml(msg, F_CRM_DATA); process_pe_message(msg, data_xml, c); free_xml(msg); } return 0; } /* Error code means? */ static int32_t pe_ipc_closed(qb_ipcs_connection_t * c) { pcmk__client_t *client = pcmk__find_client(c); if (client == NULL) { return 0; } crm_trace("Connection %p", c); pcmk__free_client(client); return 0; } static void pe_ipc_destroy(qb_ipcs_connection_t * c) { crm_trace("Connection %p", c); pe_ipc_closed(c); } struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = pe_ipc_accept, .connection_created = NULL, .msg_process = pe_ipc_dispatch, .connection_closed = pe_ipc_closed, .connection_destroyed = pe_ipc_destroy }; - -static pcmk__cli_option_t long_options[] = { - // long option, argument type, storage, short option, description, flags - { - "help", no_argument, NULL, '?', - "\tThis text", pcmk__option_default - }, - { - "verbose", no_argument, NULL, 'V', - "\tIncrease debug output", pcmk__option_default - }, - { 0, 0, 0, 0 } -}; - -int -main(int argc, char **argv) -{ - int flag; - int index = 0; - int argerr = 0; - int rc = pcmk_rc_ok; - - crm_log_preinit(NULL, argc, argv); - pcmk__set_cli_options(NULL, "[options]", long_options, - "daemon for calculating a Pacemaker cluster's " - "response to events"); - - mainloop_add_signal(SIGTERM, pengine_shutdown); - - while (1) { - flag = pcmk__next_cli_option(argc, argv, &index, NULL); - if (flag == -1) - break; - - switch (flag) { - case 'V': - crm_bump_log_level(argc, argv); - break; - case 'h': /* Help message */ - pcmk__cli_help('?', CRM_EX_OK); - break; - default: - ++argerr; - break; - } - } - - if (argc - optind == 1 && pcmk__str_eq("metadata", argv[optind], pcmk__str_casei)) { - pe_metadata(); - return CRM_EX_OK; - } - - if (optind > argc) { - ++argerr; - } - - if (argerr) { - pcmk__cli_help('?', CRM_EX_USAGE); - } - - crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - crm_notice("Starting Pacemaker scheduler"); - - if (pcmk__daemon_can_write(PE_STATE_DIR, NULL) == FALSE) { - crm_err("Terminating due to bad permissions on " PE_STATE_DIR); - fprintf(stderr, - "ERROR: Bad permissions on " PE_STATE_DIR " (see logs for details)\n"); - fflush(stderr); - return CRM_EX_FATAL; - } - - ipcs = mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_SHM, &ipc_callbacks); - if (ipcs == NULL) { - crm_err("Failed to create IPC server: shutting down and inhibiting respawn"); - crm_exit(CRM_EX_FATAL); - } - - pcmk__register_formats(NULL, formats); - rc = pcmk__output_new(&out, "log", NULL, argv); - if ((rc != pcmk_rc_ok) || (out == NULL)) { - crm_err("Can't log resource details due to internal error: %s\n", - pcmk_rc_str(rc)); - crm_exit(CRM_EX_FATAL); - } - - pe__register_messages(out); - pcmk__register_lib_messages(out); - - pcmk__output_set_log_level(out, LOG_TRACE); - - /* Create the mainloop and run it... */ - mainloop = g_main_loop_new(NULL, FALSE); - crm_notice("Pacemaker scheduler successfully started and accepting connections"); - g_main_loop_run(mainloop); - pengine_shutdown(0); -} - -void -pengine_shutdown(int nsig) -{ - mainloop_del_ipc_server(ipcs); - ipcs = NULL; - - pe_free_working_set(sched_data_set); - sched_data_set = NULL; - - pcmk__unregister_formats(); - if (out != NULL) { - out->finish(out, CRM_EX_OK, true, NULL); - pcmk__output_free(out); - out = NULL; - } - - crm_exit(CRM_EX_OK); -} diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am index 02f16866e6..076d4bd452 100644 --- a/include/crm/common/Makefile.am +++ b/include/crm/common/Makefile.am @@ -1,25 +1,26 @@ # # Copyright 2004-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # MAINTAINERCLEANFILES = Makefile.in headerdir=$(pkgincludedir)/crm/common header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ - nvpair.h acl.h agents.h ipc_controld.h ipc_pacemakerd.h output.h \ + nvpair.h acl.h agents.h ipc_controld.h ipc_pacemakerd.h ipc_schedulerd.h \ + output.h \ agents_compat.h \ logging_compat.h \ mainloop_compat.h \ util_compat.h \ xml_compat.h noinst_HEADERS = internal.h alerts_internal.h \ iso8601_internal.h remote_internal.h xml_internal.h \ ipc_internal.h output_internal.h cmdline_internal.h \ attrd_internal.h options_internal.h results_internal.h \ strings_internal.h lists_internal.h logging_internal.h diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h index a83369407d..8d1b7b835b 100644 --- a/include/crm/common/ipc_internal.h +++ b/include/crm/common/ipc_internal.h @@ -1,245 +1,252 @@ /* * Copyright 2013-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef PCMK__IPC_INTERNAL_H #define PCMK__IPC_INTERNAL_H #ifdef __cplusplus extern "C" { #endif #include // bool #include // uint32_t, uint64_t, UINT64_C() #include // struct iovec #include // uid_t, gid_t, pid_t, size_t #ifdef HAVE_GNUTLS_GNUTLS_H # include // gnutls_session_t #endif #include // guint, gpointer, GQueue, ... #include // xmlNode #include // qb_ipcs_connection_t, ... #include // US_AUTH_GETPEEREID #include #include // mainloop_io_t /* denotes "non yieldable PID" on FreeBSD, or actual PID1 in scenarios that require a delicate handling anyway (socket-based activation with systemd); we can be reasonably sure that this PID is never possessed by the actual child daemon, as it gets taken either by the proper init, or by pacemakerd itself (i.e. this precludes anything else); note that value of zero is meant to carry "unset" meaning, and better not to bet on/conditionalize over signedness of pid_t */ #define PCMK__SPECIAL_PID 1 // Timeout (in seconds) to use for IPC client sends, reply waits, etc. #define PCMK__IPC_TIMEOUT 120 #if defined(US_AUTH_GETPEEREID) /* on FreeBSD, we don't want to expose "non-yieldable PID" (leading to "IPC liveness check only") as its nominal representation, which could cause confusion -- this is unambiguous as long as there's no socket-based activation like with systemd (very improbable) */ #define PCMK__SPECIAL_PID_AS_0(p) (((p) == PCMK__SPECIAL_PID) ? 0 : (p)) #else #define PCMK__SPECIAL_PID_AS_0(p) (p) #endif /*! * \internal * \brief Check the authenticity and liveness of the process via IPC end-point * * When IPC daemon under given IPC end-point (name) detected, its authenticity * is verified by the means of comparing against provided referential UID and * GID, and the result of this check can be deduced from the return value. * As an exception, referential UID of 0 (~ root) satisfies arbitrary * detected daemon's credentials. * * \param[in] name IPC name to base the search on * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the found process * upon returning 1 or -2 * (not available on FreeBSD, special value of 1, * see PCMK__SPECIAL_PID, used instead, and the caller * is required to special case this value respectively) * * \return Standard Pacemaker return code * * \note Return codes of particular interest include pcmk_rc_ipc_unresponsive * indicating that no trace of IPC liveness was detected, and * pcmk_rc_ipc_unauthorized indicating that the IPC endpoint is blocked by * an unauthorized process. * \note This function emits a log message for return codes other than * pcmk_rc_ok and pcmk_rc_ipc_unresponsive, and when there isn't a perfect * match in respect to \p reguid and/or \p refgid, for a possible * least privilege principle violation. * * \see crm_ipc_is_authentic_process */ int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid); /* * Server-related */ typedef struct pcmk__client_s pcmk__client_t; struct pcmk__remote_s { /* Shared */ char *buffer; size_t buffer_size; size_t buffer_offset; int auth_timeout; int tcp_socket; mainloop_io_t *source; /* CIB-only */ bool authenticated; char *token; /* TLS only */ # ifdef HAVE_GNUTLS_GNUTLS_H gnutls_session_t *tls_session; bool tls_handshake_complete; # endif }; enum pcmk__client_flags { // Lower 32 bits are reserved for server (not library) use // Next 8 bits are reserved for client type (sort of a cheap enum) pcmk__client_ipc = (UINT64_C(1) << 32), // Client uses plain IPC pcmk__client_tcp = (UINT64_C(1) << 33), // Client uses TCP connection # ifdef HAVE_GNUTLS_GNUTLS_H pcmk__client_tls = (UINT64_C(1) << 34), // Client uses TCP with TLS # endif // The rest are client attributes pcmk__client_proxied = (UINT64_C(1) << 40), // Client IPC is proxied pcmk__client_privileged = (UINT64_C(1) << 41), // root or cluster user pcmk__client_to_proxy = (UINT64_C(1) << 42), // Local client to be proxied }; #define PCMK__CLIENT_TYPE(client) ((client)->flags & UINT64_C(0xff00000000)) struct pcmk__client_s { unsigned int pid; char *id; char *name; char *user; uint64_t flags; // Group of pcmk__client_flags int request_id; void *userdata; int event_timer; GQueue *event_queue; /* Depending on the client type, only some of the following will be * populated/valid. @TODO Maybe convert to a union. */ qb_ipcs_connection_t *ipcs; /* IPC */ struct pcmk__remote_s *remote; /* TCP/TLS */ unsigned int queue_backlog; /* IPC queue length after last flush */ unsigned int queue_max; /* Evict client whose queue grows this big */ }; #define pcmk__set_client_flags(client, flags_to_set) do { \ (client)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Client", pcmk__client_name(client), \ (client)->flags, (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_client_flags(client, flags_to_clear) do { \ (client)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_TRACE, \ "Client", pcmk__client_name(client), \ (client)->flags, (flags_to_clear), #flags_to_clear); \ } while (0) #define pcmk__set_ipc_flags(ipc_flags, ipc_name, flags_to_set) do { \ ipc_flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \ "IPC", (ipc_name), \ (ipc_flags), (flags_to_set), \ #flags_to_set); \ } while (0) #define pcmk__clear_ipc_flags(ipc_flags, ipc_name, flags_to_clear) do { \ ipc_flags = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \ "IPC", (ipc_name), \ (ipc_flags), (flags_to_clear), \ #flags_to_clear); \ } while (0) guint pcmk__ipc_client_count(void); void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data); void pcmk__client_cleanup(void); pcmk__client_t *pcmk__find_client(qb_ipcs_connection_t *c); pcmk__client_t *pcmk__find_client_by_id(const char *id); const char *pcmk__client_name(pcmk__client_t *c); const char *pcmk__client_type_str(uint64_t client_type); pcmk__client_t *pcmk__new_unauth_client(void *key); pcmk__client_t *pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid); void pcmk__free_client(pcmk__client_t *c); void pcmk__drop_all_clients(qb_ipcs_service_t *s); bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax); int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, crm_exit_t status); #define pcmk__ipc_send_ack(c, req, flags, tag, st) \ pcmk__ipc_send_ack_as(__func__, __LINE__, (c), (req), (flags), (tag), (st)) int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes); int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, uint32_t flags); int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags); xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags); int pcmk__client_pid(qb_ipcs_connection_t *c); void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb); +qb_ipcs_service_t *pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb); qb_ipcs_service_t *pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb); void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb); void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm); +static inline const char * +pcmk__ipc_sys_name(const char *ipc_name, const char *fallback) +{ + return ipc_name ? ipc_name : ((crm_system_name ? crm_system_name : fallback)); +} + #ifdef __cplusplus } #endif #endif diff --git a/include/crm/common/ipc_schedulerd.h b/include/crm/common/ipc_schedulerd.h new file mode 100644 index 0000000000..995d15a1bd --- /dev/null +++ b/include/crm/common/ipc_schedulerd.h @@ -0,0 +1,63 @@ +/* + * Copyright 2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#ifndef PCMK__IPC_SCHEDULERD__H +# define PCMK__IPC_SCHEDULERD__H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \file + * \brief IPC commands for Schedulerd + * + * \ingroup core + */ + +#include // pcmk_ipc_api_t + +//! Possible types of schedulerd replies +enum pcmk_schedulerd_api_reply { + pcmk_schedulerd_reply_unknown, + pcmk_schedulerd_reply_graph, +}; + +/*! + * Schedulerd reply passed to event callback + */ +typedef struct { + enum pcmk_schedulerd_api_reply reply_type; + + union { + // pcmk__schedulerd_reply_graph + struct { + xmlNode *tgraph; + const char *reference; + const char *input; + } graph; + } data; +} pcmk_schedulerd_api_reply_t; + +/*! + * \brief Make an IPC request to the scheduler for the transition graph + * + * \param[in] api IPC API connection + * \param[in] cib The CIB to create a transition graph for + * \param[out] ref The reference ID a response will have + * + * \return Standard Pacemaker return code + */ +int pcmk_schedulerd_api_graph(pcmk_ipc_api_t *api, xmlNode *cib, char **ref); + +#ifdef __cplusplus +} +#endif + +#endif // PCMK__IPC_SCHEDULERD__H diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index 5b0d069879..8de0337961 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -1,112 +1,113 @@ # # Copyright 2004-2021 the Pacemaker project contributors # # The version control history for this file may have further details. # # This source code is licensed under the GNU General Public License version 2 # or later (GPLv2+) WITHOUT ANY WARRANTY. # include $(top_srcdir)/mk/common.mk include $(top_srcdir)/lib/common/mock.mk AM_CPPFLAGS += -I$(top_builddir)/lib/gnu -I$(top_srcdir)/lib/gnu MOSTLYCLEANFILES = md5.c ## libraries lib_LTLIBRARIES = libcrmcommon.la check_LTLIBRARIES = libcrmcommon_test.la # Disable -Wcast-qual if used, because we do some hacky casting, # and because libxml2 has some signatures that should be const but aren't # for backward compatibility reasons. # s390 needs -fPIC # s390-suse-linux/bin/ld: .libs/ipc.o: relocation R_390_PC32DBL against `__stack_chk_fail@@GLIBC_2.4' can not be used when making a shared object; recompile with -fPIC CFLAGS = $(CFLAGS_COPY:-Wcast-qual=) -fPIC # Without "." here, check-recursive will run through the subdirectories first # and then run "make check" here. This will fail, because there's things in # the subdirectories that need check_LTLIBRARIES built first. Adding "." here # changes the order so the subdirectories are processed afterwards. SUBDIRS = . tests noinst_HEADERS = crmcommon_private.h mock_private.h libcrmcommon_la_LDFLAGS = -version-info 42:0:8 libcrmcommon_la_CFLAGS = $(CFLAGS_HARDENED_LIB) libcrmcommon_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) libcrmcommon_la_LIBADD = @LIBADD_DL@ # If configured with --with-profiling or --with-coverage, BUILD_PROFILING will # be set and -fno-builtin will be added to the CFLAGS. However, libcrmcommon # uses the fabs() function which is normally supplied by gcc as one of its # builtins. Therefore we need to explicitly link against libm here or the # tests won't link. if BUILD_PROFILING libcrmcommon_la_LIBADD += -lm endif # Use += rather than backlashed continuation lines for parsing by bumplibs libcrmcommon_la_SOURCES = libcrmcommon_la_SOURCES += acl.c libcrmcommon_la_SOURCES += agents.c libcrmcommon_la_SOURCES += alerts.c libcrmcommon_la_SOURCES += attrd_client.c if BUILD_CIBSECRETS libcrmcommon_la_SOURCES += cib_secrets.c endif libcrmcommon_la_SOURCES += cmdline.c libcrmcommon_la_SOURCES += digest.c libcrmcommon_la_SOURCES += io.c libcrmcommon_la_SOURCES += ipc_client.c libcrmcommon_la_SOURCES += ipc_common.c libcrmcommon_la_SOURCES += ipc_controld.c libcrmcommon_la_SOURCES += ipc_pacemakerd.c +libcrmcommon_la_SOURCES += ipc_schedulerd.c libcrmcommon_la_SOURCES += ipc_server.c libcrmcommon_la_SOURCES += iso8601.c libcrmcommon_la_SOURCES += lists.c libcrmcommon_la_SOURCES += logging.c libcrmcommon_la_SOURCES += mainloop.c libcrmcommon_la_SOURCES += messages.c libcrmcommon_la_SOURCES += nvpair.c libcrmcommon_la_SOURCES += operations.c libcrmcommon_la_SOURCES += options.c libcrmcommon_la_SOURCES += output.c libcrmcommon_la_SOURCES += output_html.c libcrmcommon_la_SOURCES += output_log.c libcrmcommon_la_SOURCES += output_none.c libcrmcommon_la_SOURCES += output_text.c libcrmcommon_la_SOURCES += output_xml.c libcrmcommon_la_SOURCES += patchset.c libcrmcommon_la_SOURCES += pid.c libcrmcommon_la_SOURCES += procfs.c libcrmcommon_la_SOURCES += remote.c libcrmcommon_la_SOURCES += results.c libcrmcommon_la_SOURCES += schemas.c libcrmcommon_la_SOURCES += strings.c libcrmcommon_la_SOURCES += utils.c libcrmcommon_la_SOURCES += watchdog.c libcrmcommon_la_SOURCES += xml.c libcrmcommon_la_SOURCES += xpath.c # It's possible to build the library adding ../gnu/md5.c directly to SOURCES, # but distclean chokes on that because it tries to include the source's .Plo # file, which may have already been cleaned. nodist_libcrmcommon_la_SOURCES = md5.c libcrmcommon_test_la_SOURCES = $(libcrmcommon_la_SOURCES) libcrmcommon_test_la_SOURCES += mock.c libcrmcommon_test_la_LDFLAGS = $(LDFLAGS_HARDENED_LIB) $(WRAPPED_FLAGS) libcrmcommon_test_la_CFLAGS = $(libcrmcommon_la_CFLAGS) libcrmcommon_test_la_LIBADD = $(libcrmcommon_la_LIBADD) nodist_libcrmcommon_test_la_SOURCES = $(nodist_libcrmcommon_la_SOURCES) md5.c: ../gnu/md5.c cp "$<" "$@" clean-generic: rm -f *.log *.debug *.xml *~ diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h index ba41983441..c1dc32a781 100644 --- a/lib/common/crmcommon_private.h +++ b/lib/common/crmcommon_private.h @@ -1,311 +1,314 @@ /* * Copyright 2018-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #ifndef CRMCOMMON_PRIVATE__H # define CRMCOMMON_PRIVATE__H /* This header is for the sole use of libcrmcommon, so that functions can be * declared with G_GNUC_INTERNAL for efficiency. */ #include // uint8_t, uint32_t #include // bool #include // size_t #include // GList #include // xmlNode, xmlAttr #include // struct qb_ipc_response_header // Decent chunk size for processing large amounts of data #define PCMK__BUFFER_SIZE 4096 /* * XML and ACLs */ enum xml_private_flags { pcmk__xf_none = 0x0000, pcmk__xf_dirty = 0x0001, pcmk__xf_deleted = 0x0002, pcmk__xf_created = 0x0004, pcmk__xf_modified = 0x0008, pcmk__xf_tracking = 0x0010, pcmk__xf_processed = 0x0020, pcmk__xf_skip = 0x0040, pcmk__xf_moved = 0x0080, pcmk__xf_acl_enabled = 0x0100, pcmk__xf_acl_read = 0x0200, pcmk__xf_acl_write = 0x0400, pcmk__xf_acl_deny = 0x0800, pcmk__xf_acl_create = 0x1000, pcmk__xf_acl_denied = 0x2000, pcmk__xf_lazy = 0x4000, }; /* When deleting portions of an XML tree, we keep a record so we can know later * (e.g. when checking differences) that something was deleted. */ typedef struct pcmk__deleted_xml_s { char *path; int position; } pcmk__deleted_xml_t; typedef struct xml_private_s { long check; uint32_t flags; char *user; GList *acls; GList *deleted_objs; // List of pcmk__deleted_xml_t } xml_private_t; #define pcmk__set_xml_flags(xml_priv, flags_to_set) do { \ (xml_priv)->flags = pcmk__set_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_set), #flags_to_set); \ } while (0) #define pcmk__clear_xml_flags(xml_priv, flags_to_clear) do { \ (xml_priv)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ LOG_NEVER, "XML", "XML node", (xml_priv)->flags, \ (flags_to_clear), #flags_to_clear); \ } while (0) G_GNUC_INTERNAL void pcmk__xml2text(xmlNode *data, int options, char **buffer, int *offset, int *max, int depth); G_GNUC_INTERNAL void pcmk__buffer_add_char(char **buffer, int *offset, int *max, char c); G_GNUC_INTERNAL void pcmk__set_xml_doc_flag(xmlNode *xml, enum xml_private_flags flag); G_GNUC_INTERNAL bool pcmk__tracking_xml_changes(xmlNode *xml, bool lazy); G_GNUC_INTERNAL int pcmk__element_xpath(const char *prefix, xmlNode *xml, char *buffer, int offset, size_t buffer_size); G_GNUC_INTERNAL void pcmk__mark_xml_created(xmlNode *xml); G_GNUC_INTERNAL int pcmk__xml_position(xmlNode *xml, enum xml_private_flags ignore_if_set); G_GNUC_INTERNAL xmlNode *pcmk__xml_match(xmlNode *haystack, xmlNode *needle, bool exact); G_GNUC_INTERNAL void pcmk__xe_log(int log_level, const char *file, const char *function, int line, const char *prefix, xmlNode *data, int depth, int options); G_GNUC_INTERNAL void pcmk__xml_update(xmlNode *parent, xmlNode *target, xmlNode *update, bool as_diff); G_GNUC_INTERNAL xmlNode *pcmk__xc_match(xmlNode *root, xmlNode *search_comment, bool exact); G_GNUC_INTERNAL void pcmk__xc_update(xmlNode *parent, xmlNode *target, xmlNode *update); G_GNUC_INTERNAL void pcmk__free_acls(GList *acls); G_GNUC_INTERNAL void pcmk__unpack_acl(xmlNode *source, xmlNode *target, const char *user); G_GNUC_INTERNAL bool pcmk__check_acl(xmlNode *xml, const char *name, enum xml_private_flags mode); G_GNUC_INTERNAL void pcmk__apply_acl(xmlNode *xml); G_GNUC_INTERNAL void pcmk__apply_creation_acl(xmlNode *xml, bool check_top); G_GNUC_INTERNAL void pcmk__mark_xml_attr_dirty(xmlAttr *a); G_GNUC_INTERNAL bool pcmk__xa_filterable(const char *name); static inline const char * pcmk__xml_attr_value(const xmlAttr *attr) { return ((attr == NULL) || (attr->children == NULL))? NULL : (const char *) attr->children->content; } /* * IPC */ #define PCMK__IPC_VERSION 1 #define PCMK__CONTROLD_API_MAJOR "1" #define PCMK__CONTROLD_API_MINOR "0" // IPC behavior that varies by daemon typedef struct pcmk__ipc_methods_s { /*! * \internal * \brief Allocate any private data needed by daemon IPC * * \param[in] api IPC API connection * * \return Standard Pacemaker return code */ int (*new_data)(pcmk_ipc_api_t *api); /*! * \internal * \brief Free any private data used by daemon IPC * * \param[in] api_data Data allocated by new_data() method */ void (*free_data)(void *api_data); /*! * \internal * \brief Perform daemon-specific handling after successful connection * * Some daemons require clients to register before sending any other * commands. The controller requires a CRM_OP_HELLO (with no reply), and * the CIB manager, executor, and fencer require a CRM_OP_REGISTER (with a * reply). Ideally this would be consistent across all daemons, but for now * this allows each to do its own authorization. * * \param[in] api IPC API connection * * \return Standard Pacemaker return code */ int (*post_connect)(pcmk_ipc_api_t *api); /*! * \internal * \brief Check whether an IPC request results in a reply * * \param[in] api IPC API connection * \param[in] request IPC request XML * * \return true if request would result in an IPC reply, false otherwise */ bool (*reply_expected)(pcmk_ipc_api_t *api, xmlNode *request); /*! * \internal * \brief Perform daemon-specific handling of an IPC message * * \param[in] api IPC API connection * \param[in] msg Message read from IPC connection */ void (*dispatch)(pcmk_ipc_api_t *api, xmlNode *msg); /*! * \internal * \brief Perform daemon-specific handling of an IPC disconnect * * \param[in] api IPC API connection */ void (*post_disconnect)(pcmk_ipc_api_t *api); } pcmk__ipc_methods_t; // Implementation of pcmk_ipc_api_t struct pcmk_ipc_api_s { enum pcmk_ipc_server server; // Daemon this IPC API instance is for enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched size_t ipc_size_max; // maximum IPC buffer size crm_ipc_t *ipc; // IPC connection mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC bool free_on_disconnect; // Whether disconnect should free object pcmk_ipc_callback_t cb; // Caller-registered callback (if any) void *user_data; // Caller-registered data (if any) void *api_data; // For daemon-specific use pcmk__ipc_methods_t *cmds; // Behavior that varies by daemon }; typedef struct pcmk__ipc_header_s { struct qb_ipc_response_header qb; uint32_t size_uncompressed; uint32_t size_compressed; uint32_t flags; uint8_t version; } pcmk__ipc_header_t; G_GNUC_INTERNAL int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request); G_GNUC_INTERNAL void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data); G_GNUC_INTERNAL unsigned int pcmk__ipc_buffer_size(unsigned int max); G_GNUC_INTERNAL bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__controld_api_methods(void); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void); +G_GNUC_INTERNAL +pcmk__ipc_methods_t *pcmk__schedulerd_api_methods(void); + /* * Logging */ /*! * \brief Check the authenticity of the IPC socket peer process * * If everything goes well, peer's authenticity is verified by the means * of comparing against provided referential UID and GID (either satisfies), * and the result of this check can be deduced from the return value. * As an exception, detected UID of 0 ("root") satisfies arbitrary * provided referential daemon's credentials. * * \param[in] qb_ipc libqb client connection if available * \param[in] sock IPC related, connected Unix socket to check peer of * \param[in] refuid referential UID to check against * \param[in] refgid referential GID to check against * \param[out] gotpid to optionally store obtained PID of the peer * (not available on FreeBSD, special value of 1 * used instead, and the caller is required to * special case this value respectively) * \param[out] gotuid to optionally store obtained UID of the peer * \param[out] gotgid to optionally store obtained GID of the peer * * \return Standard Pacemaker return code * ie: 0 if it the connection is authentic * pcmk_rc_ipc_unauthorized if the connection is not authentic, * standard errors. * * \note While this function is tolerant on what constitutes authorized * IPC daemon process (its effective user matches UID=0 or \p refuid, * or at least its group matches \p refgid), either or both (in case * of UID=0) mismatches on the expected credentials of such peer * process \e shall be investigated at the caller when value of 1 * gets returned there, since higher-than-expected privileges in * respect to the expected/intended credentials possibly violate * the least privilege principle and may pose an additional risk * (i.e. such accidental inconsistency shall be eventually fixed). */ int pcmk__crm_ipc_is_authentic_process(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid); #endif // CRMCOMMON_PRIVATE__H diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 3864126664..5106fb2844 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -1,1470 +1,1471 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #if defined(US_AUTH_PEERCRED_UCRED) || defined(US_AUTH_PEERCRED_SOCKPEERCRED) # ifdef US_AUTH_PEERCRED_UCRED # ifndef _GNU_SOURCE # define _GNU_SOURCE # endif # endif # include #elif defined(US_AUTH_GETPEERUCRED) # include #endif #include #include #include #include #include /* indirectly: pcmk_err_generic */ #include #include #include #include "crmcommon_private.h" /*! * \brief Create a new object for using Pacemaker daemon IPC * * \param[out] api Where to store new IPC object * \param[in] server Which Pacemaker daemon the object is for * * \return Standard Pacemaker result code * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only - * supports the controller & pacemakerd IPC API. + * supports the controller, pacemakerd, and schedulerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { if (api == NULL) { return EINVAL; } *api = calloc(1, sizeof(pcmk_ipc_api_t)); if (*api == NULL) { return errno; } (*api)->server = server; if (pcmk_ipc_name(*api, false) == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return EOPNOTSUPP; } (*api)->ipc_size_max = 0; // Set server methods and max_size (if not default) switch (server) { case pcmk_ipc_attrd: break; case pcmk_ipc_based: (*api)->ipc_size_max = 512 * 1024; // 512KB break; case pcmk_ipc_controld: (*api)->cmds = pcmk__controld_api_methods(); break; case pcmk_ipc_execd: break; case pcmk_ipc_fenced: break; case pcmk_ipc_pacemakerd: (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: + (*api)->cmds = pcmk__schedulerd_api_methods(); // @TODO max_size could vary by client, maybe take as argument? (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB break; } if ((*api)->cmds == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), (*api)->ipc_size_max); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } // If daemon API has its own data to track, allocate it if ((*api)->cmds->new_data != NULL) { if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) { pcmk_free_ipc_api(*api); *api = NULL; return ENOMEM; } } crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true)); return pcmk_rc_ok; } static void free_daemon_specific_data(pcmk_ipc_api_t *api) { if ((api != NULL) && (api->cmds != NULL)) { if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) { api->cmds->free_data(api->api_data); api->api_data = NULL; } free(api->cmds); api->cmds = NULL; } } /*! * \internal * \brief Call an IPC API event callback, if one is registed * * \param[in] api IPC API connection * \param[in] event_type The type of event that occurred * \param[in] status Event status * \param[in] event_data Event-specific data */ void pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type, crm_exit_t status, void *event_data) { if ((api != NULL) && (api->cb != NULL)) { api->cb(api, event_type, status, event_data, api->user_data); } } /*! * \internal * \brief Clean up after an IPC disconnect * * \param[in] user_data IPC API connection that disconnected * * \note This function can be used as a main loop IPC destroy callback. */ static void ipc_post_disconnect(gpointer user_data) { pcmk_ipc_api_t *api = user_data; crm_info("Disconnected from %s IPC API", pcmk_ipc_name(api, true)); // Perform any daemon-specific handling needed if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) { api->cmds->post_disconnect(api); } // Call client's registered event callback pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT, NULL); /* If this is being called from a running main loop, mainloop_gio_destroy() * will free ipc and mainloop_io immediately after calling this function. * If this is called from a stopped main loop, these will leak, so the best * practice is to close the connection before stopping the main loop. */ api->ipc = NULL; api->mainloop_io = NULL; if (api->free_on_disconnect) { /* pcmk_free_ipc_api() has already been called, but did not free api * or api->cmds because this function needed them. Do that now. */ free_daemon_specific_data(api); crm_trace("Freeing IPC API object after disconnect"); free(api); } } /*! * \brief Free the contents of an IPC API object * * \param[in] api IPC API object to free */ void pcmk_free_ipc_api(pcmk_ipc_api_t *api) { bool free_on_disconnect = false; if (api == NULL) { return; } crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true)); if (api->ipc != NULL) { if (api->mainloop_io != NULL) { /* We need to keep the api pointer itself around, because it is the * user data for the IPC client destroy callback. That will be * triggered by the pcmk_disconnect_ipc() call below, but it might * happen later in the main loop (if still running). * * This flag tells the destroy callback to free the object. It can't * do that unconditionally, because the application might call this * function after a disconnect that happened by other means. */ free_on_disconnect = api->free_on_disconnect = true; } pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true } if (!free_on_disconnect) { free_daemon_specific_data(api); crm_trace("Freeing IPC API object"); free(api); } } /*! * \brief Get the IPC name used with an IPC API connection * * \param[in] api IPC API connection * \param[in] for_log If true, return human-friendly name instead of IPC name * * \return IPC API's human-friendly or connection name, or if none is available, * "Pacemaker" if for_log is true and NULL if for_log is false */ const char * pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) { if (api == NULL) { return for_log? "Pacemaker" : NULL; } switch (api->server) { case pcmk_ipc_attrd: return for_log? "attribute manager" : NULL /* T_ATTRD */; case pcmk_ipc_based: return for_log? "CIB manager" : NULL /* PCMK__SERVER_BASED_RW */; case pcmk_ipc_controld: return for_log? "controller" : CRM_SYSTEM_CRMD; case pcmk_ipc_execd: return for_log? "executor" : NULL /* CRM_SYSTEM_LRMD */; case pcmk_ipc_fenced: return for_log? "fencer" : NULL /* "stonith-ng" */; case pcmk_ipc_pacemakerd: return for_log? "launcher" : CRM_SYSTEM_MCP; case pcmk_ipc_schedulerd: - return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */; + return for_log? "scheduler" : CRM_SYSTEM_PENGINE; default: return for_log? "Pacemaker" : NULL; } } /*! * \brief Check whether an IPC API connection is active * * \param[in] api IPC API connection * * \return true if IPC is connected, false otherwise */ bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api) { return (api != NULL) && crm_ipc_connected(api->ipc); } /*! * \internal * \brief Call the daemon-specific API's dispatch function * * Perform daemon-specific handling of IPC reply dispatch. It is the daemon * method's responsibility to call the client's registered event callback, as * well as allocate and free any event data. * * \param[in] api IPC API connection */ static void call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message) { crm_log_xml_trace(message, "ipc-received"); if ((api->cmds != NULL) && (api->cmds->dispatch != NULL)) { api->cmds->dispatch(api, message); } } /*! * \internal * \brief Dispatch data read from IPC source * * \param[in] buffer Data read from IPC * \param[in] length Number of bytes of data in buffer (ignored) * \param[in] user_data IPC object * * \return Always 0 (meaning connection is still required) * * \note This function can be used as a main loop IPC dispatch callback. */ static int dispatch_ipc_data(const char *buffer, ssize_t length, gpointer user_data) { pcmk_ipc_api_t *api = user_data; xmlNode *msg; CRM_CHECK(api != NULL, return 0); if (buffer == NULL) { crm_warn("Empty message received from %s IPC", pcmk_ipc_name(api, true)); return 0; } msg = string2xml(buffer); if (msg == NULL) { crm_warn("Malformed message received from %s IPC", pcmk_ipc_name(api, true)); return 0; } call_api_dispatch(api, msg); free_xml(msg); return 0; } /*! * \brief Check whether an IPC connection has data available (without main loop) * * \param[in] api IPC API connection * \param[in] timeout_ms If less than 0, poll indefinitely; if 0, poll once * and return immediately; otherwise, poll for up to * this many milliseconds * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function to check whether IPC data is available. Return values of * interest include pcmk_rc_ok meaning data is available, and EAGAIN * meaning no data is available; all other values indicate errors. * \todo This does not allow the caller to poll multiple file descriptors at * once. If there is demand for that, we could add a wrapper for * crm_ipc_get_fd(api->ipc), so the caller can call poll() themselves. */ int pcmk_poll_ipc(pcmk_ipc_api_t *api, int timeout_ms) { int rc; struct pollfd pollfd = { 0, }; if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) { return EINVAL; } pollfd.fd = crm_ipc_get_fd(api->ipc); pollfd.events = POLLIN; rc = poll(&pollfd, 1, timeout_ms); if (rc < 0) { return errno; } else if (rc == 0) { return EAGAIN; } return pcmk_rc_ok; } /*! * \brief Dispatch available messages on an IPC connection (without main loop) * * \param[in] api IPC API connection * * \return Standard Pacemaker return code * * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call * this function when IPC data is available. */ void pcmk_dispatch_ipc(pcmk_ipc_api_t *api) { if (api == NULL) { return; } while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api); } } } // \return Standard Pacemaker return code static int connect_with_main_loop(pcmk_ipc_api_t *api) { int rc; struct ipc_client_callbacks callbacks = { .dispatch = dispatch_ipc_data, .destroy = ipc_post_disconnect, }; rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api, &callbacks, &(api->mainloop_io)); if (rc != pcmk_rc_ok) { return rc; } crm_debug("Connected to %s IPC (attached to main loop)", pcmk_ipc_name(api, true)); /* After this point, api->mainloop_io owns api->ipc, so api->ipc * should not be explicitly freed. */ return pcmk_rc_ok; } // \return Standard Pacemaker return code static int connect_without_main_loop(pcmk_ipc_api_t *api) { int rc; if (!crm_ipc_connect(api->ipc)) { rc = errno; crm_ipc_close(api->ipc); return rc; } crm_debug("Connected to %s IPC (without main loop)", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } /*! * \brief Connect to a Pacemaker daemon via IPC * * \param[in] api IPC API instance * \param[out] dispatch_type How IPC replies should be dispatched * * \return Standard Pacemaker return code */ int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk_rc_ok; if (api == NULL) { crm_err("Cannot connect to uninitialized API object"); return EINVAL; } if (api->ipc == NULL) { api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), api->ipc_size_max); if (api->ipc == NULL) { crm_err("Failed to re-create IPC API"); return ENOMEM; } } if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true)); return pcmk_rc_ok; } api->dispatch_type = dispatch_type; switch (dispatch_type) { case pcmk_ipc_dispatch_main: rc = connect_with_main_loop(api); break; case pcmk_ipc_dispatch_sync: case pcmk_ipc_dispatch_poll: rc = connect_without_main_loop(api); break; } if (rc != pcmk_rc_ok) { return rc; } if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) { rc = api->cmds->post_connect(api); if (rc != pcmk_rc_ok) { crm_ipc_close(api->ipc); } } return rc; } /*! * \brief Disconnect an IPC API instance * * \param[in] api IPC API connection * * \return Standard Pacemaker return code * * \note If the connection is attached to a main loop, this function should be * called before quitting the main loop, to ensure that all memory is * freed. */ void pcmk_disconnect_ipc(pcmk_ipc_api_t *api) { if ((api == NULL) || (api->ipc == NULL)) { return; } switch (api->dispatch_type) { case pcmk_ipc_dispatch_main: { mainloop_io_t *mainloop_io = api->mainloop_io; // Make sure no code with access to api can use these again api->mainloop_io = NULL; api->ipc = NULL; mainloop_del_ipc_client(mainloop_io); // After this point api might have already been freed } break; case pcmk_ipc_dispatch_poll: case pcmk_ipc_dispatch_sync: { crm_ipc_t *ipc = api->ipc; // Make sure no code with access to api can use ipc again api->ipc = NULL; // This should always be the case already, but to be safe api->free_on_disconnect = false; crm_ipc_destroy(ipc); ipc_post_disconnect(api); } break; } } /*! * \brief Register a callback for IPC API events * * \param[in] api IPC API connection * \param[in] callback Callback to register * \param[in] userdata Caller data to pass to callback * * \note This function may be called multiple times to update the callback * and/or user data. The caller remains responsible for freeing * userdata in any case (after the IPC is disconnected, if the * user data is still registered with the IPC). */ void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb, void *user_data) { if (api == NULL) { return; } api->cb = cb; api->user_data = user_data; } /*! * \internal * \brief Send an XML request across an IPC API connection * * \param[in] api IPC API connection * \param[in] request XML request to send * * \return Standard Pacemaker return code * * \note Daemon-specific IPC API functions should call this function to send * requests, because it handles different dispatch types appropriately. */ int pcmk__send_ipc_request(pcmk_ipc_api_t *api, xmlNode *request) { int rc; xmlNode *reply = NULL; enum crm_ipc_flags flags = crm_ipc_flags_none; if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) { return EINVAL; } crm_log_xml_trace(request, "ipc-sent"); // Synchronous dispatch requires waiting for a reply if ((api->dispatch_type == pcmk_ipc_dispatch_sync) && (api->cmds != NULL) && (api->cmds->reply_expected != NULL) && (api->cmds->reply_expected(api, request))) { flags = crm_ipc_client_response; } // The 0 here means a default timeout of 5 seconds rc = crm_ipc_send(api->ipc, request, flags, 0, &reply); if (rc < 0) { return pcmk_legacy2rc(rc); } else if (rc == 0) { return ENODATA; } // With synchronous dispatch, we dispatch any reply now if (reply != NULL) { call_api_dispatch(api, reply); free_xml(reply); } return pcmk_rc_ok; } /*! * \internal * \brief Create the XML for an IPC request to purge a node from the peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Newly allocated IPC request XML * * \note The controller, fencer, and pacemakerd use the same request syntax, but * the attribute manager uses a different one. The CIB manager doesn't * have any syntax for it. The executor and scheduler don't connect to the * cluster layer and thus don't have or need any syntax for it. * * \todo Modify the attribute manager to accept the common syntax (as well * as its current one, for compatibility with older clients). Modify * the CIB manager to accept and honor the common syntax. Modify the * executor and scheduler to accept the syntax (immediately returning * success), just for consistency. Modify this function to use the * common syntax with all daemons if their version supports it. */ static xmlNode * create_purge_node_request(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { xmlNode *request = NULL; const char *client = crm_system_name? crm_system_name : "client"; switch (api->server) { case pcmk_ipc_attrd: request = create_xml_node(NULL, __func__); crm_xml_add(request, F_TYPE, T_ATTRD); crm_xml_add(request, F_ORIG, crm_system_name); crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); crm_xml_add(request, PCMK__XA_ATTR_NODE_NAME, node_name); if (nodeid > 0) { crm_xml_add_int(request, PCMK__XA_ATTR_NODE_ID, (int) nodeid); } break; case pcmk_ipc_controld: case pcmk_ipc_fenced: case pcmk_ipc_pacemakerd: request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, pcmk_ipc_name(api, false), client, NULL); if (nodeid > 0) { crm_xml_set_id(request, "%lu", (unsigned long) nodeid); } crm_xml_add(request, XML_ATTR_UNAME, node_name); break; case pcmk_ipc_based: case pcmk_ipc_execd: case pcmk_ipc_schedulerd: break; } return request; } /*! * \brief Ask a Pacemaker daemon to purge a node from its peer cache * * \param[in] api IPC API connection * \param[in] node_name If not NULL, name of node to purge * \param[in] nodeid If not 0, node ID of node to purge * * \return Standard Pacemaker return code * * \note At least one of node_name or nodeid must be specified. */ int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid) { int rc = 0; xmlNode *request = NULL; if (api == NULL) { return EINVAL; } if ((node_name == NULL) && (nodeid == 0)) { return EINVAL; } request = create_purge_node_request(api, node_name, nodeid); if (request == NULL) { return EOPNOTSUPP; } rc = pcmk__send_ipc_request(api, request); free_xml(request); crm_debug("%s peer cache purge of node %s[%lu]: rc=%d", pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc); return rc; } /* * Generic IPC API (to eventually be deprecated as public API and made internal) */ struct crm_ipc_s { struct pollfd pfd; unsigned int max_buf_size; // maximum bytes we can send or receive over IPC unsigned int buf_size; // size of allocated buffer int msg_size; int need_reply; char *buffer; char *server_name; // server IPC name being connected to qb_ipcc_connection_t *ipc; }; /*! * \brief Create a new (legacy) object for using Pacemaker daemon IPC * * \param[in] name IPC system name to connect to * \param[in] max_size Use a maximum IPC buffer size of at least this size * * \return Newly allocated IPC object on success, NULL otherwise * * \note The caller is responsible for freeing the result using * crm_ipc_destroy(). * \note This should be considered deprecated for use with daemons supported by * pcmk_new_ipc_api(). */ crm_ipc_t * crm_ipc_new(const char *name, size_t max_size) { crm_ipc_t *client = NULL; client = calloc(1, sizeof(crm_ipc_t)); if (client == NULL) { crm_err("Could not create IPC connection: %s", strerror(errno)); return NULL; } client->server_name = strdup(name); if (client->server_name == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client); return NULL; } client->buf_size = pcmk__ipc_buffer_size(max_size); client->buffer = malloc(client->buf_size); if (client->buffer == NULL) { crm_err("Could not create %s IPC connection: %s", name, strerror(errno)); free(client->server_name); free(client); return NULL; } /* Clients initiating connection pick the max buf size */ client->max_buf_size = client->buf_size; client->pfd.fd = -1; client->pfd.events = POLLIN; client->pfd.revents = 0; return client; } /*! * \brief Establish an IPC connection to a Pacemaker component * * \param[in] client Connection instance obtained from crm_ipc_new() * * \return TRUE on success, FALSE otherwise (in which case errno will be set; * specifically, in case of discovering the remote side is not * authentic, its value is set to ECONNABORTED). */ bool crm_ipc_connect(crm_ipc_t * client) { uid_t cl_uid = 0; gid_t cl_gid = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; int rv; client->need_reply = FALSE; client->ipc = qb_ipcc_connect(client->server_name, client->buf_size); if (client->ipc == NULL) { crm_debug("Could not establish %s IPC connection: %s (%d)", client->server_name, pcmk_strerror(errno), errno); return FALSE; } client->pfd.fd = crm_ipc_get_fd(client); if (client->pfd.fd < 0) { rv = errno; /* message already omitted */ crm_ipc_close(client); errno = rv; return FALSE; } rv = pcmk_daemon_user(&cl_uid, &cl_gid); if (rv < 0) { /* message already omitted */ crm_ipc_close(client); errno = -rv; return FALSE; } if ((rv = pcmk__crm_ipc_is_authentic_process(client->ipc, client->pfd.fd, cl_uid, cl_gid, &found_pid, &found_uid, &found_gid)) == pcmk_rc_ipc_unauthorized) { crm_err("%s IPC provider authentication failed: process %lld has " "uid %lld (expected %lld) and gid %lld (expected %lld)", client->server_name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) cl_uid, (long long) found_gid, (long long) cl_gid); crm_ipc_close(client); errno = ECONNABORTED; return FALSE; } else if (rv != pcmk_rc_ok) { crm_perror(LOG_ERR, "Could not verify authenticity of %s IPC provider", client->server_name); crm_ipc_close(client); if (rv > 0) { errno = rv; } else { errno = ENOTCONN; } return FALSE; } qb_ipcc_context_set(client->ipc, client); client->max_buf_size = qb_ipcc_get_buffer_size(client->ipc); if (client->max_buf_size > client->buf_size) { free(client->buffer); client->buffer = calloc(1, client->max_buf_size); client->buf_size = client->max_buf_size; } return TRUE; } void crm_ipc_close(crm_ipc_t * client) { if (client) { if (client->ipc) { qb_ipcc_connection_t *ipc = client->ipc; client->ipc = NULL; qb_ipcc_disconnect(ipc); } } } void crm_ipc_destroy(crm_ipc_t * client) { if (client) { if (client->ipc && qb_ipcc_is_connected(client->ipc)) { crm_notice("Destroying active %s IPC connection", client->server_name); /* The next line is basically unsafe * * If this connection was attached to mainloop and mainloop is active, * the 'disconnected' callback will end up back here and we'll end * up free'ing the memory twice - something that can still happen * even without this if we destroy a connection and it closes before * we call exit */ /* crm_ipc_close(client); */ } else { crm_trace("Destroying inactive %s IPC connection", client->server_name); } free(client->buffer); free(client->server_name); free(client); } } int crm_ipc_get_fd(crm_ipc_t * client) { int fd = 0; if (client && client->ipc && (qb_ipcc_fd_get(client->ipc, &fd) == 0)) { return fd; } errno = EINVAL; crm_perror(LOG_ERR, "Could not obtain file descriptor for %s IPC", (client? client->server_name : "unspecified")); return -errno; } bool crm_ipc_connected(crm_ipc_t * client) { bool rc = FALSE; if (client == NULL) { crm_trace("No client"); return FALSE; } else if (client->ipc == NULL) { crm_trace("No connection"); return FALSE; } else if (client->pfd.fd < 0) { crm_trace("Bad descriptor"); return FALSE; } rc = qb_ipcc_is_connected(client->ipc); if (rc == FALSE) { client->pfd.fd = -EINVAL; } return rc; } /*! * \brief Check whether an IPC connection is ready to be read * * \param[in] client Connection to check * * \return Positive value if ready to be read, 0 if not ready, -errno on error */ int crm_ipc_ready(crm_ipc_t *client) { int rc; CRM_ASSERT(client != NULL); if (crm_ipc_connected(client) == FALSE) { return -ENOTCONN; } client->pfd.revents = 0; rc = poll(&(client->pfd), 1, 0); return (rc < 0)? -errno : rc; } // \return Standard Pacemaker return code static int crm_ipc_decompress(crm_ipc_t * client) { pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer; if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; /* never let buf size fall below our max size required for ipc reads. */ unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size); char *uncompressed = calloc(1, new_buf_size); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u, client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0); if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return EILSEQ; } /* * This assert no longer holds true. For an identical msg, some clients may * require compression, and others may not. If that same msg (event) is sent * to multiple clients, it could result in some clients receiving a compressed * msg even though compression was not explicitly required for them. * * CRM_ASSERT((header->size_uncompressed + sizeof(pcmk__ipc_header_t)) >= ipc_buffer_max); */ CRM_ASSERT(size_u == header->size_uncompressed); memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t)); /* Preserve the header */ header = (pcmk__ipc_header_t *)(void*)uncompressed; free(client->buffer); client->buf_size = new_buf_size; client->buffer = uncompressed; } CRM_ASSERT(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0); return pcmk_rc_ok; } long crm_ipc_read(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); CRM_ASSERT(client->ipc != NULL); CRM_ASSERT(client->buffer != NULL); client->buffer[0] = 0; client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer, client->buf_size, 0); if (client->msg_size >= 0) { int rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return pcmk_rc2legacy(rc); } header = (pcmk__ipc_header_t *)(void*)client->buffer; if (!pcmk__valid_ipc_header(header)) { return -EBADMSG; } crm_trace("Received %s IPC event %d size=%u rc=%d text='%.100s'", client->server_name, header->qb.id, header->qb.size, client->msg_size, client->buffer + sizeof(pcmk__ipc_header_t)); } else { crm_trace("No message received from %s IPC: %s", client->server_name, pcmk_strerror(client->msg_size)); } if (crm_ipc_connected(client) == FALSE || client->msg_size == -ENOTCONN) { crm_err("Connection to %s IPC failed", client->server_name); } if (header) { /* Data excluding the header */ return header->size_uncompressed; } return -ENOMSG; } const char * crm_ipc_buffer(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->buffer + sizeof(pcmk__ipc_header_t); } uint32_t crm_ipc_buffer_flags(crm_ipc_t * client) { pcmk__ipc_header_t *header = NULL; CRM_ASSERT(client != NULL); if (client->buffer == NULL) { return 0; } header = (pcmk__ipc_header_t *)(void*)client->buffer; return header->flags; } const char * crm_ipc_name(crm_ipc_t * client) { CRM_ASSERT(client != NULL); return client->server_name; } // \return Standard Pacemaker return code static int internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, ssize_t *bytes) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); int rc = pcmk_rc_ok; /* get the reply */ crm_trace("Waiting on reply to %s IPC message %d", client->server_name, request_id); do { *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000); if (*bytes > 0) { pcmk__ipc_header_t *hdr = NULL; rc = crm_ipc_decompress(client); if (rc != pcmk_rc_ok) { return rc; } hdr = (pcmk__ipc_header_t *)(void*)client->buffer; if (hdr->qb.id == request_id) { /* Got it */ break; } else if (hdr->qb.id < request_id) { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "OldIpcReply"); } else { xmlNode *bad = string2xml(crm_ipc_buffer(client)); crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id); crm_log_xml_notice(bad, "ImpossibleReply"); CRM_ASSERT(hdr->qb.id <= request_id); } } else if (crm_ipc_connected(client) == FALSE) { crm_err("%s IPC provider disconnected while waiting for message %d", client->server_name, request_id); break; } } while (time(NULL) < timeout); if (*bytes < 0) { rc = (int) -*bytes; // System errno } return rc; } /*! * \brief Send an IPC XML message * * \param[in] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags * \param[in] ms_timeout Give up if not sent within this much time * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * * \return Negative errno on error, otherwise size of reply received in bytes * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode ** reply) { int rc = 0; ssize_t qb_rc = 0; ssize_t bytes = 0; struct iovec *iov; static uint32_t id = 0; static int factor = 8; pcmk__ipc_header_t *header; if (client == NULL) { crm_notice("Can't send IPC request without connection (bug?): %.100s", message); return -ENOTCONN; } else if (crm_ipc_connected(client) == FALSE) { /* Don't even bother */ crm_notice("Can't send %s IPC requests: Connection closed", client->server_name); return -ENOTCONN; } if (ms_timeout == 0) { ms_timeout = 5000; } if (client->need_reply) { qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending %s IPC disabled until pending reply received", client->server_name); return -EALREADY; } else { crm_notice("Sending %s IPC re-enabled after pending reply received", client->server_name); client->need_reply = FALSE; } } id++; CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { crm_warn("Couldn't prepare %s IPC request: %s " CRM_XS " rc=%d", client->server_name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } header = iov[0].iov_base; pcmk__set_ipc_flags(header->flags, client->server_name, flags); if (pcmk_is_set(flags, crm_ipc_proxied)) { /* Don't look for a synchronous response */ pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response); } if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { crm_notice("Compressed message exceeds %d0%% of configured IPC " "limit (%u bytes); consider setting PCMK_ipc_buffer to " "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->server_name, header->qb.id, header->qb.size, ms_timeout); if ((ms_timeout > 0) || !pcmk_is_set(flags, crm_ipc_client_response)) { time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return * an error if it's not connected? */ if (!crm_ipc_connected(client)) { goto send_cleanup; } qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { goto send_cleanup; } else if (!pcmk_is_set(flags, crm_ipc_client_response)) { crm_trace("Not waiting for reply to %s IPC request %d", client->server_name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); if (rc != pcmk_rc_ok) { /* We didn't get the reply in time, so disable future sends for now. * The only alternative would be to close the connection since we * don't know how to detect and discard out-of-sequence replies. * * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } rc = (int) bytes; // Negative system errno, or size of reply received } else { // No timeout, and client response needed do { qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, client->buf_size, -1); } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); rc = (int) qb_rc; // Negative system errno, or size of reply received } if (rc > 0) { pcmk__ipc_header_t *hdr = (pcmk__ipc_header_t *)(void*)client->buffer; crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", rc, hdr->qb.id, client->server_name, header->qb.id, crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { crm_trace("No reply to %s IPC %d: rc=%d", client->server_name, header->qb.id, rc); } send_cleanup: if (crm_ipc_connected(client) == FALSE) { crm_notice("Couldn't send %s IPC request %d: Connection closed " CRM_XS " rc=%d", client->server_name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", client->server_name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", client->server_name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } pcmk_free_ipc_event(iov); return rc; } int pcmk__crm_ipc_is_authentic_process(qb_ipcc_connection_t *qb_ipc, int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = 0; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; #if defined(US_AUTH_PEERCRED_UCRED) struct ucred ucred; socklen_t ucred_len = sizeof(ucred); #endif #ifdef HAVE_QB_IPCC_AUTH_GET if (qb_ipc && !qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid)) { goto do_checks; } #endif #if defined(US_AUTH_PEERCRED_UCRED) if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) && ucred_len == sizeof(ucred)) { found_pid = ucred.pid; found_uid = ucred.uid; found_gid = ucred.gid; #elif defined(US_AUTH_PEERCRED_SOCKPEERCRED) struct sockpeercred sockpeercred; socklen_t sockpeercred_len = sizeof(sockpeercred); if (!getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &sockpeercred, &sockpeercred_len) && sockpeercred_len == sizeof(sockpeercred_len)) { found_pid = sockpeercred.pid; found_uid = sockpeercred.uid; found_gid = sockpeercred.gid; #elif defined(US_AUTH_GETPEEREID) if (!getpeereid(sock, &found_uid, &found_gid)) { found_pid = PCMK__SPECIAL_PID; /* cannot obtain PID (FreeBSD) */ #elif defined(US_AUTH_GETPEERUCRED) ucred_t *ucred; if (!getpeerucred(sock, &ucred)) { errno = 0; found_pid = ucred_getpid(ucred); found_uid = ucred_geteuid(ucred); found_gid = ucred_getegid(ucred); ret = -errno; ucred_free(ucred); if (ret) { return (ret < 0) ? ret : -pcmk_err_generic; } #else # error "No way to authenticate a Unix socket peer" errno = 0; if (0) { #endif #ifdef HAVE_QB_IPCC_AUTH_GET do_checks: #endif if (gotpid != NULL) { *gotpid = found_pid; } if (gotuid != NULL) { *gotuid = found_uid; } if (gotgid != NULL) { *gotgid = found_gid; } if (found_uid == 0 || found_uid == refuid || found_gid == refgid) { ret = 0; } else { ret = pcmk_rc_ipc_unauthorized; } } else { ret = (errno > 0) ? errno : pcmk_rc_error; } return ret; } int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid, pid_t *gotpid, uid_t *gotuid, gid_t *gotgid) { int ret = pcmk__crm_ipc_is_authentic_process(NULL, sock, refuid, refgid, gotpid, gotuid, gotgid); /* The old function had some very odd return codes*/ if (ret == 0) { return 1; } else if (ret == pcmk_rc_ipc_unauthorized) { return 0; } else { return pcmk_rc2legacy(ret); } } int pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid, gid_t refgid, pid_t *gotpid) { static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */ int fd; int rc = pcmk_rc_ipc_unresponsive; int auth_rc = 0; int32_t qb_rc; pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0; qb_ipcc_connection_t *c; c = qb_ipcc_connect(name, 0); if (c == NULL) { crm_info("Could not connect to %s IPC: %s", name, strerror(errno)); rc = pcmk_rc_ipc_unresponsive; goto bail; } qb_rc = qb_ipcc_fd_get(c, &fd); if (qb_rc != 0) { rc = (int) -qb_rc; // System errno crm_err("Could not get fd from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } auth_rc = pcmk__crm_ipc_is_authentic_process(c, fd, refuid, refgid, &found_pid, &found_uid, &found_gid); if (auth_rc == pcmk_rc_ipc_unauthorized) { crm_err("Daemon (IPC %s) effectively blocked with unauthorized" " process %lld (uid: %lld, gid: %lld)", name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid), (long long) found_uid, (long long) found_gid); rc = pcmk_rc_ipc_unauthorized; goto bail; } if (auth_rc != pcmk_rc_ok) { rc = auth_rc; crm_err("Could not get peer credentials from %s IPC: %s " CRM_XS " rc=%d", name, pcmk_rc_str(rc), rc); goto bail; } if (gotpid != NULL) { *gotpid = found_pid; } rc = pcmk_rc_ok; if ((found_uid != refuid || found_gid != refgid) && strncmp(last_asked_name, name, sizeof(last_asked_name))) { if ((found_uid == 0) && (refuid != 0)) { crm_warn("Daemon (IPC %s) runs as root, whereas the expected" " credentials are %lld:%lld, hazard of violating" " the least privilege principle", name, (long long) refuid, (long long) refgid); } else { crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the" " expected credentials are %lld:%lld, which may" " mean a different set of privileges than expected", name, (long long) found_uid, (long long) found_gid, (long long) refuid, (long long) refgid); } memccpy(last_asked_name, name, '\0', sizeof(last_asked_name)); } bail: if (c != NULL) { qb_ipcc_disconnect(c); } return rc; } diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c index caab7a8539..ddec489260 100644 --- a/lib/common/ipc_pacemakerd.c +++ b/lib/common/ipc_pacemakerd.c @@ -1,251 +1,253 @@ /* * Copyright 2020 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" typedef struct pacemakerd_api_private_s { enum pcmk_pacemakerd_state state; char *client_uuid; } pacemakerd_api_private_t; static const char *pacemakerd_state_str[] = { XML_PING_ATTR_PACEMAKERDSTATE_INIT, XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, XML_PING_ATTR_PACEMAKERDSTATE_RUNNING, XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE }; enum pcmk_pacemakerd_state pcmk_pacemakerd_api_daemon_state_text2enum(const char *state) { int i; if (state == NULL) { return pcmk_pacemakerd_state_invalid; } for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max; i++) { if (pcmk__str_eq(state, pacemakerd_state_str[i], pcmk__str_none)) { return i; } } return pcmk_pacemakerd_state_invalid; } const char * pcmk_pacemakerd_api_daemon_state_enum2text( enum pcmk_pacemakerd_state state) { if ((state >= pcmk_pacemakerd_state_init) && (state <= pcmk_pacemakerd_state_max)) { return pacemakerd_state_str[state]; } return "invalid"; } // \return Standard Pacemaker return code static int new_data(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s)); if (api->api_data == NULL) { return errno; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; /* other as with cib, controld, ... we are addressing pacemakerd just from the local node -> pid is unique and thus sufficient as an ID */ private->client_uuid = pcmk__getpid_s(); return pcmk_rc_ok; } static void free_data(void *data) { free(((struct pacemakerd_api_private_s *) data)->client_uuid); free(data); } // \return Standard Pacemaker return code static int post_connect(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; if (api->api_data == NULL) { return EINVAL; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; return pcmk_rc_ok; } static void post_disconnect(pcmk_ipc_api_t *api) { struct pacemakerd_api_private_s *private = NULL; if (api->api_data == NULL) { return; } private = api->api_data; private->state = pcmk_pacemakerd_state_invalid; return; } static bool reply_expected(pcmk_ipc_api_t *api, xmlNode *request) { const char *command = crm_element_value(request, F_CRM_TASK); if (command == NULL) { return false; } // We only need to handle commands that functions in this file can send return pcmk__str_any_of(command, CRM_OP_PING, CRM_OP_QUIT, NULL); } static void dispatch(pcmk_ipc_api_t *api, xmlNode *reply) { crm_exit_t status = CRM_EX_OK; xmlNode *msg_data = NULL; pcmk_pacemakerd_api_reply_t reply_data = { pcmk_pacemakerd_reply_unknown }; const char *value = NULL; long long value_ll = 0; if (pcmk__str_eq((const char *) reply->name, "ack", pcmk__str_casei)) { return; } value = crm_element_value(reply, F_CRM_MSG_TYPE); if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) { crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'", crm_str(value)); status = CRM_EX_PROTOCOL; goto done; } if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { crm_debug("Unrecognizable pacemakerd message: no reference"); status = CRM_EX_PROTOCOL; goto done; } value = crm_element_value(reply, F_CRM_TASK); // Parse useful info from reply msg_data = get_message_xml(reply, F_CRM_DATA); crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll); if (pcmk__str_eq(value, CRM_OP_PING, pcmk__str_none)) { reply_data.reply_type = pcmk_pacemakerd_reply_ping; reply_data.data.ping.state = pcmk_pacemakerd_api_daemon_state_text2enum( crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE)); reply_data.data.ping.status = pcmk__str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), "ok", pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error; reply_data.data.ping.last_good = (time_t) value_ll; reply_data.data.ping.sys_from = crm_element_value(msg_data, XML_PING_ATTR_SYSFROM); } else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) { reply_data.reply_type = pcmk_pacemakerd_reply_shutdown; reply_data.data.shutdown.status = atoi(crm_element_value(msg_data, XML_LRM_ATTR_OPSTATUS)); } else { crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value)); status = CRM_EX_PROTOCOL; goto done; } done: pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); } pcmk__ipc_methods_t * pcmk__pacemakerd_api_methods() { pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); if (cmds != NULL) { cmds->new_data = new_data; cmds->free_data = free_data; cmds->post_connect = post_connect; cmds->reply_expected = reply_expected; cmds->dispatch = dispatch; cmds->post_disconnect = post_disconnect; } return cmds; } static int do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *task) { pacemakerd_api_private_t *private; xmlNode *cmd; int rc; - CRM_CHECK(api != NULL, return -EINVAL); + if (api == NULL) { + return EINVAL; + } + private = api->api_data; CRM_ASSERT(private != NULL); cmd = create_request(task, NULL, NULL, CRM_SYSTEM_MCP, - ipc_name?ipc_name:((crm_system_name? crm_system_name : "client")), - private->client_uuid); + pcmk__ipc_sys_name(ipc_name, "client"), + private->client_uuid); if (cmd) { rc = pcmk__send_ipc_request(api, cmd); if (rc != pcmk_rc_ok) { - crm_debug("Couldn't ping pacemakerd: %s rc=%d", - pcmk_rc_str(rc), rc); - rc = ECOMM; + crm_debug("Couldn't send request to pacemakerd: %s rc=%d", + pcmk_rc_str(rc), rc); } free_xml(cmd); } else { rc = ENOMSG; } return rc; } int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name) { return do_pacemakerd_api_call(api, ipc_name, CRM_OP_PING); } int pcmk_pacemakerd_api_shutdown(pcmk_ipc_api_t *api, const char *ipc_name) { return do_pacemakerd_api_call(api, ipc_name, CRM_OP_QUIT); } diff --git a/lib/common/ipc_schedulerd.c b/lib/common/ipc_schedulerd.c new file mode 100644 index 0000000000..0f6a893dd1 --- /dev/null +++ b/lib/common/ipc_schedulerd.c @@ -0,0 +1,177 @@ +/* + * Copyright 2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "crmcommon_private.h" + +typedef struct schedulerd_api_private_s { + char *client_uuid; +} schedulerd_api_private_t; + +// \return Standard Pacemaker return code +static int +new_data(pcmk_ipc_api_t *api) +{ + struct schedulerd_api_private_s *private = NULL; + + api->api_data = calloc(1, sizeof(struct schedulerd_api_private_s)); + + if (api->api_data == NULL) { + return errno; + } + + private = api->api_data; + /* See comments in ipc_pacemakerd.c. */ + private->client_uuid = pcmk__getpid_s(); + + return pcmk_rc_ok; +} + +static void +free_data(void *data) +{ + free(((struct schedulerd_api_private_s *) data)->client_uuid); + free(data); +} + +// \return Standard Pacemaker return code +static int +post_connect(pcmk_ipc_api_t *api) +{ + if (api->api_data == NULL) { + return EINVAL; + } + + return pcmk_rc_ok; +} + +static bool +reply_expected(pcmk_ipc_api_t *api, xmlNode *request) +{ + const char *command = crm_element_value(request, F_CRM_TASK); + + if (command == NULL) { + return false; + } + + // We only need to handle commands that functions in this file can send + return pcmk__str_any_of(command, CRM_OP_PECALC, NULL); +} + +static void +dispatch(pcmk_ipc_api_t *api, xmlNode *reply) +{ + crm_exit_t status = CRM_EX_OK; + xmlNode *msg_data = NULL; + pcmk_schedulerd_api_reply_t reply_data = { + pcmk_schedulerd_reply_unknown + }; + const char *value = NULL; + + if (pcmk__str_eq((const char *) reply->name, "ack", pcmk__str_casei)) { + return; + } + + value = crm_element_value(reply, F_CRM_MSG_TYPE); + if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) { + crm_debug("Unrecognizable schedulerd message: invalid message type '%s'", + crm_str(value)); + status = CRM_EX_PROTOCOL; + goto done; + } + + if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { + crm_debug("Unrecognizable schedulerd message: no reference"); + status = CRM_EX_PROTOCOL; + goto done; + } + + // Parse useful info from reply + msg_data = get_message_xml(reply, F_CRM_DATA); + value = crm_element_value(reply, F_CRM_TASK); + + if (pcmk__str_eq(value, CRM_OP_PECALC, pcmk__str_none)) { + reply_data.reply_type = pcmk_schedulerd_reply_graph; + reply_data.data.graph.reference = crm_element_value(reply, XML_ATTR_REFERENCE); + reply_data.data.graph.input = crm_element_value(reply, F_CRM_TGRAPH_INPUT); + reply_data.data.graph.tgraph = msg_data; + } else { + crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value)); + status = CRM_EX_PROTOCOL; + goto done; + } + +done: + pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); +} + +pcmk__ipc_methods_t * +pcmk__schedulerd_api_methods() +{ + pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); + + if (cmds != NULL) { + cmds->new_data = new_data; + cmds->free_data = free_data; + cmds->post_connect = post_connect; + cmds->reply_expected = reply_expected; + cmds->dispatch = dispatch; + } + return cmds; +} + +static int +do_schedulerd_api_call(pcmk_ipc_api_t *api, const char *task, xmlNode *cib, char **ref) +{ + schedulerd_api_private_t *private; + xmlNode *cmd = NULL; + int rc; + + if (!pcmk_ipc_is_connected(api)) { + return ENOTCONN; + } + + private = api->api_data; + CRM_ASSERT(private != NULL); + + cmd = create_request(task, cib, NULL, CRM_SYSTEM_PENGINE, + crm_system_name? crm_system_name : "client", + private->client_uuid); + + if (cmd) { + rc = pcmk__send_ipc_request(api, cmd); + if (rc != pcmk_rc_ok) { + crm_debug("Couldn't send request to schedulerd: %s rc=%d", + pcmk_rc_str(rc), rc); + } + + *ref = strdup(crm_element_value(cmd, F_CRM_REFERENCE)); + free_xml(cmd); + } else { + rc = ENOMSG; + } + + return rc; +} + +int +pcmk_schedulerd_api_graph(pcmk_ipc_api_t *api, xmlNode *cib, char **ref) +{ + return do_schedulerd_api_call(api, CRM_OP_PECALC, cib, ref); +} diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c index 250a1de0dd..75977542aa 100644 --- a/lib/common/ipc_server.c +++ b/lib/common/ipc_server.c @@ -1,970 +1,984 @@ /* * Copyright 2004-2021 the Pacemaker project contributors * * The version control history for this file may have further details. * * This source code is licensed under the GNU Lesser General Public License * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. */ #include #include #include #include #include #include #include #include #include #include #include "crmcommon_private.h" /* Evict clients whose event queue grows this large (by default) */ #define PCMK_IPC_DEFAULT_QUEUE_MAX 500 static GHashTable *client_connections = NULL; /*! * \internal * \brief Count IPC clients * * \return Number of active IPC client connections */ guint pcmk__ipc_client_count() { return client_connections? g_hash_table_size(client_connections) : 0; } /*! * \internal * \brief Execute a function for each active IPC client connection * * \param[in] func Function to call * \param[in] user_data Pointer to pass to function * * \note The parameters are the same as for g_hash_table_foreach(). */ void pcmk__foreach_ipc_client(GHFunc func, gpointer user_data) { if ((func != NULL) && (client_connections != NULL)) { g_hash_table_foreach(client_connections, func, user_data); } } pcmk__client_t * pcmk__find_client(qb_ipcs_connection_t *c) { if (client_connections) { return g_hash_table_lookup(client_connections, c); } crm_trace("No client found for %p", c); return NULL; } pcmk__client_t * pcmk__find_client_by_id(const char *id) { gpointer key; pcmk__client_t *client; GHashTableIter iter; if (client_connections && id) { g_hash_table_iter_init(&iter, client_connections); while (g_hash_table_iter_next(&iter, &key, (gpointer *) & client)) { if (strcmp(client->id, id) == 0) { return client; } } } crm_trace("No client found with id=%s", id); return NULL; } /*! * \internal * \brief Get a client identifier for use in log messages * * \param[in] c Client * * \return Client's name, client's ID, or a string literal, as available * \note This is intended to be used in format strings like "client %s". */ const char * pcmk__client_name(pcmk__client_t *c) { if (c == NULL) { return "(unspecified)"; } else if (c->name != NULL) { return c->name; } else if (c->id != NULL) { return c->id; } else { return "(unidentified)"; } } void pcmk__client_cleanup(void) { if (client_connections != NULL) { int active = g_hash_table_size(client_connections); if (active) { crm_err("Exiting with %d active IPC client%s", active, pcmk__plural_s(active)); } g_hash_table_destroy(client_connections); client_connections = NULL; } } void pcmk__drop_all_clients(qb_ipcs_service_t *service) { qb_ipcs_connection_t *c = NULL; if (service == NULL) { return; } c = qb_ipcs_connection_first_get(service); while (c != NULL) { qb_ipcs_connection_t *last = c; c = qb_ipcs_connection_next_get(service, last); /* There really shouldn't be anyone connected at this point */ crm_notice("Disconnecting client %p, pid=%d...", last, pcmk__client_pid(last)); qb_ipcs_disconnect(last); qb_ipcs_connection_unref(last); } } /*! * \internal * \brief Allocate a new pcmk__client_t object based on an IPC connection * * \param[in] c IPC connection (or NULL to allocate generic client) * \param[in] key Connection table key (or NULL to use sane default) * \param[in] uid_client UID corresponding to c (ignored if c is NULL) * * \return Pointer to new pcmk__client_t (or NULL on error) */ static pcmk__client_t * client_from_connection(qb_ipcs_connection_t *c, void *key, uid_t uid_client) { pcmk__client_t *client = calloc(1, sizeof(pcmk__client_t)); if (client == NULL) { crm_perror(LOG_ERR, "Allocating client"); return NULL; } if (c) { client->user = pcmk__uid2username(uid_client); if (client->user == NULL) { client->user = strdup("#unprivileged"); CRM_CHECK(client->user != NULL, free(client); return NULL); crm_err("Unable to enforce ACLs for user ID %d, assuming unprivileged", uid_client); } client->ipcs = c; pcmk__set_client_flags(client, pcmk__client_ipc); client->pid = pcmk__client_pid(c); if (key == NULL) { key = c; } } client->id = crm_generate_uuid(); if (client->id == NULL) { crm_err("Could not generate UUID for client"); free(client->user); free(client); return NULL; } if (key == NULL) { key = client->id; } if (client_connections == NULL) { crm_trace("Creating IPC client table"); client_connections = g_hash_table_new(g_direct_hash, g_direct_equal); } g_hash_table_insert(client_connections, key, client); return client; } /*! * \brief Allocate a new pcmk__client_t object and generate its ID * * \param[in] key What to use as connections hash table key (NULL to use ID) * * \return Pointer to new pcmk__client_t (asserts on failure) */ pcmk__client_t * pcmk__new_unauth_client(void *key) { pcmk__client_t *client = client_from_connection(NULL, key, 0); CRM_ASSERT(client != NULL); return client; } pcmk__client_t * pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) { gid_t uid_cluster = 0; gid_t gid_cluster = 0; pcmk__client_t *client = NULL; CRM_CHECK(c != NULL, return NULL); if (pcmk_daemon_user(&uid_cluster, &gid_cluster) < 0) { static bool need_log = TRUE; if (need_log) { crm_warn("Could not find user and group IDs for user %s", CRM_DAEMON_USER); need_log = FALSE; } } if (uid_client != 0) { crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } /* TODO: Do our own auth checking, return NULL if unauthorized */ client = client_from_connection(c, NULL, uid_client); if (client == NULL) { return NULL; } if ((uid_client == 0) || (uid_client == uid_cluster)) { /* Remember when a connection came from root or hacluster */ pcmk__set_client_flags(client, pcmk__client_privileged); } crm_debug("New IPC client %s for PID %u with uid %d and gid %d", client->id, client->pid, uid_client, gid_client); return client; } static struct iovec * pcmk__new_ipc_event(void) { struct iovec *iov = calloc(2, sizeof(struct iovec)); CRM_ASSERT(iov != NULL); return iov; } /*! * \brief Free an I/O vector created by pcmk__ipc_prepare_iov() * * \param[in] event I/O vector to free */ void pcmk_free_ipc_event(struct iovec *event) { if (event != NULL) { free(event[0].iov_base); free(event[1].iov_base); free(event); } } static void free_event(gpointer data) { pcmk_free_ipc_event((struct iovec *) data); } static void add_event(pcmk__client_t *c, struct iovec *iov) { if (c->event_queue == NULL) { c->event_queue = g_queue_new(); } g_queue_push_tail(c->event_queue, iov); } void pcmk__free_client(pcmk__client_t *c) { if (c == NULL) { return; } if (client_connections) { if (c->ipcs) { crm_trace("Destroying %p/%p (%d remaining)", c, c->ipcs, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->ipcs); } else { crm_trace("Destroying remote connection %p (%d remaining)", c, g_hash_table_size(client_connections) - 1); g_hash_table_remove(client_connections, c->id); } } if (c->event_timer) { g_source_remove(c->event_timer); } if (c->event_queue) { crm_debug("Destroying %d events", g_queue_get_length(c->event_queue)); g_queue_free_full(c->event_queue, free_event); } free(c->id); free(c->name); free(c->user); if (c->remote) { if (c->remote->auth_timeout) { g_source_remove(c->remote->auth_timeout); } free(c->remote->buffer); free(c->remote); } free(c); } /*! * \internal * \brief Raise IPC eviction threshold for a client, if allowed * * \param[in,out] client Client to modify * \param[in] qmax New threshold (as non-NULL string) * * \return true if change was allowed, false otherwise */ bool pcmk__set_client_queue_max(pcmk__client_t *client, const char *qmax) { if (pcmk_is_set(client->flags, pcmk__client_privileged)) { long long qmax_ll; if ((pcmk__scan_ll(qmax, &qmax_ll, 0LL) == pcmk_rc_ok) && (qmax_ll > 0LL) && (qmax_ll <= UINT_MAX)) { client->queue_max = (unsigned int) qmax_ll; return true; } } return false; } int pcmk__client_pid(qb_ipcs_connection_t *c) { struct qb_ipcs_connection_stats stats; stats.client_pid = 0; qb_ipcs_connection_stats_get(c, &stats, 0); return stats.client_pid; } /*! * \internal * \brief Retrieve message XML from data read from client IPC * * \param[in] c IPC client connection * \param[in] data Data read from client connection * \param[out] id Where to store message ID from libqb header * \param[out] flags Where to store flags from libqb header * * \return Message XML on success, NULL otherwise */ xmlNode * pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; char *uncompressed = NULL; char *text = ((char *)data) + sizeof(pcmk__ipc_header_t); pcmk__ipc_header_t *header = data; if (!pcmk__valid_ipc_header(header)) { return NULL; } if (id) { *id = ((struct qb_ipc_response_header *)data)->id; } if (flags) { *flags = header->flags; } if (pcmk_is_set(header->flags, crm_ipc_proxied)) { /* Mark this client as being the endpoint of a proxy connection. * Proxy connections responses are sent on the event channel, to avoid * blocking the controller serving as proxy. */ pcmk__set_client_flags(c, pcmk__client_proxied); } if (header->size_compressed) { int rc = 0; unsigned int size_u = 1 + header->size_uncompressed; uncompressed = calloc(1, size_u); crm_trace("Decompressing message data %u bytes into %u bytes", header->size_compressed, size_u); rc = BZ2_bzBuffToBuffDecompress(uncompressed, &size_u, text, header->size_compressed, 1, 0); text = uncompressed; if (rc != BZ_OK) { crm_err("Decompression failed: %s " CRM_XS " bzerror=%d", bz2_strerror(rc), rc); free(uncompressed); return NULL; } } CRM_ASSERT(text[header->size_uncompressed - 1] == 0); xml = string2xml(text); crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; } static int crm_ipcs_flush_events(pcmk__client_t *c); static gboolean crm_ipcs_flush_events_cb(gpointer data) { pcmk__client_t *c = data; c->event_timer = 0; crm_ipcs_flush_events(c); return FALSE; } /*! * \internal * \brief Add progressive delay before next event queue flush * * \param[in,out] c Client connection to add delay to * \param[in] queue_len Current event queue length */ static inline void delay_next_flush(pcmk__client_t *c, unsigned int queue_len) { /* Delay a maximum of 1.5 seconds */ guint delay = (queue_len < 5)? (1000 + 100 * queue_len) : 1500; c->event_timer = g_timeout_add(delay, crm_ipcs_flush_events_cb, c); } /*! * \internal * \brief Send client any messages in its queue * * \param[in] c Client to flush * * \return Standard Pacemaker return value */ static int crm_ipcs_flush_events(pcmk__client_t *c) { int rc = pcmk_rc_ok; ssize_t qb_rc = 0; unsigned int sent = 0; unsigned int queue_len = 0; if (c == NULL) { return rc; } else if (c->event_timer) { /* There is already a timer, wait until it goes off */ crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); return rc; } if (c->event_queue) { queue_len = g_queue_get_length(c->event_queue); } while (sent < 100) { pcmk__ipc_header_t *header = NULL; struct iovec *event = NULL; if (c->event_queue) { // We don't pop unless send is successful event = g_queue_peek_head(c->event_queue); } if (event == NULL) { // Queue is empty break; } qb_rc = qb_ipcs_event_sendv(c->ipcs, event, 2); if (qb_rc < 0) { rc = (int) -qb_rc; break; } event = g_queue_pop_head(c->event_queue); sent++; header = event[0].iov_base; if (header->size_compressed) { crm_trace("Event %d to %p[%d] (%lld compressed bytes) sent", header->qb.id, c->ipcs, c->pid, (long long) qb_rc); } else { crm_trace("Event %d to %p[%d] (%lld bytes) sent: %.120s", header->qb.id, c->ipcs, c->pid, (long long) qb_rc, (char *) (event[1].iov_base)); } pcmk_free_ipc_event(event); } queue_len -= sent; if (sent > 0 || queue_len) { crm_trace("Sent %d events (%d remaining) for %p[%d]: %s (%lld)", sent, queue_len, c->ipcs, c->pid, pcmk_rc_str(rc), (long long) qb_rc); } if (queue_len) { /* Allow clients to briefly fall behind on processing incoming messages, * but drop completely unresponsive clients so the connection doesn't * consume resources indefinitely. */ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { /* Don't evict for a new or shrinking backlog */ crm_warn("Client with process ID %u has a backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); } else { crm_err("Evicting client with process ID %u due to backlog of %u messages " CRM_XS " %p", c->pid, queue_len, c->ipcs); c->queue_backlog = 0; qb_ipcs_disconnect(c->ipcs); return rc; } } c->queue_backlog = queue_len; delay_next_flush(c, queue_len); } else { /* Event queue is empty, there is no backlog */ c->queue_backlog = 0; } return rc; } /*! * \internal * \brief Create an I/O vector for sending an IPC XML message * * \param[in] request Identifier for libqb response header * \param[in] message XML message to send * \param[in] max_send_size If 0, default IPC buffer size is used * \param[out] result Where to store prepared I/O vector * \param[out] bytes Size of prepared data in bytes * * \return Standard Pacemaker return code */ int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, uint32_t max_send_size, struct iovec **result, ssize_t *bytes) { static unsigned int biggest = 0; struct iovec *iov; unsigned int total = 0; char *compressed = NULL; char *buffer = NULL; pcmk__ipc_header_t *header = NULL; if ((message == NULL) || (result == NULL)) { return EINVAL; } header = calloc(1, sizeof(pcmk__ipc_header_t)); if (header == NULL) { return ENOMEM; /* errno mightn't be set by allocator */ } buffer = dump_xml_unformatted(message); if (max_send_size == 0) { max_send_size = crm_ipc_default_buffer_size(); } CRM_LOG_ASSERT(max_send_size != 0); *result = NULL; iov = pcmk__new_ipc_event(); iov[0].iov_len = sizeof(pcmk__ipc_header_t); iov[0].iov_base = header; header->version = PCMK__IPC_VERSION; header->size_uncompressed = 1 + strlen(buffer); total = iov[0].iov_len + header->size_uncompressed; if (total < max_send_size) { iov[1].iov_base = buffer; iov[1].iov_len = header->size_uncompressed; } else { unsigned int new_size = 0; if (pcmk__compress(buffer, (unsigned int) header->size_uncompressed, (unsigned int) max_send_size, &compressed, &new_size) == pcmk_rc_ok) { pcmk__set_ipc_flags(header->flags, "send data", crm_ipc_compressed); header->size_compressed = new_size; iov[1].iov_len = header->size_compressed; iov[1].iov_base = compressed; free(buffer); biggest = QB_MAX(header->size_compressed, biggest); } else { crm_log_xml_trace(message, "EMSGSIZE"); biggest = QB_MAX(header->size_uncompressed, biggest); crm_err("Could not compress %u-byte message into less than IPC " "limit of %u bytes; set PCMK_ipc_buffer to higher value " "(%u bytes suggested)", header->size_uncompressed, max_send_size, 4 * biggest); free(compressed); free(buffer); pcmk_free_ipc_event(iov); return EMSGSIZE; } } header->qb.size = iov[0].iov_len + iov[1].iov_len; header->qb.id = (int32_t)request; /* Replying to a specific request */ *result = iov; CRM_ASSERT(header->qb.size > 0); if (bytes != NULL) { *bytes = header->qb.size; } return pcmk_rc_ok; } int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags) { int rc = pcmk_rc_ok; static uint32_t id = 1; pcmk__ipc_header_t *header = iov[0].iov_base; if (c->flags & pcmk__client_proxied) { /* _ALL_ replies to proxied connections need to be sent as events */ if (!pcmk_is_set(flags, crm_ipc_server_event)) { /* The proxied flag lets us know this was originally meant to be a * response, even though we're sending it over the event channel. */ pcmk__set_ipc_flags(flags, "server event", crm_ipc_server_event |crm_ipc_proxied_relay_response); } } pcmk__set_ipc_flags(header->flags, "server event", flags); if (flags & crm_ipc_server_event) { header->qb.id = id++; /* We don't really use it, but doesn't hurt to set one */ if (flags & crm_ipc_server_free) { crm_trace("Sending the original to %p[%d]", c->ipcs, c->pid); add_event(c, iov); } else { struct iovec *iov_copy = pcmk__new_ipc_event(); crm_trace("Sending a copy to %p[%d]", c->ipcs, c->pid); iov_copy[0].iov_len = iov[0].iov_len; iov_copy[0].iov_base = malloc(iov[0].iov_len); memcpy(iov_copy[0].iov_base, iov[0].iov_base, iov[0].iov_len); iov_copy[1].iov_len = iov[1].iov_len; iov_copy[1].iov_base = malloc(iov[1].iov_len); memcpy(iov_copy[1].iov_base, iov[1].iov_base, iov[1].iov_len); add_event(c, iov_copy); } } else { ssize_t qb_rc; CRM_LOG_ASSERT(header->qb.id != 0); /* Replying to a specific request */ qb_rc = qb_ipcs_response_sendv(c->ipcs, iov, 2); if (qb_rc < header->qb.size) { if (qb_rc < 0) { rc = (int) -qb_rc; } crm_notice("Response %d to pid %d failed: %s " CRM_XS " bytes=%u rc=%lld ipcs=%p", header->qb.id, c->pid, pcmk_rc_str(rc), header->qb.size, (long long) qb_rc, c->ipcs); } else { crm_trace("Response %d sent, %lld bytes to %p[%d]", header->qb.id, (long long) qb_rc, c->ipcs, c->pid); } if (flags & crm_ipc_server_free) { pcmk_free_ipc_event(iov); } } if (flags & crm_ipc_server_event) { rc = crm_ipcs_flush_events(c); } else { crm_ipcs_flush_events(c); } if ((rc == EPIPE) || (rc == ENOTCONN)) { crm_trace("Client %p disconnected", c->ipcs); } return rc; } int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, uint32_t flags) { struct iovec *iov = NULL; int rc = pcmk_rc_ok; if (c == NULL) { return EINVAL; } rc = pcmk__ipc_prepare_iov(request, message, crm_ipc_default_buffer_size(), &iov, NULL); if (rc == pcmk_rc_ok) { pcmk__set_ipc_flags(flags, "send data", crm_ipc_server_free); rc = pcmk__ipc_send_iov(c, iov, flags); } else { pcmk_free_ipc_event(iov); crm_notice("IPC message to pid %d failed: %s " CRM_XS " rc=%d", c->pid, pcmk_rc_str(rc), rc); } return rc; } /*! * \internal * \brief Send an acknowledgement with a status code to a client * * \param[in] function Calling function * \param[in] line Source file line within calling function * \param[in] c Client to send ack to * \param[in] request Request ID being replied to * \param[in] status Exit status code to add to ack * \param[in] flags IPC flags to use when sending * \param[in] tag Element name to use for acknowledgement * \param[in] status Status code to send with acknowledgement * * \return Standard Pacemaker return code */ int pcmk__ipc_send_ack_as(const char *function, int line, pcmk__client_t *c, uint32_t request, uint32_t flags, const char *tag, crm_exit_t status) { int rc = pcmk_rc_ok; if (pcmk_is_set(flags, crm_ipc_client_response)) { xmlNode *ack = create_xml_node(NULL, tag); crm_trace("Ack'ing IPC message from client %s as <%s status=%d>", pcmk__client_name(c), tag, status); c->request_id = 0; crm_xml_add(ack, "function", function); crm_xml_add_int(ack, "line", line); crm_xml_add_int(ack, "status", (int) status); rc = pcmk__ipc_send_xml(c, request, ack, flags); free_xml(ack); } return rc; } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-based API * * \param[out] ipcs_ro New IPC server for read-only pacemaker-based API * \param[out] ipcs_rw New IPC server for read/write pacemaker-based API * \param[out] ipcs_shm New IPC server for shared-memory pacemaker-based API * \param[in] ro_cb IPC callbacks for read-only API * \param[in] rw_cb IPC callbacks for read/write and shared-memory APIs * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_based_ipc(qb_ipcs_service_t **ipcs_ro, qb_ipcs_service_t **ipcs_rw, qb_ipcs_service_t **ipcs_shm, struct qb_ipcs_service_handlers *ro_cb, struct qb_ipcs_service_handlers *rw_cb) { *ipcs_ro = mainloop_add_ipc_server(PCMK__SERVER_BASED_RO, QB_IPC_NATIVE, ro_cb); *ipcs_rw = mainloop_add_ipc_server(PCMK__SERVER_BASED_RW, QB_IPC_NATIVE, rw_cb); *ipcs_shm = mainloop_add_ipc_server(PCMK__SERVER_BASED_SHM, QB_IPC_SHM, rw_cb); if (*ipcs_ro == NULL || *ipcs_rw == NULL || *ipcs_shm == NULL) { crm_err("Failed to create the CIB manager: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled"); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Destroy IPC servers for pacemaker-based API * * \param[out] ipcs_ro IPC server for read-only pacemaker-based API * \param[out] ipcs_rw IPC server for read/write pacemaker-based API * \param[out] ipcs_shm IPC server for shared-memory pacemaker-based API * * \note This is a convenience function for calling qb_ipcs_destroy() for each * argument. */ void pcmk__stop_based_ipc(qb_ipcs_service_t *ipcs_ro, qb_ipcs_service_t *ipcs_rw, qb_ipcs_service_t *ipcs_shm) { qb_ipcs_destroy(ipcs_ro); qb_ipcs_destroy(ipcs_rw); qb_ipcs_destroy(ipcs_shm); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-controld API * * \param[in] cb IPC callbacks * * \return Newly created IPC server */ qb_ipcs_service_t * pcmk__serve_controld_ipc(struct qb_ipcs_service_handlers *cb) { return mainloop_add_ipc_server(CRM_SYSTEM_CRMD, QB_IPC_NATIVE, cb); } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-attrd API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_attrd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(T_ATTRD, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Failed to create pacemaker-attrd server: exiting and inhibiting respawn"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemaker-fenced API * * \param[in] cb IPC callbacks * * \note This function exits fatally if unable to create the servers. */ void pcmk__serve_fenced_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server_with_prio("stonith-ng", QB_IPC_NATIVE, cb, QB_LOOP_HIGH); if (*ipcs == NULL) { crm_err("Failed to create fencer: exiting and inhibiting respawn."); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); crm_exit(CRM_EX_FATAL); } } /*! * \internal * \brief Add an IPC server to the main loop for the pacemakerd API * * \param[in] cb IPC callbacks * * \note This function exits with CRM_EX_OSERR if unable to create the servers. */ void pcmk__serve_pacemakerd_ipc(qb_ipcs_service_t **ipcs, struct qb_ipcs_service_handlers *cb) { *ipcs = mainloop_add_ipc_server(CRM_SYSTEM_MCP, QB_IPC_NATIVE, cb); if (*ipcs == NULL) { crm_err("Couldn't start pacemakerd IPC server"); crm_warn("Verify pacemaker and pacemaker_remote are not both enabled."); /* sub-daemons are observed by pacemakerd. Thus we exit CRM_EX_FATAL * if we want to prevent pacemakerd from restarting them. * With pacemakerd we leave the exit-code shown to e.g. systemd * to what it was prior to moving the code here from pacemakerd.c */ crm_exit(CRM_EX_OSERR); } } +/*! + * \internal + * \brief Add an IPC server to the main loop for the pacemaker-schedulerd API + * + * \param[in] cb IPC callbacks + * + * \note This function exits fatally if unable to create the servers. + */ +qb_ipcs_service_t * +pcmk__serve_schedulerd_ipc(struct qb_ipcs_service_handlers *cb) +{ + return mainloop_add_ipc_server(CRM_SYSTEM_PENGINE, QB_IPC_NATIVE, cb); +} + /*! * \brief Check whether string represents a client name used by cluster daemons * * \param[in] name String to check * * \return true if name is standard client name used by daemons, false otherwise * * \note This is provided by the client, and so cannot be used by itself as a * secure means of authentication. */ bool crm_is_daemon_name(const char *name) { name = pcmk__message_name(name); return (!strcmp(name, CRM_SYSTEM_CRMD) || !strcmp(name, CRM_SYSTEM_STONITHD) || !strcmp(name, "stonith-ng") || !strcmp(name, "attrd") || !strcmp(name, CRM_SYSTEM_CIB) || !strcmp(name, CRM_SYSTEM_MCP) || !strcmp(name, CRM_SYSTEM_DC) || !strcmp(name, CRM_SYSTEM_TENGINE) || !strcmp(name, CRM_SYSTEM_LRMD)); }