Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 530e786374..ac33096542 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,396 +1,397 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool
#include <stdio.h> // NULL
#include <sys/param.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <pacemaker-controld.h>
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/*!
 * \internal
 * \brief Filter a cluster-layer message before routing it to the FSA
 *
 * If we are DC and another node also claims to be DC, trigger a new
 * election instead of routing the message. If we are not DC, silently
 * drop messages addressed to the DC.
 *
 * \param[in] msg  Message received from the cluster layer
 */
void
crmd_ha_msg_filter(xmlNode * msg)
{
    bool deliver = true;

    if (AM_I_DC) {
        const char *sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);

        if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
            const char *from = crm_element_value(msg, PCMK__XA_SRC);

            if (!pcmk__str_eq(from, controld_globals.our_nodename,
                              pcmk__str_casei)) {
                int level = LOG_INFO;
                const char *op = crm_element_value(msg, PCMK__XA_CRM_TASK);

                // Make sure the election happens NOW
                if (controld_globals.fsa_state != S_ELECTION) {
                    ha_msg_input_t new_input;

                    level = LOG_WARNING;
                    new_input.msg = msg;
                    register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL,
                                           &new_input, __func__);
                }
                do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);

                // Don't route the rival DC's message, but still run the FSA
                deliver = false;
            }
        }

    } else {
        const char *sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);

        if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
            return; // We're not the DC, so this isn't for us
        }
    }

    if (deliver) {
        route_message(C_HA_MESSAGE, msg);
    }
    controld_trigger_fsa();
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const pcmk__node_status_t *node)
{
if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
// Pacemaker Remote nodes can't be partially alive
if (pcmk__str_eq(node->state, PCMK_VALUE_MEMBER, pcmk__str_none)) {
return 1;
}
return -1;
} else if (pcmk__cluster_is_node_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
pcmk__str_none)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
/*!
 * \internal
 * \brief Membership-layer callback for node status updates
 *
 * Handles name, state, and process-set updates for cluster and Pacemaker
 * Remote nodes, and (when we are DC) records the result in the CIB.
 *
 * \param[in] type  Kind of update (name, state, or processes)
 * \param[in] node  Node whose status changed
 * \param[in] data  Previous value (old state string or old process set),
 *                  depending on \p type
 *
 * NOTE(review): this span contains unresolved diff marker lines ('-'/'+')
 * from the original dump; they are preserved verbatim below.
 */
void
peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
                     const void *data)
{
    uint32_t old = 0;
    bool appeared = FALSE;
    bool is_remote = pcmk_is_set(node->flags, pcmk__node_status_remote);

    // (Re)start or stop the pending-node timer for this node
    controld_node_pending_timer(node);

    /* The controller waits to receive some information from the membership
     * layer before declaring itself operational. If this is being called for a
     * cluster node, indicate that we have it.
     */
    if (!is_remote) {
        controld_set_fsa_input_flags(R_PEER_DATA);
    }

    if ((type == pcmk__node_update_processes)
        && pcmk_is_set(node->processes, crm_get_cluster_proc())
        && !AM_I_DC
        && !is_remote) {
        /*
         * This is a hack until we can send to a nodeid and/or we fix node name lookups
         * These messages are ignored in crmd_ha_msg_filter()
         */
- xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL,
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD);
        crm_debug("Sending hello to node %" PRIu32 " so that it learns our "
                  "node name",
                  node->cluster_layer_id);
        pcmk__cluster_send_message(node, pcmk_ipc_controld, query);

        pcmk__xml_free(query);
    }

    if (node->name == NULL) {
        return; // Can't do anything else until we know the node's name
    }

    switch (type) {
        case pcmk__node_update_name:
            /* If we've never seen the node, then it also won't be in the status section */
            crm_info("%s node %s is now %s",
                     (is_remote? "Remote" : "Cluster"),
                     node->name, state_text(node->state));
            return;

        case pcmk__node_update_state:
            /* This callback should not be called unless the state actually
             * changed, but here's a failsafe just in case.
             */
            CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
                      return);

            crm_info("%s node %s is now %s (was %s)",
                     (is_remote? "Remote" : "Cluster"),
                     node->name, state_text(node->state), state_text(data));

            if (pcmk__str_eq(PCMK_VALUE_MEMBER, node->state, pcmk__str_none)) {
                appeared = TRUE;
                if (!is_remote) {
                    remove_stonith_cleanup(node->name);
                }
            } else {
                // Node left: drop it from sync and election bookkeeping
                controld_remove_failed_sync_node(node->name);
                controld_remove_voter(node->name);
            }

            crmd_alert_node_event(node);
            break;

        case pcmk__node_update_processes:
            CRM_CHECK(data != NULL, return);
            old = *(const uint32_t *)data;
            appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());

            {
                const char *dc_s = controld_globals.dc_name;

                if ((dc_s == NULL) && AM_I_DC) {
                    dc_s = PCMK_VALUE_TRUE;
                }
                crm_info("Node %s is %s a peer " QB_XS
                         " DC=%s old=%#07x new=%#07x",
                         node->name, (appeared? "now" : "no longer"),
                         pcmk__s(dc_s, "<none>"), old, node->processes);
            }

            if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
                /* Peer status did not change. This should not be possible,
                 * since we don't track process flags other than peer status.
                 */
                crm_trace("Process flag %#7x did not change from %#7x to %#7x",
                          crm_get_cluster_proc(), old, node->processes);
                return;
            }

            if (!appeared) {
                node->peer_lost = time(NULL);
                controld_remove_failed_sync_node(node->name);
                controld_remove_voter(node->name);
            }

            if (!pcmk_is_set(controld_globals.fsa_input_register,
                             R_CIB_CONNECTED)) {
                crm_trace("Ignoring peer status change because not connected to CIB");
                return;

            } else if (controld_globals.fsa_state == S_STOPPING) {
                crm_trace("Ignoring peer status change because stopping");
                return;
            }

            if (!appeared
                && pcmk__str_eq(node->name, controld_globals.our_nodename,
                                pcmk__str_casei)) {
                /* Did we get evicted? */
                crm_notice("Our peer connection failed");
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);

            } else if (pcmk__str_eq(node->name, controld_globals.dc_name,
                                    pcmk__str_casei)
                       && !pcmk__cluster_is_node_active(node)) {
                /* The DC has left, so delete its transient attributes and
                 * trigger a new election.
                 *
                 * A DC sends its shutdown request to all peers, who update the
                 * DC's expected state to down. This avoids fencing upon
                 * deletion of its transient attributes.
                 */
                crm_notice("Our peer on the DC (%s) is dead",
                           controld_globals.dc_name);
                register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);

                controld_delete_node_state(node->name, controld_section_attrs,
                                           cib_none);

            } else if (AM_I_DC
                       || pcmk_is_set(controld_globals.flags, controld_dc_left)
                       || (controld_globals.dc_name == NULL)) {
                /* This only needs to be done once, so normally the DC should do
                 * it. However if there is no DC, every node must do it, since
                 * there is no other way to ensure some one node does it.
                 */
                if (appeared) {
                    te_trigger_stonith_history_sync(FALSE);
                } else {
                    controld_delete_node_state(node->name,
                                               controld_section_attrs,
                                               cib_none);
                }
            }
            break;
    }

    if (AM_I_DC) {
        xmlNode *update = NULL;
        int flags = node_update_peer;
        int alive = node_alive(node);
        pcmk__graph_action_t *down = match_down_event(node->xml_id);

        crm_trace("Alive=%d, appeared=%d, down=%d",
                  alive, appeared, (down? down->id : -1));

        if (appeared && (alive > 0) && !is_remote) {
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

        if (down) {
            const char *task = crm_element_value(down->xml, PCMK_XA_OPERATION);

            if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_casei)) {
                const bool confirmed =
                    pcmk_is_set(down->flags, pcmk__graph_action_confirmed);

                /* tengine_stonith_callback() confirms fence actions */
                crm_trace("Updating CIB %s fencer reported fencing of %s complete",
                          (confirmed? "after" : "before"), node->name);

            } else if (!appeared && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN,
                                                 pcmk__str_casei)) {

                // Shutdown actions are immediately confirmed (i.e. no_wait)
                if (!is_remote) {
                    flags |= node_update_join | node_update_expected;
                    crmd_peer_down(node, FALSE);
                    check_join_state(controld_globals.fsa_state, __func__);
                }
                if (alive >= 0) {
                    crm_info("%s of peer %s is in progress " QB_XS " action=%d",
                             task, node->name, down->id);
                } else {
                    crm_notice("%s of peer %s is complete " QB_XS " action=%d",
                               task, node->name, down->id);
                    pcmk__update_graph(controld_globals.transition_graph, down);
                    trigger_graph();
                }

            } else {
                // Node down event did not match what we expected of it
                const char *liveness = "alive";

                if (alive == 0) {
                    liveness = "partially alive";

                } else if (alive < 0) {
                    liveness = "dead";
                }

                crm_trace("Node %s is %s, was expected to %s (op %d)",
                          node->name, liveness, task, down->id);
            }

        } else if (appeared == FALSE) {
            // Node went down without a known down event
            if ((controld_globals.transition_graph == NULL)
                || (controld_globals.transition_graph->id == -1)) {
                crm_info("Stonith/shutdown of node %s is unknown to the "
                         "current DC", node->name);
            } else {
                crm_warn("Stonith/shutdown of node %s was not expected",
                         node->name);
            }
            if (!is_remote) {
                crm_update_peer_join(__func__, node, controld_join_none);
                check_join_state(controld_globals.fsa_state, __func__);
            }
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Node failure", NULL);
            fail_incompletable_actions(controld_globals.transition_graph,
                                       node->xml_id);

        } else {
            crm_trace("Node %s came up, was not expected to be down",
                      node->name);
        }

        if (is_remote) {
            /* A pacemaker_remote node won't have its cluster status updated
             * in the CIB by membership-layer callbacks, so do it here.
             */
            flags |= node_update_cluster;

            /* Trigger resource placement on newly integrated nodes */
            if (appeared) {
                abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                                 "Pacemaker Remote node integrated", NULL);
            }
        }

        if (!appeared && (type == pcmk__node_update_processes)
            && (node->when_member > 1)) {
            /* The node left CPG but is still a cluster member. Set its
             * membership time to 1 to record it in the cluster state as a
             * boolean, so we don't fence it due to
             * PCMK_OPT_NODE_PENDING_TIMEOUT.
             */
            node->when_member = 1;
            flags |= node_update_cluster;
            controld_node_pending_timer(node);
        }

        /* Update the CIB node state */
        update = create_node_state_update(node, flags, NULL, __func__);
        if (update == NULL) {
            crm_debug("Node state update not yet possible for %s", node->name);
        } else {
            fsa_cib_anon_update(PCMK_XE_STATUS, update);
        }
        pcmk__xml_free(update);
    }

    controld_trigger_fsa();
}
/*!
 * \internal
 * \brief Mainloop trigger handler that runs the controller FSA
 *
 * \param[in] user_data  Unused
 *
 * \return TRUE (so the trigger stays registered)
 */
gboolean
crm_fsa_trigger(gpointer user_data)
{
    guint queued = g_list_length(controld_globals.fsa_message_queue);

    crm_trace("Invoked (queue len: %d)", queued);
    s_crmd_fsa(C_FSA_INTERNAL);
    queued = g_list_length(controld_globals.fsa_message_queue);
    crm_trace("Exited (queue len: %d)", queued);
    return TRUE;
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index 43ab797abf..631bf56387 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,697 +1,698 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(pcmk_cluster_t *cluster);
#endif
static void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
/*!
 * \internal
 * \brief A_HA_CONNECT / A_HA_DISCONNECT: manage the cluster-layer connection
 *
 * On connect, registers the peer status callback, connects to the cluster
 * layer (Corosync only, when built with support), and caches the local node
 * name and UUID in controld_globals. On failure, registers an FSA error.
 */
void
do_ha_control(long long action,
              enum crmd_fsa_cause cause,
              enum crmd_fsa_state cur_state,
              enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    gboolean registered = FALSE;
    // Cluster object persists across invocations of this action
    static pcmk_cluster_t *cluster = NULL;

    if (cluster == NULL) {
        cluster = pcmk_cluster_new();
    }

    if (action & A_HA_DISCONNECT) {
        pcmk_cluster_disconnect(cluster);
        crm_info("Disconnected from the cluster");

        controld_set_fsa_input_flags(R_HA_DISCONNECTED);
    }

    if (action & A_HA_CONNECT) {
        // Must be set before connecting, so we don't miss early updates
        pcmk__cluster_set_status_callback(&peer_update_callback);
        pcmk__cluster_set_autoreap(false);

#if SUPPORT_COROSYNC
        if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
            registered = crm_connect_corosync(cluster);
        }
#endif // SUPPORT_COROSYNC

        if (registered) {
            pcmk__node_status_t *node =
                pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name,
                               NULL, pcmk__node_search_cluster_member);

            controld_election_init(cluster->priv->node_name);
            controld_globals.our_nodename = cluster->priv->node_name;
            free(controld_globals.our_uuid);
            controld_globals.our_uuid =
                pcmk__str_copy(pcmk__cluster_node_uuid(node));

            if (controld_globals.our_uuid == NULL) {
                crm_err("Could not obtain local uuid");
                registered = FALSE;
            }
        }

        if (!registered) {
            // Connection failed: flag it and escalate to the FSA
            controld_set_fsa_input_flags(R_HA_DISCONNECTED);
            register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
            return;
        }

        populate_cib_nodes(node_update_none, __func__);
        controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
        crm_info("Connected to the cluster");
    }

    if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
        crm_err("Unexpected action %s in %s", fsa_action2string(action),
                __func__);
    }
}
/* A_SHUTDOWN */
/*!
 * \internal
 * \brief A_SHUTDOWN: begin shutting down, disconnecting from the fencer
 */
void
do_shutdown(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* just in case */
    controld_set_fsa_input_flags(R_SHUTDOWN);
    controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
/*!
 * \internal
 * \brief A_SHUTDOWN_REQ: broadcast a shutdown request to all peers
 *
 * On send failure, registers an FSA error.
 *
 * NOTE(review): this span contains unresolved diff marker lines ('-'/'+')
 * from the original dump; they are preserved verbatim below.
 */
void
do_shutdown_req(long long action,
                enum crmd_fsa_cause cause,
                enum crmd_fsa_state cur_state,
                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *msg = NULL;

    controld_set_fsa_input_flags(R_SHUTDOWN);
    //controld_set_fsa_input_flags(R_STAYDOWN);
    crm_info("Sending shutdown request to all peers (DC is %s)",
             pcmk__s(controld_globals.dc_name, "not set"));

- msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD,
+ CRM_SYSTEM_CRMD);

    if (!pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg)) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
    pcmk__xml_free(msg);
}
/*!
 * \internal
 * \brief Exit the controller immediately, without graceful cleanup
 *
 * Remaps the exit code to fatal when respawn must be inhibited, or to
 * error when exiting "successfully" out of recovery, then finishes the
 * logger output and exits.
 *
 * \param[in] exit_code  Requested exit status
 */
void
crmd_fast_exit(crm_exit_t exit_code)
{
    pcmk__output_t *out = NULL;

    if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
        crm_warn("Inhibiting respawn " QB_XS " remapping exit code %d to %d",
                 exit_code, CRM_EX_FATAL);
        exit_code = CRM_EX_FATAL;

    } else if ((exit_code == CRM_EX_OK)
               && pcmk_is_set(controld_globals.fsa_input_register,
                              R_IN_RECOVERY)) {
        crm_err("Could not recover from internal error");
        exit_code = CRM_EX_ERROR;
    }

    out = controld_globals.logger_out;
    if (out != NULL) {
        out->finish(out, exit_code, true, NULL);
        pcmk__output_free(out);
        controld_globals.logger_out = NULL;
    }

    crm_exit(exit_code);
}
/*!
 * \internal
 * \brief Gracefully shut down the controller
 *
 * Disconnects all subsystems, frees as much memory as possible (to keep
 * valgrind output clean), drains the main loop, and returns. A re-entrant
 * call with an error code escalates to crmd_fast_exit().
 *
 * \param[in] exit_code  Requested exit status
 *
 * \return Exit status that the caller should eventually exit with
 *
 * \note Teardown order below is significant; do not reorder casually.
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
    GMainLoop *mloop = controld_globals.mainloop;

    // Guards against re-entry while shutdown is underway
    static bool in_progress = FALSE;

    if (in_progress && (exit_code == CRM_EX_OK)) {
        crm_debug("Exit is already in progress");
        return exit_code;

    } else if(in_progress) {
        // Second failure during shutdown: bail out immediately
        crm_notice("Error during shutdown process, exiting now with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

    in_progress = TRUE;
    crm_trace("Preparing to exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));

    /* Suppress secondary errors resulting from us disconnecting everything */
    controld_set_fsa_input_flags(R_HA_DISCONNECTED);

/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */

    if(ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    controld_close_attrd_ipc();
    controld_shutdown_schedulerd_ipc();
    controld_disconnect_fencer(TRUE);

    if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
        crm_debug("No mainloop detected");
        exit_code = CRM_EX_ERROR;
    }

    /* On an error, just get out.
     *
     * Otherwise, make the effort to have mainloop exit gracefully so
     * that it (mostly) cleans up after itself and valgrind has less
     * to report on - allowing real errors stand out
     */
    if (exit_code != CRM_EX_OK) {
        crm_notice("Forcing immediate exit with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

/* Clean up as much memory as possible for valgrind */

    // Log and free any FSA inputs still queued
    for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
         iter = iter->next) {
        fsa_data_t *fsa_data = (fsa_data_t *) iter->data;

        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                 fsa_input2string(fsa_data->fsa_input),
                 fsa_state2string(controld_globals.fsa_state),
                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        delete_fsa_input(fsa_data);
    }

    controld_clear_fsa_input_flags(R_MEMBERSHIP);

    g_list_free(controld_globals.fsa_message_queue);
    controld_globals.fsa_message_queue = NULL;

    controld_free_node_pending_timers();
    controld_election_fini();

    /* Tear down the CIB manager connection, but don't free it yet -- it could
     * be used when we drain the mainloop later.
     */

    controld_disconnect_cib_manager();

    verify_stopped(controld_globals.fsa_state, LOG_WARNING);
    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
    lrm_state_destroy_all();

    mainloop_destroy_trigger(config_read_trigger);
    config_read_trigger = NULL;

    controld_destroy_fsa_trigger();
    controld_destroy_transition_trigger();

    pcmk__client_cleanup();
    pcmk__cluster_destroy_node_caches();

    controld_free_fsa_timers();
    te_cleanup_stonith_history_sync(NULL, TRUE);
    controld_free_sched_timer();

    // Free cached global strings
    free(controld_globals.our_nodename);
    controld_globals.our_nodename = NULL;

    free(controld_globals.our_uuid);
    controld_globals.our_uuid = NULL;

    free(controld_globals.dc_name);
    controld_globals.dc_name = NULL;

    free(controld_globals.dc_version);
    controld_globals.dc_version = NULL;

    free(controld_globals.cluster_name);
    controld_globals.cluster_name = NULL;

    free(controld_globals.te_uuid);
    controld_globals.te_uuid = NULL;

    free_max_generation();
    controld_destroy_failed_sync_table();
    controld_destroy_outside_events_table();

    mainloop_destroy_signal(SIGPIPE);
    mainloop_destroy_signal(SIGUSR1);
    mainloop_destroy_signal(SIGTERM);
    mainloop_destroy_signal(SIGTRAP);
    /* leave SIGCHLD engaged as we might still want to drain some service-actions */

    if (mloop) {
        GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);

        /* Don't re-enter this block */
        controld_globals.mainloop = NULL;

        /* no signals on final draining anymore */
        mainloop_destroy_signal(SIGCHLD);

        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));

        {
            int lpc = 0;

            // Dispatch pending events, but give up after 10 iterations
            while((g_main_context_pending(ctx) && lpc < 10)) {
                lpc++;
                crm_trace("Iteration %d", lpc);
                g_main_context_dispatch(ctx);
            }
        }

        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
        g_main_loop_quit(mloop);

        /* Won't do anything yet, since we're inside it now */
        g_main_loop_unref(mloop);

    } else {
        mainloop_destroy_signal(SIGCHLD);
    }

    cib_delete(controld_globals.cib_conn);
    controld_globals.cib_conn = NULL;

    throttle_fini();

    /* Graceful */
    crm_trace("Done preparing for exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));
    return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
/*!
 * \internal
 * \brief A_EXIT_0 / A_EXIT_1: verify everything is stopped, then exit
 */
void
do_exit(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
        fsa_data_t * msg_data)
{
    // A_EXIT_1 indicates an error exit; anything else is a clean one
    crm_exit_t exit_code = pcmk_is_set(action, A_EXIT_1)? CRM_EX_ERROR
                                                        : CRM_EX_OK;

    if (exit_code == CRM_EX_ERROR) {
        crm_err("Exiting now due to errors");
    }
    verify_stopped(cur_state, LOG_ERR);
    crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
/*!
 * \internal
 * \brief A_STARTUP: register signal handlers, triggers, and subsystem objects
 *
 * Registers an FSA error if the FSA timers cannot be initialized.
 */
void
do_startup(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_debug("Registering Signal Handlers");
    mainloop_add_signal(SIGTERM, crm_shutdown);
    mainloop_add_signal(SIGPIPE, sigpipe_ignore);

    config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
                                               crm_read_options, NULL);

    controld_init_fsa_trigger();
    controld_init_transition_trigger();

    crm_debug("Creating CIB manager and executor objects");
    controld_globals.cib_conn = cib_new();

    lrm_state_init_local();
    if (controld_init_fsa_timers() == FALSE) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
// \return libqb error code (0 on success, -errno on error)
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
    crm_trace("Accepting new IPC client connection");

    // Allocation failure is the only way pcmk__new_client() returns NULL here
    return (pcmk__new_client(c, uid, gid) == NULL)? -ENOMEM : 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, PCMK__XE_ACK, NULL,
CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, PCMK__XA_CRM_USER, client->user);
crm_xml_add(msg, PCMK__XA_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
controld_trigger_fsa();
pcmk__xml_free(msg);
return 0;
}
// \brief Clean up after an IPC client disconnects
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
    pcmk__client_t *client = pcmk__find_client(c);

    if (client == NULL) {
        return 0; // Unknown connection; nothing to clean up
    }

    crm_trace("Disconnecting %sregistered client %s (%p/%p)",
              (client->userdata? "" : "un"), pcmk__client_name(client),
              c, client);
    free(client->userdata);
    pcmk__free_client(client);
    controld_trigger_fsa();
    return 0;
}
// \brief Treat connection destruction the same as a client disconnect
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p", c);
    ipc_client_disconnected(c);
}
/* A_STOP */
/*!
 * \internal
 * \brief A_STOP: close the IPC server and queue FSA termination
 */
void
do_stop(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
        fsa_data_t * msg_data)
{
    crm_trace("Closing IPC server");
    mainloop_del_ipc_server(ipcs);
    ipcs = NULL;
    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
/*!
 * \internal
 * \brief A_STARTED: finish startup once all prerequisites are satisfied
 *
 * Stalls the FSA (to retry later) until membership, executor, CIB,
 * configuration, and peer data are all available, then starts the IPC
 * server and begins connecting to the fencer.
 */
void
do_started(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    static struct qb_ipcs_service_handlers crmd_callbacks = {
        .connection_accept = accept_controller_client,
        .connection_created = NULL,
        .msg_process = dispatch_controller_ipc,
        .connection_closed = ipc_client_disconnected,
        .connection_destroyed = ipc_connection_destroyed
    };

    if (cur_state != S_STARTING) {
        crm_err("Start cancelled... %s", fsa_state2string(cur_state));
        return;

    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                            R_MEMBERSHIP)) {
        crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                            R_LRM_CONNECTED)) {
        crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                            R_CIB_CONNECTED)) {
        crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(controld_globals.fsa_input_register,
                            R_READ_CONFIG)) {
        crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
        crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
        crmd_fsa_stall(TRUE);
        return;
    }

    crm_debug("Init server comms");
    ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
    if (ipcs == NULL) {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    } else {
        crm_notice("Pacemaker controller successfully started and accepting connections");
    }
    controld_set_fsa_input_flags(R_ST_REQUIRED);
    controld_timer_fencer_connect(GINT_TO_POINTER(TRUE));

    controld_clear_fsa_input_flags(R_STARTING);
    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
/*!
 * \internal
 * \brief A_RECOVER: enter recovery by fast-tracking a shutdown
 */
void
do_recover(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input,
           fsa_data_t * msg_data)
{
    controld_set_fsa_input_flags(R_IN_RECOVERY);
    crm_warn("Fast-tracking shutdown in response to errors");
    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/*!
 * \internal
 * \brief CIB callback: parse cluster options and alerts from a config query
 *
 * Unpacks the crm_config section into a hash table, applies each option to
 * the relevant controller subsystem, unpacks alerts, and finally sets
 * R_READ_CONFIG. A query failure registers an FSA error (and R_STAYDOWN
 * for permission/schema problems).
 *
 * \param[in] msg        CIB reply (unused directly)
 * \param[in] call_id    CIB call ID (for logging)
 * \param[in] rc         Result code of the query
 * \param[in] output     Queried XML (crm_config and alerts sections)
 * \param[in] user_data  Unused
 */
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
    const char *value = NULL;
    GHashTable *config_hash = NULL;
    crm_time_t *now = crm_time_new(NULL);
    xmlNode *crmconfig = NULL;
    xmlNode *alerts = NULL;

    if (rc != pcmk_ok) {
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);

        if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
            crm_err("The cluster is mis-configured - shutting down and staying down");
            controld_set_fsa_input_flags(R_STAYDOWN);
        }
        goto bail;
    }

    // The query result may be the crm_config element itself or a parent
    crmconfig = output;
    if ((crmconfig != NULL) && !pcmk__xe_is(crmconfig, PCMK_XE_CRM_CONFIG)) {
        crmconfig = pcmk__xe_first_child(crmconfig, PCMK_XE_CRM_CONFIG, NULL,
                                         NULL);
    }
    if (!crmconfig) {
        fsa_data_t *msg_data = NULL;

        crm_err("Local CIB query for " PCMK_XE_CRM_CONFIG " section failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        goto bail;
    }

    crm_debug("Call %d : Parsing CIB options", call_id);
    config_hash = pcmk__strkey_table(free, free);
    pe_unpack_nvpairs(crmconfig, crmconfig, PCMK_XE_CLUSTER_PROPERTY_SET, NULL,
                      config_hash, PCMK_VALUE_CIB_BOOTSTRAP_OPTIONS, FALSE, now,
                      NULL);

    // Validate all options, and use defaults if not already present in hash
    pcmk__validate_cluster_options(config_hash);

    /* Validate the watchdog timeout in the context of the local node
     * environment. If invalid, the controller will exit with a fatal error.
     *
     * We do this via a wrapper in the controller, so that we call
     * pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is
     * enabled for the local node. Otherwise, we may exit unnecessarily.
     *
     * A validator function in libcrmcommon can't act as such a wrapper, because
     * it doesn't have a stonith API connection or the local node name.
     */
    value = g_hash_table_lookup(config_hash, PCMK_OPT_STONITH_WATCHDOG_TIMEOUT);
    controld_verify_stonith_watchdog_timeout(value);

    value = g_hash_table_lookup(config_hash, PCMK_OPT_NO_QUORUM_POLICY);
    if (pcmk__str_eq(value, PCMK_VALUE_FENCE_LEGACY, pcmk__str_casei)
        && (pcmk__locate_sbd() != 0)) {
        controld_set_global_flags(controld_no_quorum_suicide);
    }

    value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK);
    if (crm_is_true(value)) {
        controld_set_global_flags(controld_shutdown_lock_enabled);
    } else {
        controld_clear_global_flags(controld_shutdown_lock_enabled);
    }

    // Interval options are parsed as milliseconds, stored as seconds
    value = g_hash_table_lookup(config_hash, PCMK_OPT_SHUTDOWN_LOCK_LIMIT);
    pcmk_parse_interval_spec(value, &controld_globals.shutdown_lock_limit);
    controld_globals.shutdown_lock_limit /= 1000;

    value = g_hash_table_lookup(config_hash, PCMK_OPT_NODE_PENDING_TIMEOUT);
    pcmk_parse_interval_spec(value, &controld_globals.node_pending_timeout);
    controld_globals.node_pending_timeout /= 1000;

    value = g_hash_table_lookup(config_hash, PCMK_OPT_CLUSTER_NAME);
    pcmk__str_update(&(controld_globals.cluster_name), value);

    // Let subcomponents initialize their own static variables
    controld_configure_election(config_hash);
    controld_configure_fencing(config_hash);
    controld_configure_fsa_timers(config_hash);
    controld_configure_throttle(config_hash);

    alerts = pcmk__xe_first_child(output, PCMK_XE_ALERTS, NULL, NULL);
    crmd_unpack_alerts(alerts);

    controld_set_fsa_input_flags(R_READ_CONFIG);
    controld_trigger_fsa();

    g_hash_table_destroy(config_hash);
  bail:
    crm_time_free(now);
}
/*!
* \internal
* \brief Trigger read and processing of the configuration
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_config_as(const char *fn, int line)
{
    if (config_read_trigger == NULL) {
        return; // Trigger not created yet (or already destroyed)
    }
    crm_trace("%s:%d - Triggered config processing", fn, line);
    mainloop_set_trigger(config_read_trigger);
}
/*!
 * \internal
 * \brief Trigger handler: query crm_config and alerts sections from the CIB
 *
 * \param[in] user_data  Unused
 *
 * \return TRUE (so the trigger stays registered)
 */
gboolean
crm_read_options(gpointer user_data)
{
    cib_t *cib = controld_globals.cib_conn;
    int query_id =
        cib->cmds->query(cib,
                         "//" PCMK_XE_CRM_CONFIG " | //" PCMK_XE_ALERTS,
                         NULL, cib_xpath);

    // Result is handled asynchronously by config_query_callback()
    fsa_register_cib_callback(query_id, NULL, config_query_callback);
    crm_trace("Querying the CIB... call %d", query_id);
    return TRUE;
}
/* A_READCONFIG */
/*!
 * \internal
 * \brief A_READCONFIG: initialize throttling and schedule a config read
 */
void
do_read_config(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    throttle_init();
    controld_trigger_config();
}
/*!
 * \internal
 * \brief SIGTERM handler: request (or escalate) a graceful shutdown
 *
 * Outside the main loop, exits immediately; during an already-requested
 * shutdown, escalates with an error input; otherwise queues I_SHUTDOWN
 * and starts the shutdown-escalation countdown.
 *
 * \param[in] nsig  Signal number (unused)
 */
static void
crm_shutdown(int nsig)
{
    const char *value = NULL;
    guint default_period_ms = 0;

    if ((controld_globals.mainloop == NULL)
        || !g_main_loop_is_running(controld_globals.mainloop)) {
        crmd_exit(CRM_EX_OK);
        return;
    }

    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
        // Second SIGTERM while already shutting down: escalate
        crm_err("Escalating shutdown");
        register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
        return;
    }

    controld_set_fsa_input_flags(R_SHUTDOWN);
    register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);

    /* If shutdown timer doesn't have a period set, use the default
     *
     * @TODO: Evaluate whether this is still necessary. As long as
     * config_query_callback() has been run at least once, it doesn't look like
     * anything could have changed the timer period since then.
     */
    value = pcmk__cluster_option(NULL, PCMK_OPT_SHUTDOWN_ESCALATION);
    pcmk_parse_interval_spec(value, &default_period_ms);
    controld_shutdown_start_countdown(default_period_ms);
}
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
index c1da3a8d3b..6be4f11f05 100644
--- a/daemons/controld/controld_join_client.c
+++ b/daemons/controld/controld_join_client.c
@@ -1,370 +1,370 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/common/xml.h>
#include <pacemaker-controld.h>
void join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
extern ha_msg_input_t *copy_ha_msg_input(ha_msg_input_t * orig);
/*!
* \internal
* \brief Remember if DC is shutting down as we join
*
* If we're joining while the current DC is shutting down, update its expected
* state, so we don't fence it if we become the new DC. (We weren't a peer
* when it broadcast its shutdown request.)
*
* \param[in] msg A join message from the DC
*/
static void
update_dc_expected(const xmlNode *msg)
{
    pcmk__node_status_t *dc_node = NULL;

    if ((controld_globals.dc_name == NULL)
        || !pcmk__xe_attr_is_true(msg, PCMK__XA_DC_LEAVING)) {
        return; // No known DC, or it isn't announcing a shutdown
    }

    dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL,
                             pcmk__node_search_cluster_member);
    pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN);
}
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
/*!
 * \internal
 * \brief A_CL_JOIN_QUERY: broadcast an announcement to discover the DC
 *
 * NOTE(review): this span contains unresolved diff marker lines ('-'/'+')
 * from the original dump; they are preserved verbatim below.
 */
void
do_cl_join_query(long long action,
                 enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
- CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD);

    sleep(1); // Give the cluster layer time to propagate to the DC

    update_dc(NULL); /* Unset any existing value so that the result is not discarded */
    crm_debug("Querying for a DC");
    pcmk__cluster_send_message(NULL, pcmk_ipc_controld, req);
    pcmk__xml_free(req);
}
/* A_CL_JOIN_ANNOUNCE */
/* this is kind of a workaround for the fact that we may not be around or
* are otherwise unable to reply when the DC sends out A_DC_JOIN_OFFER_ALL
*/

/* Broadcast our availability to join, but only from the S_PENDING state and
 * only after local startup has finished (R_STARTING cleared).
 */
void
do_cl_join_announce(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* don't announce if we're in one of these states */
if (cur_state != S_PENDING) {
crm_warn("Not announcing cluster join because in state %s",
fsa_state2string(cur_state));
return;
}
if (!pcmk_is_set(controld_globals.fsa_input_register, R_STARTING)) {
/* send as a broadcast */
xmlNode *req = create_request(CRM_OP_JOIN_ANNOUNCE, NULL, NULL,
- CRM_SYSTEM_DC, CRM_SYSTEM_CRMD, NULL);
+ CRM_SYSTEM_DC, CRM_SYSTEM_CRMD);
crm_debug("Announcing availability");
/* Unset any known DC so that its reply is accepted */
update_dc(NULL);
pcmk__cluster_send_message(NULL, pcmk_ipc_controld, req);
pcmk__xml_free(req);
} else {
/* Delay announce until we have finished local startup */
crm_warn("Delaying announce of cluster join until local startup is complete");
return;
}
}
static int query_call_id = 0;
/* A_CL_JOIN_REQUEST */
/* aka. accept the welcome offer */

/* Accept a join offer from the DC: validate the sender as our DC, then query
 * our CIB generation asynchronously; join_query_callback() sends the actual
 * join request when the query result arrives.
 */
void
do_cl_join_offer_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
cib_t *cib_conn = controld_globals.cib_conn;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *welcome_from;
const char *join_id;
CRM_CHECK(input != NULL, return);
welcome_from = crm_element_value(input->msg, PCMK__XA_SRC);
join_id = crm_element_value(input->msg, PCMK__XA_JOIN_ID);
crm_trace("Accepting cluster join offer from node %s " QB_XS " join-%s",
welcome_from, crm_element_value(input->msg, PCMK__XA_JOIN_ID));
/* we only ever want the last one */
if (query_call_id > 0) {
crm_trace("Cancelling previous join query: %d", query_call_id);
remove_cib_op_callback(query_call_id, FALSE);
query_call_id = 0;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding cluster join offer from node %s (expected %s)",
welcome_from, controld_globals.dc_name);
return;
}
update_dc_expected(input->msg);
/* The join_id copy is owned by the callback and freed with it */
query_call_id = cib_conn->cmds->query(cib_conn, NULL, NULL,
cib_no_children);
fsa_register_cib_callback(query_call_id, pcmk__str_copy(join_id),
join_query_callback);
crm_trace("Registered join query callback: %d", query_call_id);
/* We have a DC now, so stop the DC-election timer */
controld_set_fsa_action_flags(A_DC_TIMER_STOP);
controld_trigger_fsa();
}
/* CIB query callback: reply to the DC's join offer with a join request that
 * carries our CIB generation, unless the query failed, was superseded by a
 * newer one, or the DC is no longer known.
 */
void
join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
char *join_id = user_data;
xmlNode *generation = pcmk__xe_create(NULL, PCMK__XE_GENERATION_TUPLE);
CRM_LOG_ASSERT(join_id != NULL);
if (query_call_id != call_id) {
crm_trace("Query %d superseded", call_id);
goto done;
}
query_call_id = 0;
if(rc != pcmk_ok || output == NULL) {
crm_err("Could not retrieve version details for join-%s: %s (%d)",
join_id, pcmk_strerror(rc), rc);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, NULL, NULL, __func__);
} else if (controld_globals.dc_name == NULL) {
crm_debug("Membership is in flux, not continuing join-%s", join_id);
} else {
xmlNode *reply = NULL;
const pcmk__node_status_t *dc_node =
pcmk__get_node(0, controld_globals.dc_name, NULL,
pcmk__node_search_cluster_member);
crm_debug("Respond to join offer join-%s from %s",
join_id, controld_globals.dc_name);
/* Copy the queried CIB's generation attributes onto the reply data */
pcmk__xe_copy_attrs(generation, output, pcmk__xaf_none);
reply = create_request(CRM_OP_JOIN_REQUEST, generation,
controld_globals.dc_name, CRM_SYSTEM_DC,
- CRM_SYSTEM_CRMD, NULL);
+ CRM_SYSTEM_CRMD);
crm_xml_add(reply, PCMK__XA_JOIN_ID, join_id);
crm_xml_add(reply, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
pcmk__cluster_send_message(dc_node, pcmk_ipc_controld, reply);
pcmk__xml_free(reply);
}
done:
pcmk__xml_free(generation);
}
/*!
 * \internal
 * \brief Apply the configured start state (standby or online) to a node
 *
 * \param[in] start_state  Configured start state ("standby", "online",
 *                         "default", or anything else, which is ignored with
 *                         a warning)
 * \param[in] node_name    Name of node whose state to set
 * \param[in] node_uuid    UUID of node whose state to set
 * \param[in] remote       Whether the node is a Pacemaker Remote node
 */
void
set_join_state(const char *start_state, const char *node_name, const char *node_uuid,
bool remote)
{
    const char *standby_value = NULL;

    if (pcmk__str_eq(start_state, PCMK_VALUE_STANDBY, pcmk__str_casei)) {
        standby_value = PCMK_VALUE_TRUE;

    } else if (pcmk__str_eq(start_state, PCMK_VALUE_ONLINE, pcmk__str_casei)) {
        standby_value = PCMK_VALUE_FALSE;

    } else if (pcmk__str_eq(start_state, PCMK_VALUE_DEFAULT, pcmk__str_casei)) {
        crm_debug("Not forcing a starting state on node %s", node_name);
        return;

    } else {
        crm_warn("Unrecognized start state '%s', using "
                 "'" PCMK_VALUE_DEFAULT "' (%s)",
                 start_state, node_name);
        return;
    }

    // Both recognized states set the standby node attribute (true or false)
    crm_notice("Forcing node %s to join in %s state per configured "
               "environment", node_name, start_state);
    cib__update_node_attr(controld_globals.logger_out,
                          controld_globals.cib_conn, cib_sync_call,
                          PCMK_XE_NODES, node_uuid,
                          NULL, NULL, NULL, PCMK_NODE_ATTR_STANDBY,
                          standby_value, NULL,
                          (remote? PCMK_VALUE_REMOTE : NULL));
}
/*!
 * \internal
 * \brief Update the remote node cache from one node entry sent by the DC
 *
 * \param[in] node      XML element describing one remote node
 * \param[in] userdata  Unused
 *
 * \return pcmk_rc_ok (always, so iteration over siblings continues)
 */
static int
update_conn_host_cache(xmlNode *node, void *userdata)
{
    const char *remote_name = crm_element_value(node, PCMK_XA_ID);
    pcmk__node_status_t *cache_entry =
        pcmk__cluster_lookup_remote_node(remote_name);

    if (cache_entry != NULL) {
        const char *host = crm_element_value(node, PCMK__XA_CONNECTION_HOST);
        const char *peer_state = crm_element_value(node, PCMK__XA_NODE_STATE);

        if (host != NULL) {
            pcmk__str_update(&cache_entry->conn_host, host);
        }
        if (peer_state != NULL) {
            pcmk__update_peer_state(__func__, cache_entry, peer_state, 0);
        }
    }
    return pcmk_rc_ok;
}
/* A_CL_JOIN_RESULT */
/* aka. this is notification that we have (or have not) been accepted */

/* Handle the DC's ack/nack of our join request: on nack, shut down and stay
 * down; on ack, record our feature set, send our executor state to the DC as
 * join confirmation, and (on first join) apply any configured start state.
 */
void
do_cl_join_finalize_respond(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *tmp1 = NULL;
gboolean was_nack = TRUE;
static gboolean first_join = TRUE;
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
int join_id = -1;
const char *op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
const char *welcome_from = crm_element_value(input->msg, PCMK__XA_SRC);
if (!pcmk__str_eq(op, CRM_OP_JOIN_ACKNAK, pcmk__str_casei)) {
crm_trace("Ignoring op=%s message", op);
return;
}
/* calculate if it was an ack or a nack */
if (pcmk__xe_attr_is_true(input->msg, CRM_OP_JOIN_ACKNAK)) {
was_nack = FALSE;
}
crm_element_value_int(input->msg, PCMK__XA_JOIN_ID, &join_id);
if (was_nack) {
crm_err("Shutting down because cluster join with leader %s failed "
QB_XS " join-%d NACK'd", welcome_from, join_id);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
/* Stay down so we don't loop through repeated failed joins */
controld_set_fsa_input_flags(R_STAYDOWN);
return;
}
if (!AM_I_DC
&& pcmk__str_eq(welcome_from, controld_globals.our_nodename,
pcmk__str_casei)) {
crm_warn("Discarding our own welcome - we're no longer the DC");
return;
}
if (update_dc(input->msg) == FALSE) {
crm_warn("Discarding %s from node %s (expected from %s)",
op, welcome_from, controld_globals.dc_name);
return;
}
update_dc_expected(input->msg);
/* record the node's feature set as a transient attribute */
update_attrd(controld_globals.our_nodename, CRM_ATTR_FEATURE_SET,
CRM_FEATURE_SET, NULL, FALSE);
/* send our status section to the DC */
tmp1 = controld_query_executor_state();
if (tmp1 != NULL) {
xmlNode *remotes = NULL;
xmlNode *reply = create_request(CRM_OP_JOIN_CONFIRM, tmp1,
controld_globals.dc_name, CRM_SYSTEM_DC,
- CRM_SYSTEM_CRMD, NULL);
+ CRM_SYSTEM_CRMD);
const pcmk__node_status_t *dc_node =
pcmk__get_node(0, controld_globals.dc_name, NULL,
pcmk__node_search_cluster_member);
crm_xml_add_int(reply, PCMK__XA_JOIN_ID, join_id);
crm_debug("Confirming join-%d: sending local operation history to %s",
join_id, controld_globals.dc_name);
/*
* If this is the node's first join since the controller started on it,
* set its initial state (standby or member) according to the user's
* preference.
*
* We do not clear the LRM history here. Even if the DC failed to do it
* when we last left, removing them here creates a race condition if the
* controller is being recovered. Instead of a list of active resources
* from the executor, we may end up with a blank status section. If we
* are _NOT_ lucky, we will probe for the "wrong" instance of anonymous
* clones and end up with multiple active instances on the machine.
*/
if (first_join
&& !pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
first_join = FALSE;
if (start_state) {
set_join_state(start_state, controld_globals.our_nodename,
controld_globals.our_uuid, false);
}
}
pcmk__cluster_send_message(dc_node, pcmk_ipc_controld, reply);
pcmk__xml_free(reply);
if (AM_I_DC == FALSE) {
register_fsa_input_adv(cause, I_NOT_DC, NULL, A_NOTHING, TRUE,
__func__);
}
pcmk__xml_free(tmp1);
/* Update the remote node cache with information about which node
* is hosting the connection.
*/
remotes = pcmk__xe_first_child(input->msg, PCMK_XE_NODES, NULL, NULL);
if (remotes != NULL) {
pcmk__xe_foreach_child(remotes, PCMK_XE_NODE,
update_conn_host_cache, NULL);
}
} else {
crm_err("Could not confirm join-%d with %s: Local operation history "
"failed", join_id, controld_globals.dc_name);
register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
}
}
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index 4d48ac0a60..34c9367f77 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -1,1083 +1,1083 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h> // bool, true, false
#include <stdio.h> // NULL
#include <stdlib.h> // free(), etc.
#include <glib.h> // gboolean, etc.
#include <libxml/tree.h> // xmlNode
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
static char *max_generation_from = NULL;
static xmlNodePtr max_generation_xml = NULL;
/*!
* \internal
* \brief Nodes from which a CIB sync has failed since the peer joined
*
* This table is of the form (<tt>node_name -> join_id</tt>). \p node_name is
* the name of a client node from which a CIB \p sync_from() call has failed in
* \p do_dc_join_finalize() since the client joined the cluster as a peer.
* \p join_id is the ID of the join round in which the \p sync_from() failed,
* and is intended for use in nack log messages.
*/
static GHashTable *failed_sync_nodes = NULL;
void finalize_join_for(gpointer key, gpointer value, gpointer user_data);
void finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data);
gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/* Numeric counter used to identify join rounds (an unsigned int would be
* appropriate, except we get and set it in XML as int)
*/
static int current_join_id = 0;
/*!
 * \internal
 * \brief Get log-friendly string equivalent of a controller group join phase
 *
 * \param[in] phase  Join phase
 *
 * \return Log-friendly string equivalent of \p phase ("invalid" for
 *         unrecognized values)
 */
static const char *
join_phase_text(enum controld_join_phase phase)
{
    switch (phase) {
        case controld_join_nack:        return "nack";
        case controld_join_none:        return "none";
        case controld_join_welcomed:    return "welcomed";
        case controld_join_integrated:  return "integrated";
        case controld_join_finalized:   return "finalized";
        case controld_join_confirmed:   return "confirmed";
        default:                        return "invalid";
    }
}
/*!
 * \internal
 * \brief Destroy the hash table of failed sync nodes, if it was ever created
 */
void
controld_destroy_failed_sync_table(void)
{
    if (failed_sync_nodes == NULL) {
        return; // Table was never created
    }
    g_hash_table_destroy(failed_sync_nodes);
    failed_sync_nodes = NULL;
}
/*!
 * \internal
 * \brief Remove a node from the failed sync nodes table if present
 *
 * \param[in] node_name  Node name to remove
 */
void
controld_remove_failed_sync_node(const char *node_name)
{
    if (failed_sync_nodes == NULL) {
        return; // Nothing has been recorded, so nothing to remove
    }
    g_hash_table_remove(failed_sync_nodes, (gchar *) node_name);
}
/*!
 * \internal
 * \brief Add to a hash table a node whose CIB failed to sync
 *
 * \param[in] node_name  Name of node whose CIB failed to sync
 * \param[in] join_id    Join round when the failure occurred
 */
static void
record_failed_sync_node(const char *node_name, gint join_id)
{
    gboolean inserted = FALSE;

    if (failed_sync_nodes == NULL) {
        failed_sync_nodes = pcmk__strikey_table(g_free, NULL);
    }

    /* Perform the insertion outside the assertion macro so the side effect
     * cannot depend on how the macro is compiled
     */
    inserted = g_hash_table_insert(failed_sync_nodes, g_strdup(node_name),
                                   GINT_TO_POINTER(join_id));

    /* If the node was already in the table (insert returns FALSE), we failed
     * to nack it during the filter offer step
     */
    CRM_LOG_ASSERT(inserted);
}
/*!
 * \internal
 * \brief Look up a node name in the failed sync table
 *
 * \param[in]  node_name  Name of node to look up
 * \param[out] join_id    Where to store the join ID of when the sync failed
 *
 * \return Standard Pacemaker return code (\c pcmk_rc_ok if the node name was
 *         found, or \c pcmk_rc_node_unknown otherwise)
 * \note \p *join_id is set to -1 if the node is not found.
 */
static int
lookup_failed_sync_node(const char *node_name, gint *join_id)
{
    gpointer result = NULL;

    *join_id = -1;

    if (failed_sync_nodes == NULL) {
        return pcmk_rc_node_unknown;
    }

    result = g_hash_table_lookup(failed_sync_nodes, (gchar *) node_name);
    if (result == NULL) {
        return pcmk_rc_node_unknown;
    }

    *join_id = GPOINTER_TO_INT(result);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Update a node's join phase, enforcing valid phase transitions
 *
 * \param[in]     source  Name of calling function (for logging)
 * \param[in,out] node    Node whose join phase should be updated
 * \param[in]     phase   New join phase
 *
 * \note A phase may be reset (to controld_join_none or below) at any time;
 *       otherwise it may only advance one step at a time.
 */
void
crm_update_peer_join(const char *source, pcmk__node_status_t *node,
                     enum controld_join_phase phase)
{
    enum controld_join_phase last = controld_join_none;

    // Validate the node before reading its current phase
    CRM_CHECK(node != NULL, return);

    /* Remote nodes do not participate in joins */
    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        return;
    }

    last = controld_get_join_phase(node);

    if (phase == last) {
        crm_trace("Node %s join-%d phase is still %s "
                  QB_XS " nodeid=%" PRIu32 " source=%s",
                  node->name, current_join_id, join_phase_text(last),
                  node->cluster_layer_id, source);
        return;
    }

    if ((phase <= controld_join_none) || (phase == (last + 1))) {
        struct controld_node_status_data *data =
            pcmk__assert_alloc(1, sizeof(struct controld_node_status_data));

        data->join_phase = phase;

        /* NOTE(review): any previous node->user_data is overwritten without
         * being freed here — confirm ownership is handled elsewhere
         */
        node->user_data = data;
        crm_trace("Node %s join-%d phase is now %s (was %s) "
                  QB_XS " nodeid=%" PRIu32 " source=%s",
                  node->name, current_join_id, join_phase_text(phase),
                  join_phase_text(last), node->cluster_layer_id,
                  source);
        return;
    }

    crm_warn("Rejecting join-%d phase update for node %s because can't go from "
             "%s to %s " QB_XS " nodeid=%" PRIu32 " source=%s",
             current_join_id, node->name, join_phase_text(last),
             join_phase_text(phase), node->cluster_layer_id, source);
}
/*!
 * \internal
 * \brief Begin a join round: reset all peer join phases and forget any
 *        previously recorded best CIB generation
 */
static void
start_join_round(void)
{
    GHashTableIter iter;
    pcmk__node_status_t *peer = NULL;

    crm_debug("Starting new join round join-%d", current_join_id);

    // Reset every cached peer's join phase to "none"
    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &peer)) {
        crm_update_peer_join(__func__, peer, controld_join_none);
    }

    // Forget the best CIB generation from any earlier round
    free(max_generation_from); // free(NULL) is a safe no-op
    max_generation_from = NULL;

    if (max_generation_xml != NULL) {
        pcmk__xml_free(max_generation_xml);
        max_generation_xml = NULL;
    }

    controld_clear_fsa_input_flags(R_HAVE_CIB);
}
/*!
 * \internal
 * \brief Create a join message from the DC
 *
 * \param[in] join_op  Join operation name
 * \param[in] host_to  Recipient of message
 *
 * \return Newly created request XML (caller is responsible for freeing)
 */
static xmlNode *
create_dc_message(const char *join_op, const char *host_to)
{
xmlNode *msg = create_request(join_op, NULL, host_to, CRM_SYSTEM_CRMD,
- CRM_SYSTEM_DC, NULL);
+ CRM_SYSTEM_DC);
/* Identify which election this is a part of */
crm_xml_add_int(msg, PCMK__XA_JOIN_ID, current_join_id);
/* Add a field specifying whether the DC is shutting down. This keeps the
* joining node from fencing the old DC if it becomes the new DC.
*/
pcmk__xe_set_bool_attr(msg, PCMK__XA_DC_LEAVING,
pcmk_is_set(controld_globals.fsa_input_register,
R_SHUTDOWN));
return msg;
}
/*!
 * \internal
 * \brief Send a join offer to one active, named cluster node (GHFunc)
 *
 * \param[in]     key        Ignored (hash table key)
 * \param[in,out] value      Node to offer membership to
 *                           (\c pcmk__node_status_t *)
 * \param[in]     user_data  If not NULL, skip nodes that have already
 *                           progressed past the "none" join phase
 */
static void
join_make_offer(gpointer key, gpointer value, gpointer user_data)
{
/* @TODO We don't use user_data except to distinguish one particular call
* from others. Make this clearer.
*/
xmlNode *offer = NULL;
pcmk__node_status_t *member = (pcmk__node_status_t *) value;
CRM_ASSERT(member != NULL);
if (!pcmk__cluster_is_node_active(member)) {
crm_info("Not making join-%d offer to inactive node %s",
current_join_id, pcmk__s(member->name, "with unknown name"));
if ((member->expected == NULL)
&& pcmk__str_eq(member->state, PCMK__VALUE_LOST, pcmk__str_none)) {
/* You would think this unsafe, but in fact this plus an
* active resource is what causes it to be fenced.
*
* Yes, this does mean that any node that dies at the same
* time as the old DC and is not running resource (still)
* won't be fenced.
*
* I'm not happy about this either.
*/
pcmk__update_peer_expected(__func__, member, CRMD_JOINSTATE_DOWN);
}
return;
}
if (member->name == NULL) {
crm_info("Not making join-%d offer to node uuid %s with unknown name",
current_join_id, member->xml_id);
return;
}
/* Sync the recorded membership ID with the current membership event */
if (controld_globals.membership_id != controld_globals.peer_seq) {
controld_globals.membership_id = controld_globals.peer_seq;
crm_info("Making join-%d offers based on membership event %llu",
current_join_id, controld_globals.peer_seq);
}
if (user_data != NULL) {
enum controld_join_phase phase = controld_get_join_phase(member);
if (phase > controld_join_none) {
crm_info("Not making join-%d offer to already known node %s (%s)",
current_join_id, member->name, join_phase_text(phase));
return;
}
}
/* NOTE(review): cast is redundant — member is already pcmk__node_status_t * */
crm_update_peer_join(__func__, (pcmk__node_status_t*) member,
controld_join_none);
offer = create_dc_message(CRM_OP_JOIN_OFFER, member->name);
// Advertise our feature set so the joining node can bail if not compatible
crm_xml_add(offer, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
crm_info("Sending join-%d offer to %s", current_join_id, member->name);
pcmk__cluster_send_message(member, pcmk_ipc_controld, offer);
pcmk__xml_free(offer);
crm_update_peer_join(__func__, member, controld_join_welcomed);
}
/* A_DC_JOIN_OFFER_ALL */

/* Start a new join round and send a join offer to every node in the peer
 * cache.
 */
void
do_dc_join_offer_all(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
int count;
/* Reset everyone's status back to down or in_ccm in the CIB.
* Any nodes that are active in the CIB but not in the cluster membership
* will be seen as offline by the scheduler anyway.
*/
current_join_id++;
start_join_round();
update_dc(NULL);
if (cause == C_HA_MESSAGE && current_input == I_NODE_JOIN) {
crm_info("A new node joined the cluster");
}
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/* A_DC_JOIN_OFFER_ONE */

/* Make a join offer to a single node (the sender of the triggering message),
 * plus the local node if needed so max_generation_from is set correctly.
 */
void
do_dc_join_offer_one(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
pcmk__node_status_t *member = NULL;
ha_msg_input_t *welcome = NULL;
int count;
const char *join_to = NULL;
/* With no message data, offer to all nodes still in the "none" phase */
if (msg_data->data == NULL) {
crm_info("Making join-%d offers to any unconfirmed nodes "
"because an unknown node joined", current_join_id);
g_hash_table_foreach(pcmk__peer_cache, join_make_offer, &member);
check_join_state(cur_state, __func__);
return;
}
welcome = fsa_typed_data(fsa_dt_ha_msg);
if (welcome == NULL) {
// fsa_typed_data() already logged an error
return;
}
join_to = crm_element_value(welcome->msg, PCMK__XA_SRC);
if (join_to == NULL) {
crm_err("Can't make join-%d offer to unknown node", current_join_id);
return;
}
member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster_member);
/* It is possible that a node will have been sick or starting up when the
* original offer was made. However, it will either re-announce itself in
* due course, or we can re-store the original offer on the client.
*/
crm_update_peer_join(__func__, member, controld_join_none);
join_make_offer(NULL, member, NULL);
/* If the offer isn't to the local node, make an offer to the local node as
* well, to ensure the correct value for max_generation_from.
*/
if (strcasecmp(join_to, controld_globals.our_nodename) != 0) {
member = pcmk__get_node(0, controld_globals.our_nodename, NULL,
pcmk__node_search_cluster_member);
join_make_offer(NULL, member, NULL);
}
/* This was a genuine join request; cancel any existing transition and
* invoke the scheduler.
*/
abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, "Node join",
NULL);
count = crmd_join_phase_count(controld_join_welcomed);
crm_info("Waiting on join-%d requests from %d outstanding node%s",
current_join_id, count, pcmk__plural_s(count));
// Don't waste time by invoking the scheduler yet
}
/*!
 * \internal
 * \brief Compare an integer XML attribute of two elements
 *
 * \param[in] left   First element to compare
 * \param[in] right  Second element to compare
 * \param[in] field  Name of attribute to compare
 *
 * \return -1, 0, or 1 if \p left's value is less than, equal to, or greater
 *         than \p right's (a missing or unparsable value is treated as -1)
 */
static int
compare_int_fields(xmlNode * left, xmlNode * right, const char *field)
{
    long long value_l = 0;
    long long value_r = 0;

    pcmk__scan_ll(crm_element_value(left, field), &value_l, -1LL);
    pcmk__scan_ll(crm_element_value(right, field), &value_r, -1LL);

    // Branchless three-way comparison
    return (value_l > value_r) - (value_l < value_r);
}
/* A_DC_JOIN_PROCESS_REQ */

/* Process a join request from a client node: compare its CIB generation with
 * the best seen so far, then ack or nack the request accordingly, updating
 * the node's join phase and expected state.
 */
void
do_dc_join_filter_offer(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *generation = NULL;
int cmp = 0;
int join_id = -1;
int count = 0;
gint value = 0;
gboolean ack_nack_bool = TRUE;
ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
const char *join_from = crm_element_value(join_ack->msg, PCMK__XA_SRC);
const char *ref = crm_element_value(join_ack->msg, PCMK_XA_REFERENCE);
const char *join_version = crm_element_value(join_ack->msg,
PCMK_XA_CRM_FEATURE_SET);
pcmk__node_status_t *join_node = NULL;
if (join_from == NULL) {
crm_err("Ignoring invalid join request without node name");
return;
}
join_node = pcmk__get_node(0, join_from, NULL,
pcmk__node_search_cluster_member);
crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id);
if (join_id != current_join_id) {
crm_debug("Ignoring join-%d request from %s because we are on join-%d",
join_id, join_from, current_join_id);
check_join_state(cur_state, __func__);
return;
}
generation = join_ack->xml;
/* Compare the client's CIB generation against the best one seen so far */
if (max_generation_xml != NULL && generation != NULL) {
int lpc = 0;
const char *attributes[] = {
PCMK_XA_ADMIN_EPOCH,
PCMK_XA_EPOCH,
PCMK_XA_NUM_UPDATES,
};
/* It's not obvious that join_ack->xml is the PCMK__XE_GENERATION_TUPLE
* element from the join client. The "if" guard is for clarity.
*/
if (pcmk__xe_is(generation, PCMK__XE_GENERATION_TUPLE)) {
for (lpc = 0; cmp == 0 && lpc < PCMK__NELEM(attributes); lpc++) {
cmp = compare_int_fields(max_generation_xml, generation,
attributes[lpc]);
}
} else { // Should always be PCMK__XE_GENERATION_TUPLE
CRM_LOG_ASSERT(false);
}
}
if (ref == NULL) {
ref = "none"; // for logging only
}
/* Decide whether to ack or nack; each branch logs its reason */
if (lookup_failed_sync_node(join_from, &value) == pcmk_rc_ok) {
crm_err("Rejecting join-%d request from node %s because we failed to "
"sync its CIB in join-%d " QB_XS " ref=%s",
join_id, join_from, value, ref);
ack_nack_bool = FALSE;
} else if (!pcmk__cluster_is_node_active(join_node)) {
if (match_down_event(join_from) != NULL) {
/* The join request was received after the node was fenced or
* otherwise shutdown in a way that we're aware of. No need to log
* an error in this rare occurrence; we know the client was recently
* shut down, and receiving a lingering in-flight request is not
* cause for alarm.
*/
crm_debug("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
} else {
crm_err("Rejecting join-%d request from inactive node %s "
QB_XS " ref=%s", join_id, join_from, ref);
}
ack_nack_bool = FALSE;
} else if (generation == NULL) {
crm_err("Rejecting invalid join-%d request from node %s "
"missing CIB generation " QB_XS " ref=%s",
join_id, join_from, ref);
ack_nack_bool = FALSE;
} else if ((join_version == NULL)
|| !feature_set_compatible(CRM_FEATURE_SET, join_version)) {
crm_err("Rejecting join-%d request from node %s because feature set %s"
" is incompatible with ours (%s) " QB_XS " ref=%s",
join_id, join_from, (join_version? join_version : "pre-3.1.0"),
CRM_FEATURE_SET, ref);
ack_nack_bool = FALSE;
} else if (max_generation_xml == NULL) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with first CIB "
"generation) due to unknown schema version %s "
QB_XS " ref=%s",
join_id, join_from, pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with first CIB "
"generation) " QB_XS " ref=%s",
join_id, join_from, ref);
max_generation_xml = pcmk__xml_copy(NULL, generation);
pcmk__str_update(&max_generation_from, join_from);
}
} else if ((cmp < 0)
|| ((cmp == 0)
&& pcmk__str_eq(join_from, controld_globals.our_nodename,
pcmk__str_casei))) {
const char *validation = crm_element_value(generation,
PCMK_XA_VALIDATE_WITH);
if (pcmk__get_schema(validation) == NULL) {
crm_err("Rejecting join-%d request from %s (with better CIB "
"generation than current best from %s) due to unknown "
"schema version %s " QB_XS " ref=%s",
join_id, join_from, max_generation_from,
pcmk__s(validation, "(missing)"), ref);
ack_nack_bool = FALSE;
} else {
crm_debug("Accepting join-%d request from %s (with better CIB "
"generation than current best from %s) " QB_XS " ref=%s",
join_id, join_from, max_generation_from, ref);
crm_log_xml_debug(max_generation_xml, "Old max generation");
crm_log_xml_debug(generation, "New max generation");
pcmk__xml_free(max_generation_xml);
max_generation_xml = pcmk__xml_copy(NULL, join_ack->xml);
pcmk__str_update(&max_generation_from, join_from);
}
} else {
crm_debug("Accepting join-%d request from %s " QB_XS " ref=%s",
join_id, join_from, ref);
}
/* Record the decision in the node's join phase and expected state */
if (!ack_nack_bool) {
crm_update_peer_join(__func__, join_node, controld_join_nack);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_NACK);
} else {
crm_update_peer_join(__func__, join_node, controld_join_integrated);
pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);
}
count = crmd_join_phase_count(controld_join_integrated);
crm_debug("%d node%s currently integrated in join-%d",
count, pcmk__plural_s(count), join_id);
if (check_join_state(cur_state, __func__) == FALSE) {
// Don't waste time by invoking the scheduler yet
count = crmd_join_phase_count(controld_join_welcomed);
crm_debug("Waiting on join-%d requests from %d outstanding node%s",
join_id, count, pcmk__plural_s(count));
}
}
/* A_DC_JOIN_FINALIZE */

/* Once no welcomed nodes remain outstanding, sync the agreed-best CIB to all
 * finalizable (integrated or nacked) nodes; finalize_sync_callback() then
 * completes the process.
 */
void
do_dc_join_finalize(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
char *sync_from = NULL;
int rc = pcmk_ok;
int count_welcomed = crmd_join_phase_count(controld_join_welcomed);
int count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
/* This we can do straight away and avoid clients timing us out
* while we compute the latest CIB
*/
if (count_welcomed != 0) {
crm_debug("Waiting on join-%d requests from %d outstanding node%s "
"before finalizing join", current_join_id, count_welcomed,
pcmk__plural_s(count_welcomed));
crmd_join_phase_log(LOG_DEBUG);
/* crmd_fsa_stall(FALSE); Needed? */
return;
} else if (count_finalizable == 0) {
crm_debug("Finalization not needed for join-%d at the current time",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
check_join_state(controld_globals.fsa_state, __func__);
return;
}
controld_clear_fsa_input_flags(R_HAVE_CIB);
/* pcmk__str_null_matches: a NULL max_generation_from also counts as "us" */
if (pcmk__str_eq(max_generation_from, controld_globals.our_nodename,
pcmk__str_null_matches|pcmk__str_casei)) {
controld_set_fsa_input_flags(R_HAVE_CIB);
}
if (!controld_globals.transition_graph->complete) {
crm_warn("Delaying join-%d finalization while transition in progress",
current_join_id);
crmd_join_phase_log(LOG_DEBUG);
crmd_fsa_stall(FALSE);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
// Send our CIB out to everyone
sync_from = pcmk__str_copy(controld_globals.our_nodename);
crm_debug("Finalizing join-%d for %d node%s (sync'ing from local CIB)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable));
crm_log_xml_debug(max_generation_xml, "Requested CIB version");
} else {
// Ask for the agreed best CIB
sync_from = pcmk__str_copy(max_generation_from);
crm_notice("Finalizing join-%d for %d node%s (sync'ing CIB from %s)",
current_join_id, count_finalizable,
pcmk__plural_s(count_finalizable), sync_from);
crm_log_xml_notice(max_generation_xml, "Requested CIB version");
}
crmd_join_phase_log(LOG_DEBUG);
/* sync_from ownership passes to the callback as user_data */
rc = controld_globals.cib_conn->cmds->sync_from(controld_globals.cib_conn,
sync_from, NULL, cib_none);
fsa_register_cib_callback(rc, sync_from, finalize_sync_callback);
}
/*!
 * \internal
 * \brief Release the record of the best CIB generation and its source node
 */
void
free_max_generation(void)
{
    pcmk__xml_free(max_generation_xml);
    max_generation_xml = NULL;

    free(max_generation_from);
    max_generation_from = NULL;
}
/* Callback for the CIB sync_from() initiated in do_dc_join_finalize(): on
 * success, notify all finalizable nodes of the join results; on failure,
 * record the failed node and restart the join process.
 */
void
finalize_sync_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
CRM_LOG_ASSERT(-EPERM != rc);
if (rc != pcmk_ok) {
const char *sync_from = (const char *) user_data;
do_crm_log(((rc == -pcmk_err_old_data)? LOG_WARNING : LOG_ERR),
"Could not sync CIB from %s in join-%d: %s",
sync_from, current_join_id, pcmk_strerror(rc));
/* Remember the failed node so later join requests from it are nacked */
if (rc != -pcmk_err_old_data) {
record_failed_sync_node(sync_from, current_join_id);
}
/* restart the whole join process */
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION_DC, NULL, NULL,
__func__);
} else if (!AM_I_DC) {
crm_debug("Sync'ed CIB for join-%d but no longer DC", current_join_id);
} else if (controld_globals.fsa_state != S_FINALIZE_JOIN) {
crm_debug("Sync'ed CIB for join-%d but no longer in S_FINALIZE_JOIN "
"(%s)", current_join_id,
fsa_state2string(controld_globals.fsa_state));
} else {
controld_set_fsa_input_flags(R_HAVE_CIB);
/* make sure dc_uuid is re-set to us */
if (!check_join_state(controld_globals.fsa_state, __func__)) {
int count_finalizable = 0;
count_finalizable = crmd_join_phase_count(controld_join_integrated)
+ crmd_join_phase_count(controld_join_nack);
crm_debug("Notifying %d node%s of join-%d results",
count_finalizable, pcmk__plural_s(count_finalizable),
current_join_id);
g_hash_table_foreach(pcmk__peer_cache, finalize_join_for, NULL);
}
}
}
/* Callback for the node history CIB update made during join processing */
static void
join_node_state_commit_callback(xmlNode *msg, int call_id, int rc,
xmlNode *output, void *user_data)
{
const char *node = user_data;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL; // for register_fsa_error() macro
crm_crit("join-%d node history update (via CIB call %d) for node %s "
"failed: %s",
current_join_id, call_id, node, pcmk_strerror(rc));
crm_log_xml_debug(msg, "failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/* NOTE(review): on failure, execution still falls through to log "complete"
* and re-check the join state — confirm this fall-through is intentional
*/
crm_debug("join-%d node history update (via CIB call %d) for node %s "
"complete",
current_join_id, call_id, node);
check_join_state(controld_globals.fsa_state, __func__);
}
/* A_DC_JOIN_PROCESS_ACK */

/*!
 * \internal
 * \brief Process a peer's join confirmation, recording its executor history
 *        in the CIB via an atomic delete+modify transaction
 *
 * The commit result is handled asynchronously by
 * join_node_state_commit_callback().
 */
void
do_dc_join_ack(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    int join_id = -1;
    ha_msg_input_t *join_ack = fsa_typed_data(fsa_dt_ha_msg);
    const char *op = crm_element_value(join_ack->msg, PCMK__XA_CRM_TASK);
    char *join_from = crm_element_value_copy(join_ack->msg, PCMK__XA_SRC);
    pcmk__node_status_t *peer = NULL;
    enum controld_join_phase phase = controld_join_none;

    enum controld_section_e section = controld_section_lrm;
    char *xpath = NULL;
    xmlNode *state = join_ack->xml;
    xmlNode *execd_state = NULL;

    cib_t *cib = controld_globals.cib_conn;
    int rc = pcmk_ok;

    // Sanity checks
    if (join_from == NULL) {
        crm_warn("Ignoring message received without node identification");
        goto done;
    }
    if (op == NULL) {
        crm_warn("Ignoring message received from %s without task", join_from);
        goto done;
    }

    if (strcmp(op, CRM_OP_JOIN_CONFIRM)) {
        crm_debug("Ignoring '%s' message from %s while waiting for '%s'",
                  op, join_from, CRM_OP_JOIN_CONFIRM);
        goto done;
    }

    if (crm_element_value_int(join_ack->msg, PCMK__XA_JOIN_ID, &join_id) != 0) {
        crm_warn("Ignoring join confirmation from %s without valid join ID",
                 join_from);
        goto done;
    }

    peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster_member);
    phase = controld_get_join_phase(peer);
    if (phase != controld_join_finalized) {
        // A confirmation is valid only after we sent the node an ACK
        crm_info("Ignoring out-of-sequence join-%d confirmation from %s "
                 "(currently %s not %s)",
                 join_id, join_from, join_phase_text(phase),
                 join_phase_text(controld_join_finalized));
        goto done;
    }

    if (join_id != current_join_id) {
        // Stale confirmation from an earlier join round
        crm_err("Rejecting join-%d confirmation from %s "
                "because currently on join-%d",
                join_id, join_from, current_join_id);
        crm_update_peer_join(__func__, peer, controld_join_nack);
        goto done;
    }

    crm_update_peer_join(__func__, peer, controld_join_confirmed);

    /* Update CIB with node's current executor state. A new transition will be
     * triggered later, when the CIB manager notifies us of the change.
     *
     * The delete and modify requests are part of an atomic transaction.
     */
    rc = cib->cmds->init_transaction(cib);
    if (rc != pcmk_ok) {
        goto done;
    }

    // Delete relevant parts of node's current executor state from CIB
    if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
        // Keep locked resource history so shutdown locks survive the rejoin
        section = controld_section_lrm_unlocked;
    }
    controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
    rc = cib->cmds->remove(cib, xpath, NULL,
                           cib_xpath|cib_multiple|cib_transaction);
    if (rc != pcmk_ok) {
        goto done;
    }

    // Update CIB with node's latest known executor state
    if (pcmk__str_eq(join_from, controld_globals.our_nodename,
                     pcmk__str_casei)) {

        // Use the latest possible state if processing our own join ack
        execd_state = controld_query_executor_state();

        if (execd_state != NULL) {
            crm_debug("Updating local node history for join-%d from query "
                      "result",
                      current_join_id);
            state = execd_state;

        } else {
            // Fall back to the state carried in the confirmation message
            crm_warn("Updating local node history from join-%d confirmation "
                     "because query failed",
                     current_join_id);
        }

    } else {
        crm_debug("Updating node history for %s from join-%d confirmation",
                  join_from, current_join_id);
    }
    rc = cib->cmds->modify(cib, PCMK_XE_STATUS, state,
                           cib_can_create|cib_transaction);
    pcmk__xml_free(execd_state);
    if (rc != pcmk_ok) {
        goto done;
    }

    // Commit the transaction (asynchronously; rc is the CIB call ID here)
    rc = cib->cmds->end_transaction(cib, true, cib_none);
    fsa_register_cib_callback(rc, join_from, join_node_state_commit_callback);

    if (rc > 0) {
        // join_from will be freed after callback
        join_from = NULL;
        rc = pcmk_ok;
    }

done:
    if (rc != pcmk_ok) {
        crm_crit("join-%d node history update for node %s failed: %s",
                 current_join_id, join_from, pcmk_strerror(rc));
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
    free(join_from);
    free(xpath);
}
/*!
 * \internal
 * \brief GHashTable iterator: send a join ACK (or NACK) to one peer, after
 *        recording the peer's name and UUID in the CIB
 *
 * \param[in] key        Ignored (hash table key)
 * \param[in] value      Peer cache entry (pcmk__node_status_t *)
 * \param[in] user_data  Ignored
 */
void
finalize_join_for(gpointer key, gpointer value, gpointer user_data)
{
    xmlNode *acknak = NULL;
    xmlNode *tmp1 = NULL;
    pcmk__node_status_t *join_node = value;
    const char *join_to = join_node->name;
    enum controld_join_phase phase = controld_get_join_phase(join_node);

    bool integrated = false;

    switch (phase) {
        case controld_join_integrated:
            integrated = true;
            break;
        case controld_join_nack:
            break;
        default:
            // Only integrated (ACK) and nacked (NACK) nodes get a reply
            crm_trace("Not updating non-integrated and non-nacked node %s (%s) "
                      "for join-%d",
                      join_to, join_phase_text(phase), current_join_id);
            return;
    }

    /* Update the <node> element with the node's name and UUID, in case they
     * weren't known before
     */
    crm_trace("Updating node name and UUID in CIB for %s", join_to);
    tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE);
    crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node));
    crm_xml_add(tmp1, PCMK_XA_UNAME, join_to);
    fsa_cib_anon_update(PCMK_XE_NODES, tmp1);
    pcmk__xml_free(tmp1);

    // Refresh the cache entry before checking membership
    join_node = pcmk__get_node(0, join_to, NULL,
                               pcmk__node_search_cluster_member);
    if (!pcmk__cluster_is_node_active(join_node)) {
        /*
         * NACK'ing nodes that the membership layer doesn't know about yet
         * simply creates more churn
         *
         * Better to leave them waiting and let the join restart when
         * the new membership event comes in
         *
         * All other NACKs (due to versions etc) should still be processed
         */
        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_PENDING);
        return;
    }

    // Acknowledge or nack node's join request
    crm_debug("%sing join-%d request from %s",
              integrated? "Acknowledg" : "Nack", current_join_id, join_to);
    acknak = create_dc_message(CRM_OP_JOIN_ACKNAK, join_to);
    pcmk__xe_set_bool_attr(acknak, CRM_OP_JOIN_ACKNAK, integrated);

    if (integrated) {
        // No change needed for a nacked node
        crm_update_peer_join(__func__, join_node, controld_join_finalized);
        pcmk__update_peer_expected(__func__, join_node, CRMD_JOINSTATE_MEMBER);

        /* Iterate through the remote peer cache and add information on which
         * node hosts each to the ACK message. This keeps new controllers in
         * sync with what has already happened.
         */
        if (pcmk__cluster_num_remote_nodes() > 0) {
            GHashTableIter iter;
            pcmk__node_status_t *node = NULL;
            xmlNode *remotes = pcmk__xe_create(acknak, PCMK_XE_NODES);

            g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                xmlNode *remote = NULL;

                if (!node->conn_host) {
                    // Skip remote nodes with no known connection host
                    continue;
                }

                remote = pcmk__xe_create(remotes, PCMK_XE_NODE);
                pcmk__xe_set_props(remote,
                                   PCMK_XA_ID, node->name,
                                   PCMK__XA_NODE_STATE, node->state,
                                   PCMK__XA_CONNECTION_HOST, node->conn_host,
                                   NULL);
            }
        }
    }
    pcmk__cluster_send_message(join_node, pcmk_ipc_controld, acknak);
    pcmk__xml_free(acknak);
    return;
}
/*!
 * \internal
 * \brief Check whether the current join round can advance to its next FSA
 *        state, registering the appropriate FSA input if so
 *
 * \param[in] cur_state  FSA state to evaluate join progress against
 * \param[in] source     Caller name (only used for logging)
 *
 * \return TRUE if integration/finalization completed (or finalization must
 *         wait for the CIB), FALSE otherwise
 */
gboolean
check_join_state(enum crmd_fsa_state cur_state, const char *source)
{
    // Highest membership sequence number we have already restarted a join for
    static unsigned long long highest_seq = 0;

    if (controld_globals.membership_id != controld_globals.peer_seq) {
        // Membership changed since the join began, so restart the join
        crm_debug("join-%d: Membership changed from %llu to %llu "
                  QB_XS " highest=%llu state=%s for=%s",
                  current_join_id, controld_globals.membership_id,
                  controld_globals.peer_seq, highest_seq,
                  fsa_state2string(cur_state), source);
        if (highest_seq < controld_globals.peer_seq) {
            /* Don't spam the FSA with duplicates */
            highest_seq = controld_globals.peer_seq;
            register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
        }

    } else if (cur_state == S_INTEGRATION) {
        if (crmd_join_phase_count(controld_join_welcomed) == 0) {
            // All welcomed nodes have replied; move on to finalization
            int count = crmd_join_phase_count(controld_join_integrated);

            crm_debug("join-%d: Integration of %d peer%s complete "
                      QB_XS " state=%s for=%s",
                      current_join_id, count, pcmk__plural_s(count),
                      fsa_state2string(cur_state), source);
            register_fsa_input_before(C_FSA_INTERNAL, I_INTEGRATED, NULL);
            return TRUE;
        }

    } else if (cur_state == S_FINALIZE_JOIN) {
        if (!pcmk_is_set(controld_globals.fsa_input_register, R_HAVE_CIB)) {
            crm_debug("join-%d: Delaying finalization until we have CIB "
                      QB_XS " state=%s for=%s",
                      current_join_id, fsa_state2string(cur_state), source);
            return TRUE;

        } else if (crmd_join_phase_count(controld_join_welcomed) != 0) {
            int count = crmd_join_phase_count(controld_join_welcomed);

            crm_debug("join-%d: Still waiting on %d welcomed node%s "
                      QB_XS " state=%s for=%s",
                      current_join_id, count, pcmk__plural_s(count),
                      fsa_state2string(cur_state), source);
            crmd_join_phase_log(LOG_DEBUG);

        } else if (crmd_join_phase_count(controld_join_integrated) != 0) {
            int count = crmd_join_phase_count(controld_join_integrated);

            crm_debug("join-%d: Still waiting on %d integrated node%s "
                      QB_XS " state=%s for=%s",
                      current_join_id, count, pcmk__plural_s(count),
                      fsa_state2string(cur_state), source);
            crmd_join_phase_log(LOG_DEBUG);

        } else if (crmd_join_phase_count(controld_join_finalized) != 0) {
            int count = crmd_join_phase_count(controld_join_finalized);

            crm_debug("join-%d: Still waiting on %d finalized node%s "
                      QB_XS " state=%s for=%s",
                      current_join_id, count, pcmk__plural_s(count),
                      fsa_state2string(cur_state), source);
            crmd_join_phase_log(LOG_DEBUG);

        } else {
            // Every node has confirmed; this join round is done
            crm_debug("join-%d: Complete " QB_XS " state=%s for=%s",
                      current_join_id, fsa_state2string(cur_state), source);
            register_fsa_input_later(C_FSA_INTERNAL, I_FINALIZED, NULL);
            return TRUE;
        }
    }

    return FALSE;
}
/* A_DC_JOIN_FINALIZE: refresh quorum (and with it the DC and node
 * information recorded in the CIB) once a join round has completed.
 */
void
do_dc_join_final(long long action, enum crmd_fsa_cause cause,
                 enum crmd_fsa_state cur_state,
                 enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    gboolean quorate = pcmk__cluster_has_quorum();

    crm_debug("Ensuring DC, quorum and node attributes are up-to-date");

    // Force the CIB quorum update even if quorum did not change
    crm_update_quorum(quorate, TRUE);
}
/*!
 * \internal
 * \brief Count cluster peers currently in a given join phase
 *
 * \param[in] phase  Join phase to count
 *
 * \return Number of peer cache entries whose join phase matches \p phase
 */
int
crmd_join_phase_count(enum controld_join_phase phase)
{
    GHashTableIter cursor;
    pcmk__node_status_t *entry = NULL;
    int matches = 0;

    g_hash_table_iter_init(&cursor, pcmk__peer_cache);
    while (g_hash_table_iter_next(&cursor, NULL, (gpointer *) &entry)) {
        if (controld_get_join_phase(entry) == phase) {
            matches++;
        }
    }
    return matches;
}
/*!
 * \internal
 * \brief Log the join phase of every known cluster peer
 *
 * \param[in] level  Log severity to use
 */
void
crmd_join_phase_log(int level)
{
    GHashTableIter cursor;
    pcmk__node_status_t *entry = NULL;

    g_hash_table_iter_init(&cursor, pcmk__peer_cache);
    while (g_hash_table_iter_next(&cursor, NULL, (gpointer *) &entry)) {
        do_crm_log(level, "join-%d: %s=%s", current_join_id, entry->name,
                   join_phase_text(controld_get_join_phase(entry)));
    }
}
diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c
index 26729c09f9..b52d64ce0d 100644
--- a/daemons/controld/controld_membership.c
+++ b/daemons/controld/controld_membership.c
@@ -1,467 +1,467 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
/* put these first so that uuid_t is defined without conflicts */
#include <crm_internal.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/cluster/internal.h>
#include <pacemaker-controld.h>
void post_cache_update(int instance);
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
/*!
 * \internal
 * \brief GHashTable iterator: clean up controller state for a peer that is
 *        no longer an active cluster member
 *
 * Resets the peer's join phase, registers FSA inputs if the lost node is
 * ourselves or our DC, re-checks join progress, and fails any transition
 * actions that can no longer complete on that node.
 *
 * \param[in] key        Ignored (hash table key)
 * \param[in] value      Peer cache entry (pcmk__node_status_t *)
 * \param[in] user_data  Ignored
 */
static void
reap_dead_nodes(gpointer key, gpointer value, gpointer user_data)
{
    pcmk__node_status_t *node = value;

    /* Guard against NULL before any use. (The original code checked
     * node != NULL only after already passing it to
     * pcmk__cluster_is_node_active() and crm_update_peer_join().)
     */
    if (node == NULL) {
        return;
    }

    if (pcmk__cluster_is_node_active(node)) {
        return; // Still an active member; nothing to reap
    }

    crm_update_peer_join(__func__, node, controld_join_none);

    if (node->name != NULL) {
        if (pcmk__str_eq(controld_globals.our_nodename, node->name,
                         pcmk__str_casei)) {
            // We were removed from membership ourselves
            crm_err("We're not part of the cluster anymore");
            register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);

        } else if (!AM_I_DC
                   && pcmk__str_eq(node->name, controld_globals.dc_name,
                                   pcmk__str_casei)) {
            // Losing the DC triggers a new election
            crm_warn("Our DC node (%s) left the cluster", node->name);
            register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
        }
    }

    // A departed node may unblock an in-progress join round
    if ((controld_globals.fsa_state == S_INTEGRATION)
        || (controld_globals.fsa_state == S_FINALIZE_JOIN)) {
        check_join_state(controld_globals.fsa_state, __func__);
    }

    if (node->xml_id != NULL) {
        // Abort graph actions that required this node
        fail_incompletable_actions(controld_globals.transition_graph,
                                   node->xml_id);
    }
}
/*!
 * \internal
 * \brief React to a completed update of the cluster peer cache
 *
 * \param[in] instance  Membership event sequence number the cache reflects
 */
void
post_cache_update(int instance)
{
    xmlNode *no_op = NULL;

    // Remember which membership event this cache reflects
    controld_globals.peer_seq = instance;
    crm_debug("Updated cache after membership event %d.", instance);

    // Clean up controller state for peers that are no longer active
    g_hash_table_foreach(pcmk__peer_cache, reap_dead_nodes, NULL);
    controld_set_fsa_input_flags(R_MEMBERSHIP);

    if (AM_I_DC) {
        populate_cib_nodes(node_update_quick | node_update_cluster | node_update_peer |
                           node_update_expected, __func__);
    }

    /*
     * If we lost nodes, we should re-check the election status
     * Safe to call outside of an election
     */
    controld_set_fsa_action_flags(A_ELECTION_CHECK);
    controld_trigger_fsa();

    /* Membership changed, remind everyone we're here.
     * This will aid detection of duplicate DCs
     */
    no_op = create_request(CRM_OP_NOOP, NULL, NULL, CRM_SYSTEM_CRMD,
-                          AM_I_DC ? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD, NULL);
+                          (AM_I_DC? CRM_SYSTEM_DC : CRM_SYSTEM_CRMD));
    pcmk__cluster_send_message(NULL, pcmk_ipc_controld, no_op);
    pcmk__xml_free(no_op);
}
// CIB callback: escalate a failed node state update to the FSA
static void
crmd_node_update_complete(xmlNode *msg, int call_id, int rc, xmlNode *output,
                          void *user_data)
{
    fsa_data_t *msg_data = NULL;    // for register_fsa_error() macro

    if (rc == pcmk_ok) {
        crm_trace("Node update %d complete", call_id);
        return;
    }

    if (call_id < pcmk_ok) {
        // The request itself could not be submitted
        crm_err("Node update failed: %s (%d)", pcmk_strerror(call_id), call_id);
    } else {
        // The request was submitted but the update failed
        crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
    }
    crm_log_xml_debug(msg, "failed");
    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/*!
 * \internal
 * \brief Create an XML node state tag with updates
 *
 * \param[in,out] node    Node whose state will be used for update
 * \param[in]     flags   Bitmask of node_update_flags indicating what to update
 * \param[in,out] parent  XML node to contain update (or NULL)
 * \param[in]     source  Who requested the update (only used for logging)
 *
 * \return Pointer to created node state tag (or NULL if nothing was created)
 */
xmlNode *
create_node_state_update(pcmk__node_status_t *node, int flags,
                         xmlNode *parent, const char *source)
{
    const char *value = NULL;
    xmlNode *node_state;

    if (!node->state) {
        // Nothing to report for a node we have never seen
        crm_info("Node update for %s cancelled: no state, not seen yet",
                 node->name);
        return NULL;
    }

    node_state = pcmk__xe_create(parent, PCMK__XE_NODE_STATE);

    if (pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        pcmk__xe_set_bool_attr(node_state, PCMK_XA_REMOTE_NODE, true);
    }

    if (crm_xml_add(node_state, PCMK_XA_ID,
                    pcmk__cluster_node_uuid(node)) == NULL) {
        // Without an ID the entry is unusable; discard it
        crm_info("Node update for %s cancelled: no ID", node->name);
        pcmk__xml_free(node_state);
        return NULL;
    }

    crm_xml_add(node_state, PCMK_XA_UNAME, node->name);

    if ((flags & node_update_cluster) && node->state) {
        if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
            // A value 0 means the node is not a cluster member.
            crm_xml_add_ll(node_state, PCMK__XA_IN_CCM, node->when_member);

        } else {
            // Older DCs expect a boolean rather than a timestamp here
            pcmk__xe_set_bool_attr(node_state, PCMK__XA_IN_CCM,
                                   pcmk__str_eq(node->state, PCMK_VALUE_MEMBER,
                                                pcmk__str_none));
        }
    }

    if (!pcmk_is_set(node->flags, pcmk__node_status_remote)) {
        // These attributes apply only to full cluster (non-remote) nodes
        if (flags & node_update_peer) {
            if (compare_version(controld_globals.dc_version, "3.18.0") >= 0) {
                // A value 0 means the peer is offline in CPG.
                crm_xml_add_ll(node_state, PCMK_XA_CRMD, node->when_online);

            } else {
                // @COMPAT DCs < 2.1.7 use online/offline rather than timestamp
                value = PCMK_VALUE_OFFLINE;
                if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
                    value = PCMK_VALUE_ONLINE;
                }
                crm_xml_add(node_state, PCMK_XA_CRMD, value);
            }
        }

        if (flags & node_update_join) {
            if (controld_get_join_phase(node) <= controld_join_none) {
                value = CRMD_JOINSTATE_DOWN;
            } else {
                value = CRMD_JOINSTATE_MEMBER;
            }
            crm_xml_add(node_state, PCMK__XA_JOIN, value);
        }

        if (flags & node_update_expected) {
            crm_xml_add(node_state, PCMK_XA_EXPECTED, node->expected);
        }
    }

    crm_xml_add(node_state, PCMK_XA_CRM_DEBUG_ORIGIN, source);

    return node_state;
}
// CIB callback: report the outcome of deleting a conflicting node entry
static void
remove_conflicting_node_callback(xmlNode *msg, int call_id, int rc,
                                 xmlNode *output, void *user_data)
{
    const int level = (rc == 0)? LOG_DEBUG : LOG_NOTICE;
    char *node_uuid = user_data;

    // Failures are more interesting than successes, so log them louder
    do_crm_log_unlikely(level,
                        "Deletion of the unknown conflicting node \"%s\": %s (rc=%d)",
                        node_uuid, pcmk_strerror(rc), rc);
}
/*!
 * \internal
 * \brief CIB query callback: delete stale <node> entries whose uname matches
 *        a known node's but whose ID differs
 *
 * \param[in] msg        CIB request XML (unused)
 * \param[in] call_id    CIB call ID (unused)
 * \param[in] rc         Legacy Pacemaker return code of the query
 * \param[in] output     Query result (a <node> element or a set of them)
 * \param[in] user_data  ID of the node the conflict was detected against
 *                       (allocated; ownership per fsa_register_cib_callback)
 */
static void
search_conflicting_node_callback(xmlNode * msg, int call_id, int rc,
                                 xmlNode * output, void *user_data)
{
    char *new_node_uuid = user_data;
    xmlNode *node_xml = NULL;

    if (rc != pcmk_ok) {
        if (rc != -ENXIO) {
            // -ENXIO simply means no conflicting entries were found
            crm_notice("Searching conflicting nodes for %s failed: %s (%d)",
                       new_node_uuid, pcmk_strerror(rc), rc);
        }
        return;

    } else if (output == NULL) {
        return;
    }

    // The result may be a single <node> or a wrapper containing several
    if (pcmk__xe_is(output, PCMK_XE_NODE)) {
        node_xml = output;

    } else {
        node_xml = pcmk__xe_first_child(output, PCMK_XE_NODE, NULL, NULL);
    }

    for (; node_xml != NULL; node_xml = pcmk__xe_next_same(node_xml)) {
        const char *node_uuid = NULL;
        const char *node_uname = NULL;
        GHashTableIter iter;
        pcmk__node_status_t *node = NULL;
        gboolean known = FALSE;

        node_uuid = crm_element_value(node_xml, PCMK_XA_ID);
        node_uname = crm_element_value(node_xml, PCMK_XA_UNAME);

        if (node_uuid == NULL || node_uname == NULL) {
            continue;
        }

        // Check whether the peer cache knows this exact ID/uname pairing
        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if ((node != NULL)
                && pcmk__str_eq(node->xml_id, node_uuid, pcmk__str_casei)
                && pcmk__str_eq(node->name, node_uname, pcmk__str_casei)) {

                known = TRUE;
                break;
            }
        }

        if (known == FALSE) {
            cib_t *cib_conn = controld_globals.cib_conn;
            int delete_call_id = 0;
            xmlNode *node_state_xml = NULL;

            crm_notice("Deleting unknown node %s/%s which has conflicting uname with %s",
                       node_uuid, node_uname, new_node_uuid);

            // Remove the stale <node> entry ...
            delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_NODES,
                                                    node_xml, cib_none);
            fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
                                      remove_conflicting_node_callback);

            // ... and the corresponding <node_state> entry
            node_state_xml = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
            crm_xml_add(node_state_xml, PCMK_XA_ID, node_uuid);
            crm_xml_add(node_state_xml, PCMK_XA_UNAME, node_uname);

            delete_call_id = cib_conn->cmds->remove(cib_conn, PCMK_XE_STATUS,
                                                    node_state_xml, cib_none);
            fsa_register_cib_callback(delete_call_id, pcmk__str_copy(node_uuid),
                                      remove_conflicting_node_callback);
            pcmk__xml_free(node_state_xml);
        }
    }
}
// CIB callback: escalate a failed <nodes> section update to the FSA
static void
node_list_update_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
                          void *user_data)
{
    fsa_data_t *msg_data = NULL;    // for register_fsa_error() macro

    if (call_id < pcmk_ok) {
        // The request itself could not be submitted
        crm_err("Node list update failed: %s (%d)",
                pcmk_strerror(call_id), call_id);
    } else if (rc < pcmk_ok) {
        // The request was submitted but the update failed
        crm_err("Node update %d failed: %s (%d)", call_id, pcmk_strerror(rc),
                rc);
    } else {
        return; // Success
    }

    crm_log_xml_debug(msg, "update:failed");
    register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
/*!
 * \internal
 * \brief Refresh the CIB <nodes> section (and, if we are DC, the <status>
 *        section) from current membership information
 *
 * \param[in] flags   Bitmask of node_update_flags controlling what to update
 * \param[in] source  Who requested the update (only used for logging)
 */
void
populate_cib_nodes(enum node_update_flags flags, const char *source)
{
    cib_t *cib_conn = controld_globals.cib_conn;

    int call_id = 0;
    gboolean from_hashtable = TRUE;
    xmlNode *node_list = pcmk__xe_create(NULL, PCMK_XE_NODES);

#if SUPPORT_COROSYNC
    if (!pcmk_is_set(flags, node_update_quick)
        && (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync)) {

        // Prefer corosync's node list unless a quick update was requested
        from_hashtable = pcmk__corosync_add_nodes(node_list);
    }
#endif

    if (from_hashtable) {
        GHashTableIter iter;
        pcmk__node_status_t *node = NULL;
        GString *xpath = NULL;

        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            xmlNode *new_node = NULL;

            if ((node->xml_id != NULL) && (node->name != NULL)) {
                crm_trace("Creating node entry for %s/%s",
                          node->name, node->xml_id);
                if (xpath == NULL) {
                    xpath = g_string_sized_new(512);
                } else {
                    // Reuse the same buffer across iterations
                    g_string_truncate(xpath, 0);
                }

                /* We need both to be valid */
                new_node = pcmk__xe_create(node_list, PCMK_XE_NODE);
                crm_xml_add(new_node, PCMK_XA_ID, node->xml_id);
                crm_xml_add(new_node, PCMK_XA_UNAME, node->name);

                /* Search and remove unknown nodes with the conflicting uname from CIB */
                pcmk__g_strcat(xpath,
                               "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
                               "/" PCMK_XE_NODES "/" PCMK_XE_NODE
                               "[@" PCMK_XA_UNAME "='", node->name, "']"
                               "[@" PCMK_XA_ID "!='", node->xml_id, "']", NULL);

                call_id = cib_conn->cmds->query(cib_conn,
                                                (const char *) xpath->str, NULL,
                                                cib_xpath);
                fsa_register_cib_callback(call_id, pcmk__str_copy(node->xml_id),
                                          search_conflicting_node_callback);
            }
        }

        if (xpath != NULL) {
            g_string_free(xpath, TRUE);
        }
    }

    crm_trace("Populating <nodes> section from %s", from_hashtable ? "hashtable" : "cluster");

    if ((controld_update_cib(PCMK_XE_NODES, node_list, cib_none,
                             node_list_update_callback) == pcmk_rc_ok)
         && (pcmk__peer_cache != NULL) && AM_I_DC) {
        /*
         * There is no need to update the local CIB with our values if
         * we've not seen valid membership data
         */
        GHashTableIter iter;
        pcmk__node_status_t *node = NULL;

        // Reuse node_list as the <status> section update
        pcmk__xml_free(node_list);
        node_list = pcmk__xe_create(NULL, PCMK_XE_STATUS);

        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            create_node_state_update(node, flags, node_list, source);
        }

        if (pcmk__remote_peer_cache != NULL) {
            g_hash_table_iter_init(&iter, pcmk__remote_peer_cache);
            while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
                create_node_state_update(node, flags, node_list, source);
            }
        }

        controld_update_cib(PCMK_XE_STATUS, node_list, cib_none,
                            crmd_node_update_complete);
    }
    pcmk__xml_free(node_list);
}
// CIB callback: escalate a failed quorum update to the FSA
static void
cib_quorum_update_complete(xmlNode *msg, int call_id, int rc, xmlNode *output,
                           void *user_data)
{
    fsa_data_t *msg_data = NULL;    // for register_fsa_error() macro

    if (rc != pcmk_ok) {
        crm_err("Quorum update %d failed: %s (%d)", call_id, pcmk_strerror(rc), rc);
        crm_log_xml_debug(msg, "failed");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
        return;
    }
    crm_trace("Quorum update %d complete", call_id);
}
/*!
 * \internal
 * \brief Track a change in cluster quorum, updating the CIB if we are DC
 *
 * \param[in] quorum        Whether the cluster currently has quorum
 * \param[in] force_update  If TRUE, update the CIB even if quorum is
 *                          unchanged
 */
void
crm_update_quorum(gboolean quorum, gboolean force_update)
{
    bool has_quorum = pcmk_is_set(controld_globals.flags, controld_has_quorum);

    if (quorum) {
        controld_set_global_flags(controld_ever_had_quorum);

    } else if (pcmk_all_flags_set(controld_globals.flags,
                                  controld_ever_had_quorum
                                  |controld_no_quorum_suicide)) {
        // We once had quorum, lost it, and are configured to self-fence
        pcmk__panic(__func__);
    }

    if (AM_I_DC
        && ((has_quorum && !quorum) || (!has_quorum && quorum)
            || force_update)) {
        xmlNode *update = NULL;

        update = pcmk__xe_create(NULL, PCMK_XE_CIB);
        crm_xml_add_int(update, PCMK_XA_HAVE_QUORUM, quorum);
        crm_xml_add(update, PCMK_XA_DC_UUID, controld_globals.our_uuid);

        crm_debug("Updating quorum status to %s", pcmk__btoa(quorum));
        controld_update_cib(PCMK_XE_CIB, update, cib_none,
                            cib_quorum_update_complete);
        pcmk__xml_free(update);

        /* Quorum changes usually cause a new transition via other activity:
         * quorum gained via a node joining will abort via the node join,
         * and quorum lost via a node leaving will usually abort via resource
         * activity and/or fencing.
         *
         * However, it is possible that nothing else causes a transition (e.g.
         * someone forces quorum via corosync-cmaptcl, or quorum is lost due to
         * a node in standby shutting down cleanly), so here ensure a new
         * transition is triggered.
         */
        if (quorum) {
            /* If quorum was gained, abort after a short delay, in case multiple
             * nodes are joining around the same time, so the one that brings us
             * to quorum doesn't cause all the remaining ones to be fenced.
             */
            abort_after_delay(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                              "Quorum gained", 5000);

        } else {
            abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
                             "Quorum lost", NULL);
        }
    }

    // Remember the current quorum state for next time
    if (quorum) {
        controld_set_global_flags(controld_has_quorum);
    } else {
        controld_clear_global_flags(controld_has_quorum);
    }
}
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index d9fcd7423d..5e0770a122 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1364 +1,1365 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* debug only, can wrap all it likes */
static int last_data_id = 0;
/*!
 * \internal
 * \brief Register an error input with the FSA, preserving pending actions
 *
 * Any currently scheduled FSA actions are re-queued as a separate I_NULL
 * input (so they are not lost), the action list is cleared, and then the
 * error input itself is prepended to the queue.
 *
 * \param[in] cause        What generated the error
 * \param[in] input        Error input to register
 * \param[in] cur_data     FSA data being processed when the error occurred
 *                         (may be NULL)
 * \param[in] new_data     Input-specific data for the error input
 * \param[in] raised_from  Caller name (only used for logging)
 */
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                       fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
    /* save the current actions if any */
    if (controld_globals.fsa_actions != A_NOTHING) {
        register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
                               I_NULL, cur_data ? cur_data->data : NULL,
                               controld_globals.fsa_actions, TRUE, __func__);
    }

    /* reset the action list */
    crm_info("Resetting the current action list");
    fsa_dump_actions(controld_globals.fsa_actions, "Drop");
    controld_globals.fsa_actions = A_NOTHING;

    /* register the error */
    register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
/*!
 * \internal
 * \brief Add an input (and/or actions) to the FSA message queue
 *
 * \param[in] cause         What generated the input
 * \param[in] input         FSA input to register
 * \param[in] data          Input-specific data to copy into the queue entry
 *                          (how it is copied depends on \p cause)
 * \param[in] with_actions  FSA actions to attach to the input
 * \param[in] prepend       If TRUE, add at the front of the queue
 * \param[in] raised_from   Caller name (only used for logging)
 */
void
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
                       void *data, uint64_t with_actions,
                       gboolean prepend, const char *raised_from)
{
    unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
    fsa_data_t *fsa_data = NULL;

    if (raised_from == NULL) {
        raised_from = "<unknown>";
    }

    if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
        /* no point doing anything */
        crm_err("Cannot add entry to queue: no input and no action");
        return;
    }

    if (input == I_WAIT_FOR_EVENT) {
        // Stall the FSA; actions resume when further input arrives
        controld_set_global_flags(controld_fsa_is_stalled);
        crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
                  raised_from, fsa_cause2string(cause), data, old_len);

        if (old_len > 0) {
            fsa_dump_queue(LOG_TRACE);
            prepend = FALSE;
        }

        if (data == NULL) {
            // With no data, just restore the actions and skip queueing
            controld_set_fsa_action_flags(with_actions);
            fsa_dump_actions(with_actions, "Restored");
            return;
        }

        /* Store everything in the new event and reset
         * controld_globals.fsa_actions
         */
        with_actions |= controld_globals.fsa_actions;
        controld_globals.fsa_actions = A_NOTHING;
    }

    last_data_id++;
    crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
              raised_from, (prepend? "prepended" : "appended"), last_data_id,
              fsa_input2string(input), fsa_cause2string(cause),
              (data? "with" : "without"));

    fsa_data = pcmk__assert_alloc(1, sizeof(fsa_data_t));
    fsa_data->id = last_data_id;
    fsa_data->fsa_input = input;
    fsa_data->fsa_cause = cause;
    fsa_data->origin = raised_from;
    fsa_data->data = NULL;
    fsa_data->data_type = fsa_dt_none;
    fsa_data->actions = with_actions;

    if (with_actions != A_NOTHING) {
        crm_trace("Adding actions %.16llx to input",
                  (unsigned long long) with_actions);
    }

    if (data != NULL) {
        // Copy the data according to what the cause says it contains
        switch (cause) {
            case C_FSA_INTERNAL:
            case C_CRMD_STATUS_CALLBACK:
            case C_IPC_MESSAGE:
            case C_HA_MESSAGE:
                CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
                          crm_err("Bogus data from %s", raised_from));
                crm_trace("Copying %s data from %s as cluster message data",
                          fsa_cause2string(cause), raised_from);
                fsa_data->data = copy_ha_msg_input(data);
                fsa_data->data_type = fsa_dt_ha_msg;
                break;

            case C_LRM_OP_CALLBACK:
                crm_trace("Copying %s data from %s as lrmd_event_data_t",
                          fsa_cause2string(cause), raised_from);
                fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
                fsa_data->data_type = fsa_dt_lrm;
                break;

            case C_TIMER_POPPED:
            case C_SHUTDOWN:
            case C_UNKNOWN:
            case C_STARTUP:
                crm_crit("Copying %s data (from %s) is not yet implemented",
                         fsa_cause2string(cause), raised_from);
                crmd_exit(CRM_EX_SOFTWARE);
                break;
        }
    }

    /* make sure to free it properly later */
    if (prepend) {
        controld_globals.fsa_message_queue
            = g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
    } else {
        controld_globals.fsa_message_queue
            = g_list_append(controld_globals.fsa_message_queue, fsa_data);
    }

    crm_trace("FSA message queue length is %d",
              g_list_length(controld_globals.fsa_message_queue));

    /* fsa_dump_queue(LOG_TRACE); */

    if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
        crm_err("Couldn't add message to the queue");
    }

    if (input != I_WAIT_FOR_EVENT) {
        controld_trigger_fsa();
    }
}
/*!
 * \internal
 * \brief Log every entry currently in the FSA message queue
 *
 * \param[in] log_level  Severity to log each entry at
 */
void
fsa_dump_queue(int log_level)
{
    GList *entry = controld_globals.fsa_message_queue;
    int position = 0;

    while (entry != NULL) {
        fsa_data_t *item = (fsa_data_t *) entry->data;

        do_crm_log_unlikely(log_level,
                            "queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
                            position, item->id,
                            fsa_input2string(item->fsa_input),
                            item->origin, item->data, item->data_type,
                            fsa_cause2string(item->fsa_cause));
        position++;
        entry = entry->next;
    }
}
/*!
 * \internal
 * \brief Deep-copy a cluster message input
 *
 * \param[in] orig  Input to copy (may be NULL)
 *
 * \return Newly allocated copy whose \c xml member points at the first child
 *         of the copy's wrapper element (caller must free)
 */
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t *orig)
{
    ha_msg_input_t *dup = pcmk__assert_alloc(1, sizeof(ha_msg_input_t));
    xmlNode *wrapper = NULL;

    if (orig != NULL) {
        dup->msg = pcmk__xml_copy(NULL, orig->msg);
    } else {
        dup->msg = NULL;
    }

    wrapper = pcmk__xe_first_child(dup->msg, PCMK__XE_CRM_XML, NULL, NULL);
    dup->xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
    return dup;
}
/*!
 * \internal
 * \brief Free an FSA queue entry, including its type-specific payload
 *
 * \param[in,out] fsa_data  Entry to free (may be NULL)
 */
void
delete_fsa_input(fsa_data_t *fsa_data)
{
    if (fsa_data == NULL) {
        return;
    }
    crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));

    if (fsa_data->data != NULL) {
        // Free the payload according to its declared type
        switch (fsa_data->data_type) {
            case fsa_dt_ha_msg:
                delete_ha_msg_input(fsa_data->data);
                break;

            case fsa_dt_xml:
                pcmk__xml_free((xmlNode *) fsa_data->data);
                break;

            case fsa_dt_lrm:
                lrmd_free_event((lrmd_event_data_t *) fsa_data->data);
                break;

            case fsa_dt_none:
                // data_type claims no payload, yet data is non-NULL: a bug
                crm_err("Don't know how to free %s data from %s",
                        fsa_cause2string(fsa_data->fsa_cause),
                        fsa_data->origin);
                crmd_exit(CRM_EX_SOFTWARE);
                break;
        }
        crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
    }

    free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
fsa_data_t *message
= (fsa_data_t *) controld_globals.fsa_message_queue->data;
controld_globals.fsa_message_queue
= g_list_remove(controld_globals.fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
/*!
 * \internal
 * \brief Extract an FSA entry's payload, verifying it has the expected type
 *
 * \param[in] fsa_data  Queue entry to extract from (may be NULL)
 * \param[in] a_type    Payload type the caller expects
 * \param[in] caller    Caller name (only used for logging)
 *
 * \return Payload pointer, or NULL if unavailable (asserts on type mismatch)
 */
void *
fsa_typed_data_adv(fsa_data_t *fsa_data, enum fsa_data_type a_type,
                   const char *caller)
{
    if (fsa_data == NULL) {
        crm_err("%s: No FSA data available", caller);
        return NULL;
    }

    if (fsa_data->data == NULL) {
        crm_err("%s: No message data available. Origin: %s",
                caller, fsa_data->origin);
        return NULL;
    }

    if (fsa_data->data_type != a_type) {
        crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
                 caller, fsa_data->data_type, a_type, fsa_data->origin);
        CRM_ASSERT(fsa_data->data_type == a_type);
        return NULL;    // not reached: the assertion above aborts
    }

    return fsa_data->data;
}
/* A_MSG_ROUTE: dispatch a queued cluster message with its original cause */
void
do_msg_route(long long action,
             enum crmd_fsa_cause cause,
             enum crmd_fsa_state cur_state,
             enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
    ha_msg_input_t *routed = fsa_typed_data(fsa_dt_ha_msg);

    route_message(msg_data->fsa_cause, routed->msg);
}
/*!
 * \internal
 * \brief Route a cluster message: relay it if it is for someone else,
 *        otherwise handle it locally (now or via the FSA queue)
 *
 * \param[in]     cause  How the message arrived (IPC or cluster)
 * \param[in,out] input  Message XML to route
 */
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
    ha_msg_input_t fsa_input;
    enum crmd_fsa_input result = I_NULL;

    fsa_input.msg = input;
    CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);

    /* try passing the buck first */
    if (relay_message(input, cause == C_IPC_MESSAGE)) {
        return;
    }

    /* handle locally */
    result = handle_message(input, cause);

    /* done or process later? */
    switch (result) {
        case I_NULL:
        case I_CIB_OP:
        case I_ROUTER:
        case I_NODE_JOIN:
        case I_JOIN_REQUEST:
        case I_JOIN_RESULT:
            // These inputs are either complete or queued at the front below
            break;
        default:
            /* Defering local processing of message */
            register_fsa_input_later(cause, result, &fsa_input);
            return;
    }

    if (result != I_NULL) {
        /* add to the front of the queue */
        register_fsa_input(cause, result, &fsa_input);
    }
}
/*!
 * \internal
 * \brief Deliver a message to its destination, relaying to peers if needed
 *
 * \param[in,out] msg                 Message XML to route
 * \param[in]     originated_locally  Whether message came from a local IPC
 *                                    client (rather than a cluster peer)
 *
 * \return TRUE if no further processing of the message is needed by the
 *         caller, FALSE if the caller should continue handling it locally
 */
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
    enum pcmk_ipc_server dest = pcmk_ipc_unknown;
    bool is_for_dc = false;
    bool is_for_dcib = false;
    bool is_for_te = false;
    bool is_for_crm = false;
    bool is_for_cib = false;
    bool is_local = false;
    bool broadcast = false;
    const char *host_to = NULL;
    const char *sys_to = NULL;
    const char *sys_from = NULL;
    const char *type = NULL;
    const char *task = NULL;
    const char *ref = NULL;
    pcmk__node_status_t *node_to = NULL;

    CRM_CHECK(msg != NULL, return TRUE);

    host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO);
    sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
    sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
    type = crm_element_value(msg, PCMK__XA_T);
    task = crm_element_value(msg, PCMK__XA_CRM_TASK);
    ref = crm_element_value(msg, PCMK_XA_REFERENCE);

    // An empty destination host means the message is for all nodes
    broadcast = pcmk__str_empty(host_to);

    if (ref == NULL) {
        // Placeholder used only in log messages below
        ref = "without reference ID";
    }

    // Hello messages are handshake-only and need no routing
    if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
        crm_trace("Received hello %s from %s (no processing needed)",
                  ref, pcmk__s(sys_from, "unidentified source"));
        crm_log_xml_trace(msg, "hello");
        return TRUE;
    }

    // Require message type (set by create_request())
    if (!pcmk__str_eq(type, PCMK__VALUE_CRMD, pcmk__str_none)) {
        crm_warn("Ignoring invalid message %s with type '%s' "
                 "(not '" PCMK__VALUE_CRMD "')",
                 ref, pcmk__s(type, ""));
        crm_log_xml_trace(msg, "ignored");
        return TRUE;
    }

    // Require a destination subsystem (also set by create_request())
    if (sys_to == NULL) {
        crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO,
                 ref);
        crm_log_xml_trace(msg, "ignored");
        return TRUE;
    }

    // Get the message type appropriate to the destination subsystem
    if (pcmk_get_cluster_layer() == pcmk_cluster_layer_corosync) {
        dest = pcmk__parse_server(sys_to);
        if (dest == pcmk_ipc_unknown) {
            /* Unrecognized value, use a sane default
             *
             * @TODO Maybe we should bail instead
             */
            dest = pcmk_ipc_controld;
        }
    }

    is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
    is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
    is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
    is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
    is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);

    // Check whether message should be processed locally
    is_local = false;
    if (broadcast) {
        if (is_for_dc || is_for_te) {
            is_local = false;

        } else if (is_for_crm) {
            if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
                                     PCMK__CONTROLD_CMD_NODES, NULL)) {
                /* Node info requests do not specify a host, which is normally
                 * treated as "all hosts", because the whole point is that the
                 * client may not know the local node name. Always handle these
                 * requests locally.
                 */
                is_local = true;
            } else {
                // Locally originated broadcasts go to peers, not back to us
                is_local = !originated_locally;
            }

        } else {
            is_local = true;
        }

    } else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
                            pcmk__str_casei)) {
        // Explicitly addressed to this node
        is_local = true;

    } else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
        xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
                                                NULL);
        xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
        const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);

        if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
            // Local delete of an offline node's resource history
            is_local = true;
        }
    }

    // Check whether message should be relayed
    if (is_for_dc || is_for_dcib || is_for_te) {
        if (AM_I_DC) {
            if (is_for_te) {
                crm_trace("Route message %s locally as transition request",
                          ref);
                crm_log_xml_trace(msg, sys_to);
                send_msg_via_ipc(msg, sys_to);
                return TRUE; // No further processing of message is needed
            }
            crm_trace("Route message %s locally as DC request", ref);
            return FALSE; // More to be done by caller
        }

        if (originated_locally
            && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
                                     CRM_SYSTEM_TENGINE, NULL)) {
            crm_trace("Relay message %s to DC (via %s)",
                      ref, pcmk__s(host_to, "broadcast"));
            crm_log_xml_trace(msg, "relayed");
            if (!broadcast) {
                node_to = pcmk__get_node(0, host_to, NULL,
                                         pcmk__node_search_cluster_member);
            }
            pcmk__cluster_send_message(node_to, dest, msg);
            return TRUE;
        }

        /* Transition engine and scheduler messages are sent only to the DC on
         * the same node. If we are no longer the DC, discard this message.
         */
        crm_trace("Ignoring message %s because we are no longer DC", ref);
        crm_log_xml_trace(msg, "ignored");
        return TRUE; // No further processing of message is needed
    }

    if (is_local) {
        if (is_for_crm || is_for_cib) {
            crm_trace("Route message %s locally as controller request", ref);
            return FALSE; // More to be done by caller
        }
        // Some other local subsystem (or proxy session) owns this message
        crm_trace("Relay message %s locally to %s", ref, sys_to);
        crm_log_xml_trace(msg, "IPC-relay");
        send_msg_via_ipc(msg, sys_to);
        return TRUE;
    }

    if (!broadcast) {
        node_to = pcmk__search_node_caches(0, host_to,
                                           pcmk__node_search_cluster_member);
        if (node_to == NULL) {
            crm_warn("Ignoring message %s because node %s is unknown",
                     ref, host_to);
            crm_log_xml_trace(msg, "ignored");
            return TRUE;
        }
    }

    // node_to == NULL here means broadcast to all peers
    crm_trace("Relay message %s to %s",
              ref, pcmk__s(host_to, "all peers"));
    crm_log_xml_trace(msg, "relayed");
    pcmk__cluster_send_message(node_to, dest, msg);
    return TRUE;
}
/* Return true if the given field contains a non-negative integer (that is,
 * a parseable protocol version number); otherwise log a rejection warning
 * and return false.
 */
static bool
authorize_version(xmlNode *message_data, const char *field,
                  const char *client_name, const char *ref, const char *uuid)
{
    long long version_num = -1LL;
    const char *version = crm_element_value(message_data, field);

    if ((pcmk__scan_ll(version, &version_num, -1LL) == pcmk_rc_ok)
        && (version_num >= 0LL)) {
        return true;
    }

    crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
             QB_XS " ref=%s uuid=%s",
             client_name, ((version == NULL)? "" : version),
             field, (ref? ref : "none"), uuid);
    return false;
}
/*!
 * \internal
 * \brief Check whether a client IPC message is acceptable
 *
 * If a given client IPC message is a hello, "authorize" it by ensuring it has
 * valid information such as a protocol version, and return false indicating
 * that nothing further needs to be done with the message. If the message is not
 * a hello, just return true to indicate it needs further processing.
 *
 * \param[in]     client_msg     XML of IPC message
 * \param[in,out] curr_client    If IPC is not proxied, client that sent message
 * \param[in]     proxy_session  If IPC is proxied, the session ID
 *
 * \return true if message needs further processing, false if it doesn't
 */
bool
controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
                               const char *proxy_session)
{
    xmlNode *wrapper = NULL;
    xmlNode *message_data = NULL;
    const char *client_name = NULL;
    const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK);
    const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE);
    const char *uuid = (curr_client? curr_client->id : proxy_session);

    if (uuid == NULL) {
        crm_warn("IPC message from client rejected: No client identifier "
                 QB_XS " ref=%s", (ref? ref : "none"));
        goto rejected;
    }

    if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
        // Only hello messages need to be authorized
        return true;
    }

    wrapper = pcmk__xe_first_child(client_msg, PCMK__XE_CRM_XML, NULL, NULL);
    message_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

    client_name = crm_element_value(message_data, PCMK__XA_CLIENT_NAME);
    if (pcmk__str_empty(client_name)) {
        /* Previously, a stray comma after the first string literal made
         * QB_XS " ref=%s uuid=%s" a (silently unused) variadic argument, so
         * the ref and uuid were never logged. Concatenate the literals so
         * the extended info is actually part of the format string.
         */
        crm_warn("IPC hello from client rejected: No client name "
                 QB_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
        goto rejected;
    }
    if (!authorize_version(message_data, PCMK__XA_MAJOR_VERSION, client_name,
                           ref, uuid)) {
        goto rejected;
    }
    if (!authorize_version(message_data, PCMK__XA_MINOR_VERSION, client_name,
                           ref, uuid)) {
        goto rejected;
    }

    crm_trace("Validated IPC hello from client %s", client_name);
    crm_log_xml_trace(client_msg, "hello");
    if (curr_client) {
        // Remember the client name for later logging/bookkeeping
        curr_client->userdata = pcmk__str_copy(client_name);
    }
    controld_trigger_fsa();
    return false;

rejected:
    crm_log_xml_trace(client_msg, "rejected");
    if (curr_client) {
        qb_ipcs_disconnect(curr_client->ipcs);
    }
    return false;
}
/*!
 * \internal
 * \brief Dispatch a message according to its subtype (request or reply)
 *
 * \param[in,out] msg    Message XML
 * \param[in]     cause  Whether message came via IPC or the cluster layer
 *
 * \return Next FSA input (I_NULL for replies and unknown subtypes)
 */
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
    const char *subtype = NULL;
    enum crmd_fsa_input next_input = I_NULL;

    CRM_CHECK(msg != NULL, return I_NULL);

    subtype = crm_element_value(msg, PCMK__XA_SUBT);
    if (pcmk__str_eq(subtype, PCMK__VALUE_REQUEST, pcmk__str_none)) {
        next_input = handle_request(msg, cause);

    } else if (pcmk__str_eq(subtype, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
        handle_response(msg);

    } else {
        crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'",
                 pcmk__s(subtype, ""));
        crm_log_xml_trace(msg, "bad");
    }
    return next_input;
}
/*!
 * \internal
 * \brief Handle a CRM_OP_CLEAR_FAILCOUNT request
 *
 * Clears a resource's failure history from the attribute manager, the CIB,
 * and local executor state.
 *
 * \param[in] stored_msg  Message XML
 *
 * \return Next FSA input (always I_NULL)
 */
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
    const char *rsc = NULL;
    const char *uname = NULL;
    const char *op = NULL;
    char *interval_spec = NULL;
    guint interval_ms = 0;
    gboolean is_remote_node = FALSE;

    xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
                                            NULL);
    xmlNode *xml_op = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

    if (xml_op) {
        xmlNode *xml_rsc = pcmk__xe_first_child(xml_op, PCMK_XE_PRIMITIVE, NULL,
                                                NULL);
        xmlNode *xml_attrs = pcmk__xe_first_child(xml_op, PCMK__XE_ATTRIBUTES,
                                                  NULL, NULL);

        if (xml_rsc) {
            rsc = pcmk__xe_id(xml_rsc);
        }
        if (xml_attrs) {
            // Optional: restrict the clearing to one operation/interval
            op = crm_element_value(xml_attrs,
                                   CRM_META "_" PCMK__META_CLEAR_FAILURE_OP);
            crm_element_value_ms(xml_attrs,
                                 CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL,
                                 &interval_ms);
        }
    }
    // NOTE(review): presumably crm_element_value() tolerates a NULL xml_op;
    // uname stays NULL in that case and is caught by the check below
    uname = crm_element_value(xml_op, PCMK__META_ON_NODE);

    if ((rsc == NULL) || (uname == NULL)) {
        crm_log_xml_warn(stored_msg, "invalid failcount op");
        return I_NULL;
    }

    // A router node implies the target is a Pacemaker Remote node
    if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) {
        is_remote_node = TRUE;
    }

    crm_debug("Clearing failures for %s-interval %s on %s "
              "from attribute manager, CIB, and executor state",
              pcmk__readable_interval(interval_ms), rsc, uname);

    if (interval_ms) {
        // Freed below; NULL (all intervals) when interval_ms is 0
        interval_spec = crm_strdup_printf("%ums", interval_ms);
    }
    update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
    free(interval_spec);

    controld_cib_delete_last_failure(rsc, uname, op, interval_ms);

    lrm_clear_last_failure(rsc, uname, op, interval_ms);

    return I_NULL;
}
/*!
 * \internal
 * \brief Handle a CRM_OP_LRM_DELETE request
 *
 * \param[in,out] stored_msg  Message XML
 *
 * \return I_ROUTER to relay to the affected node, or I_NULL when handled here
 */
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
    const char *mode = NULL;
    xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML, NULL,
                                            NULL);
    xmlNode *msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

    CRM_CHECK(msg_data != NULL, return I_NULL);

    /* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
     * relay the operation to the affected node, which will unregister the
     * resource from the local executor, clear the resource's history from the
     * CIB, and do some bookkeeping in the controller.
     *
     * However, if the affected node is offline, the client will specify
     * mode=PCMK__VALUE_CIB which means the controller receiving the operation
     * should clear the resource's history from the CIB and nothing else. This
     * is used to clear shutdown locks.
     */
    mode = crm_element_value(msg_data, PCMK__XA_MODE);
    if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
        // Relay to affected node
        crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
        return I_ROUTER;

    } else {
        // Delete CIB history locally (compare with do_lrm_delete())
        const char *from_sys = NULL;
        const char *user_name = NULL;
        const char *rsc_id = NULL;
        const char *node = NULL;
        xmlNode *rsc_xml = NULL;
        int rc = pcmk_rc_ok;

        rsc_xml = pcmk__xe_first_child(msg_data, PCMK_XE_PRIMITIVE, NULL, NULL);
        CRM_CHECK(rsc_xml != NULL, return I_NULL);

        rsc_id = pcmk__xe_id(rsc_xml);
        from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM);
        node = crm_element_value(msg_data, PCMK__META_ON_NODE);
        user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL);
        crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
                  "(clearing CIB resource history only)", rsc_id, node,
                  (user_name? " for user " : ""), (user_name? user_name : ""));
        /* First pass is a synchronous dry run (ACL/permissions check); the
         * real deletion only happens if the dry run succeeds.
         */
        rc = controld_delete_resource_history(rsc_id, node, user_name,
                                              cib_dryrun|cib_sync_call);
        if (rc == pcmk_rc_ok) {
            rc = controld_delete_resource_history(rsc_id, node, user_name,
                                                  crmd_cib_smart_opt());
        }

        /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and
         * op=CRM_OP_LRM_DELETE.
         */
        if (from_sys) {
            lrmd_event_data_t *op = NULL;
            const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC);
            const char *transition;

            // The transition key lives in a different element for the TE
            if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
                transition = crm_element_value(msg_data,
                                               PCMK__XA_TRANSITION_KEY);
            } else {
                transition = crm_element_value(stored_msg,
                                               PCMK__XA_TRANSITION_KEY);
            }

            crm_info("Notifying %s on %s that %s was%s deleted",
                     from_sys, (from_host? from_host : "local node"), rsc_id,
                     ((rc == pcmk_rc_ok)? "" : " not"));
            // Synthesize an executor event so the client gets a normal ack
            op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0);
            op->type = lrmd_event_exec_complete;
            op->user_data = pcmk__str_copy(pcmk__s(transition, FAKE_TE_ID));
            op->params = pcmk__strkey_table(free, free);
            pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET,
                             CRM_FEATURE_SET);
            controld_rc2event(op, rc);
            controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
            lrmd_free_event(op);
            controld_trigger_delete_refresh(from_sys, rsc_id);
        }
        return I_NULL;
    }
}
/*!
 * \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
 *
 * \param[in] msg  Message XML
 *
 * \return Next FSA input
 */
static enum crmd_fsa_input
handle_remote_state(const xmlNode *msg)
{
    bool node_up = false;
    const char *conn_host = NULL;
    const char *node_name = pcmk__xe_id(msg);
    pcmk__node_status_t *peer = NULL;
    int rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &node_up);

    CRM_CHECK(node_name && rc == pcmk_rc_ok, return I_NULL);

    peer = pcmk__cluster_lookup_remote_node(node_name);
    CRM_CHECK(peer, return I_NULL);

    // Record the remote node's new membership state
    pcmk__update_peer_state(__func__, peer,
                            (node_up? PCMK_VALUE_MEMBER : PCMK__VALUE_LOST),
                            0);

    // Track (or forget) which cluster node hosts the remote connection
    conn_host = crm_element_value(msg, PCMK__XA_CONNECTION_HOST);
    if (conn_host != NULL) {
        pcmk__str_update(&peer->conn_host, conn_host);

    } else if (peer->conn_host != NULL) {
        free(peer->conn_host);
        peer->conn_host = NULL;
    }

    return I_NULL;
}
/*!
 * \brief Handle a CRM_OP_PING message
 *
 * \param[in] msg  Message XML
 *
 * \return Next FSA input
 */
static enum crmd_fsa_input
handle_ping(const xmlNode *msg)
{
    xmlNode *ping = pcmk__xe_create(NULL, PCMK__XE_PING_RESPONSE);
    xmlNode *reply = NULL;
    const char *text = NULL;

    // Echo back which subsystem was pinged
    text = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
    crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, text);

    // Report the controller's current FSA state
    text = fsa_state2string(controld_globals.fsa_state);
    crm_xml_add(ping, PCMK__XA_CRMD_STATE, text);
    crm_notice("Current ping state: %s", text); // CTS needs this

    // Report controller health
    // @TODO maybe do some checks to determine meaningful status
    crm_xml_add(ping, PCMK_XA_RESULT, "ok");

    // Route the reply back to the requester
    reply = pcmk__new_reply(msg, ping);
    pcmk__xml_free(ping);
    if (reply != NULL) {
        (void) relay_message(reply, TRUE);
        pcmk__xml_free(reply);
    }

    // Nothing further to do
    return I_NULL;
}
/*!
 * \brief Handle a PCMK__CONTROLD_CMD_NODES message
 *
 * \param[in] request  Message XML
 *
 * \return Next FSA input
 */
static enum crmd_fsa_input
handle_node_list(const xmlNode *request)
{
    GHashTableIter iter;
    pcmk__node_status_t *node = NULL;
    xmlNode *reply = NULL;
    xmlNode *reply_data = pcmk__xe_create(NULL, PCMK_XE_NODES);

    // Add one entry per node in the peer cache
    g_hash_table_iter_init(&iter, pcmk__peer_cache);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
        xmlNode *entry = pcmk__xe_create(reply_data, PCMK_XE_NODE);

        crm_xml_add_ll(entry, PCMK_XA_ID,
                       (long long) node->cluster_layer_id); // uint32_t
        crm_xml_add(entry, PCMK_XA_UNAME, node->name);
        crm_xml_add(entry, PCMK__XA_IN_CCM, node->state);
    }

    // Create and send reply
    reply = pcmk__new_reply(request, reply_data);
    pcmk__xml_free(reply_data);
    if (reply != NULL) {
        (void) relay_message(reply, TRUE);
        pcmk__xml_free(reply);
    }

    // Nothing further to do
    return I_NULL;
}
/*!
 * \brief Handle a CRM_OP_NODE_INFO request
 *
 * \param[in] msg  Message XML
 *
 * \return Next FSA input
 */
static enum crmd_fsa_input
handle_node_info_request(const xmlNode *msg)
{
    int node_id = 0;
    const char *node_name = NULL;
    pcmk__node_status_t *node = NULL;
    xmlNode *reply = NULL;
    xmlNode *reply_data = pcmk__xe_create(NULL, PCMK_XE_NODE);

    crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD);

    // Add whether current partition has quorum
    pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM,
                           pcmk_is_set(controld_globals.flags,
                                       controld_has_quorum));

    /* Check whether client requested node info by ID and/or name
     *
     * @TODO A Corosync-layer node ID is of type uint32_t. We should be able to
     * handle legitimate node IDs greater than INT_MAX, but currently we do not.
     */
    crm_element_value_int(msg, PCMK_XA_ID, &node_id);
    if (node_id < 0) {
        node_id = 0;
    }
    node_name = crm_element_value(msg, PCMK_XA_UNAME);

    // Default to local node if none given
    if ((node_id == 0) && (node_name == NULL)) {
        node_name = controld_globals.our_nodename;
    }

    node = pcmk__search_node_caches(node_id, node_name, pcmk__node_search_any);
    if (node != NULL) {
        crm_xml_add(reply_data, PCMK_XA_ID, node->xml_id);
        crm_xml_add(reply_data, PCMK_XA_UNAME, node->name);
        crm_xml_add(reply_data, PCMK_XA_CRMD, node->state);
        pcmk__xe_set_bool_attr(reply_data, PCMK_XA_REMOTE_NODE,
                               pcmk_is_set(node->flags,
                                           pcmk__node_status_remote));
    }

    // Send reply
    reply = pcmk__new_reply(msg, reply_data);
    pcmk__xml_free(reply_data);
    if (reply != NULL) {
        (void) relay_message(reply, TRUE);
        pcmk__xml_free(reply);
    }

    // Nothing further to do
    return I_NULL;
}
/*!
 * \internal
 * \brief Ensure the local feature set is compatible with the DC's, exiting
 *        fatally (and staying down) if it is not
 *
 * \param[in,out] msg  Join offer XML carrying the DC's feature set
 */
static void
verify_feature_set(xmlNode *msg)
{
    const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET);

    if (dc_version == NULL) {
        /* All we really know is that the DC feature set is older than 3.1.0,
         * but that's also all that really matters.
         */
        dc_version = "3.0.14";
    }

    if (!feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
        crm_err("Local feature set (%s) is incompatible with DC's (%s)",
                CRM_FEATURE_SET, dc_version);

        // Nothing is likely to improve without administrator involvement
        controld_set_fsa_input_flags(R_STAYDOWN);
        crmd_exit(CRM_EX_FATAL);
        return;
    }

    crm_trace("Local feature set (%s) is compatible with DC's (%s)",
              CRM_FEATURE_SET, dc_version);
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
    const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
    const char *who = (host_from? host_from : "another node");

    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
        // The expected case -- we initiated own shutdown sequence
        crm_info("Shutting down controller");
        return I_STOP;
    }

    if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
        // Must be logic error -- DC confirming its own unrequested shutdown
        crm_err("Shutting down controller immediately due to "
                "unexpected shutdown confirmation");
        return I_TERMINATE;
    }

    if (controld_globals.fsa_state != S_STOPPING) {
        // Shouldn't happen -- non-DC confirming unrequested shutdown
        crm_err("Starting new DC election because %s is "
                "confirming shutdown we did not request", who);
        return I_ELECTION;
    }

    // Shouldn't happen, but we are already stopping anyway
    crm_debug("Ignoring unexpected shutdown confirmation from %s", who);
    return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
    const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);

    if (host_from == NULL) {
        crm_warn("Ignoring shutdown request without origin specified");
        return I_NULL;
    }

    if (!pcmk__str_eq(host_from, controld_globals.dc_name,
                      pcmk__str_null_matches|pcmk__str_casei)) {
        // Only the DC (or an unknown DC) may tell us to shut down
        crm_warn("Ignoring shutdown request from %s because DC is %s",
                 host_from, controld_globals.dc_name);
        return I_NULL;
    }

    if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
        // The expected case -- we asked the DC for permission to shut down
        crm_info("Shutting down controller after confirmation from %s",
                 host_from);
    } else {
        // Unsolicited shutdown order -- comply, but do not restart
        crm_err("Shutting down controller after unexpected "
                "shutdown request from %s", host_from);
        controld_set_fsa_input_flags(R_STAYDOWN);
    }
    return I_STOP;
}
/*!
 * \internal
 * \brief Handle a request message, dispatching by operation
 *
 * \param[in,out] stored_msg  Request XML
 * \param[in]     cause       Whether request came via IPC or the cluster
 *
 * \return Next FSA input
 */
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
    xmlNode *msg = NULL;
    const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);

    /* Optimize this for the DC - it has the most to do */

    crm_log_xml_trace(stored_msg, "request");
    if (op == NULL) {
        crm_warn("Ignoring request without " PCMK__XA_CRM_TASK);
        return I_NULL;
    }

    if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
        // Note the sender's intent to leave, regardless of who is DC
        const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
        pcmk__node_status_t *node =
            pcmk__search_node_caches(0, from, pcmk__node_search_cluster_member);

        pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
        if(AM_I_DC == FALSE) {
            return I_NULL; /* Done */
        }
    }

    /*========== DC-Only Actions ==========*/
    if (AM_I_DC) {
        if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
            return I_NODE_JOIN;

        } else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
            return I_JOIN_REQUEST;

        } else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
            return I_JOIN_RESULT;

        } else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
            return handle_shutdown_self_ack(stored_msg);

        } else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
            // Another controller wants to shut down its node
            return handle_shutdown_request(stored_msg);
        }
    }

    /*========== common actions ==========*/
    if (strcmp(op, CRM_OP_NOVOTE) == 0) {
        // Feed the (non-)vote into the election logic
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
                               __func__);

    } else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
        /* a remote connection host is letting us know the node state */
        return handle_remote_state(stored_msg);

    } else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
        throttle_update(stored_msg);
        if (AM_I_DC && (controld_globals.transition_graph != NULL)
            && !controld_globals.transition_graph->complete) {

            crm_debug("The throttle changed. Trigger a graph.");
            trigger_graph();
        }
        return I_NULL;

    } else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
        return handle_failcount_op(stored_msg);

    } else if (strcmp(op, CRM_OP_VOTE) == 0) {
        /* count the vote and decide what to do after that */
        ha_msg_input_t fsa_input;

        fsa_input.msg = stored_msg;
        register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
                               A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
                               __func__);

        /* Sometimes we _must_ go into S_ELECTION */
        if (controld_globals.fsa_state == S_HALT) {
            crm_debug("Forcing an election from S_HALT");
            return I_ELECTION;
        }

    } else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
        // Check compatibility with the DC before accepting the offer
        verify_feature_set(stored_msg);
        crm_debug("Raising I_JOIN_OFFER: join-%s",
                  crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
        return I_JOIN_OFFER;

    } else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
        crm_debug("Raising I_JOIN_RESULT: join-%s",
                  crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
        return I_JOIN_RESULT;

    } else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
        return handle_lrm_delete(stored_msg);

    } else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
               || (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
               || (strcmp(op, CRM_OP_REPROBE) == 0)) {

        // Hand these off to the local executor handling
        crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
        return I_ROUTER;

    } else if (strcmp(op, CRM_OP_NOOP) == 0) {
        return I_NULL;

    } else if (strcmp(op, CRM_OP_PING) == 0) {
        return handle_ping(stored_msg);

    } else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
        return handle_node_info_request(stored_msg);

    } else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
        int id = 0;
        const char *name = NULL;

        crm_element_value_int(stored_msg, PCMK_XA_ID, &id);
        name = crm_element_value(stored_msg, PCMK_XA_UNAME);

        // From IPC, rebroadcast to all peers; from a peer, act locally
        if(cause == C_IPC_MESSAGE) {
-            msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+            msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL,
+                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD);
            if (!pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg)) {
                crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
            } else {
                crm_notice("Instructing peers to remove references to node %s/%u", name, id);
            }
            pcmk__xml_free(msg);

        } else {
            pcmk__cluster_forget_cluster_node(id, name);

            /* If we're forgetting this node, also forget any failures to fence
             * it, so we don't carry that over to any node added later with the
             * same name.
             */
            st_fail_count_reset(name);
        }

    } else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
        xmlNode *wrapper = pcmk__xe_first_child(stored_msg, PCMK__XE_CRM_XML,
                                                NULL, NULL);
        xmlNode *xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

        remote_ra_process_maintenance_nodes(xml);

    } else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
        return handle_node_list(stored_msg);

        /*========== (NOT_DC)-Only Actions ==========*/
    } else if (!AM_I_DC) {

        if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
            return handle_shutdown_ack(stored_msg);
        }

    } else {
        /* NOTE(review): because of the chain above, this branch is reached
         * only when AM_I_DC is true, so the ternary below always prints
         * "the DC"; unknown ops on a non-DC are silently ignored
         */
        crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
        crm_log_xml_err(stored_msg, "Unexpected");
    }

    return I_NULL;
}
/*!
 * \internal
 * \brief Handle a reply message, dispatching by operation
 *
 * \param[in,out] stored_msg  Reply XML
 */
static void
handle_response(xmlNode *stored_msg)
{
    const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);

    crm_log_xml_trace(stored_msg, "reply");
    if (op == NULL) {
        crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK);
        return;
    }

    if (AM_I_DC && (strcmp(op, CRM_OP_PECALC) == 0)) {
        // Check whether scheduler answer been superseded by subsequent request
        const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE);

        if (msg_ref == NULL) {
            crm_err("%s - Ignoring calculation with no reference", op);

        } else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
                                pcmk__str_none)) {
            // Still current -- feed the result into the FSA
            ha_msg_input_t fsa_input;

            controld_stop_sched_timer();
            fsa_input.msg = stored_msg;
            register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);

        } else {
            crm_info("%s calculation %s is obsolete", op, msg_ref);
        }
        return;
    }

    if ((strcmp(op, CRM_OP_VOTE) == 0)
        || (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0)
        || (strcmp(op, CRM_OP_SHUTDOWN) == 0)) {
        // Expected replies that require no action here
        return;
    }

    crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
            op, crm_element_value(stored_msg, PCMK__XA_SRC),
            AM_I_DC ? "DC" : "controller");
}
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
    /* Handle the request here rather than relaying it, to avoid potential
     * version issues where the shutdown message/procedure may have been
     * changed in later versions.
     *
     * This way the DC is always in control of the shutdown.
     */

    const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
    char *timestamp = NULL;

    if (host_from == NULL) {
        // No source means we are the DC and are shutting down ourselves
        host_from = controld_globals.our_nodename;
    }

    crm_info("Creating shutdown request for %s (state=%s)", host_from,
             fsa_state2string(controld_globals.fsa_state));
    crm_log_xml_trace(stored_msg, "message");

    // Set the node's shutdown attribute; the TE will act on it while running
    timestamp = pcmk__ttoa(time(NULL));
    update_attrd(host_from, PCMK__NODE_ATTR_SHUTDOWN, timestamp, NULL, FALSE);
    free(timestamp);

    return I_NULL;
}
/*!
 * \internal
 * \brief Deliver a message to a local subsystem over IPC (or equivalent)
 *
 * \param[in,out] msg  Message XML to deliver
 * \param[in]     sys  Destination subsystem name (or client/session ID)
 */
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
    pcmk__client_t *client_channel = NULL;

    CRM_CHECK(sys != NULL, return);

    client_channel = pcmk__find_client_by_id(sys);

    // Stamp the local node as origin if the sender did not
    if (crm_element_value(msg, PCMK__XA_SRC) == NULL) {
        crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename);
    }

    if (client_channel != NULL) {
        /* Transient clients such as crmadmin */
        pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);

    } else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
        // Transition engine runs inside the controller; call it directly
        xmlNode *wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL,
                                                NULL);
        xmlNode *data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

        process_te_message(msg, data);

    } else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
        /* Executor requests are handled in-process: build a synthetic FSA
         * data record and invoke the executor action directly
         */
        fsa_data_t fsa_data;
        ha_msg_input_t fsa_input;
        xmlNode *wrapper = NULL;

        fsa_input.msg = msg;

        wrapper = pcmk__xe_first_child(msg, PCMK__XE_CRM_XML, NULL, NULL);
        fsa_input.xml = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

        fsa_data.id = 0;
        fsa_data.actions = 0;
        fsa_data.data = &fsa_input;
        fsa_data.fsa_input = I_MESSAGE;
        fsa_data.fsa_cause = C_IPC_MESSAGE;
        fsa_data.origin = __func__;
        fsa_data.data_type = fsa_dt_ha_msg;

        do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
                      I_MESSAGE, &fsa_data);

    } else if (crmd_is_proxy_session(sys)) {
        // Pacemaker Remote proxy session
        crmd_proxy_send(sys, msg);

    } else {
        crm_info("Received invalid request: unknown subsystem '%s'", sys);
    }
}
/*!
 * \internal
 * \brief Free an ha_msg_input_t and the message XML it owns
 *
 * \param[in,out] orig  Input to free (may be NULL)
 */
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
    if (orig != NULL) {
        pcmk__xml_free(orig->msg);
        free(orig);
    }
}
/*!
 * \internal
 * \brief Notify the cluster of a remote node state change
 *
 * \param[in] node_name  Node's name
 * \param[in] node_up    true if node is up, false if down
 */
void
broadcast_remote_state_message(const char *node_name, bool node_up)
{
    xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
-                                  CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+                                  CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD);

    crm_info("Notifying cluster of Pacemaker Remote node %s %s",
             node_name, node_up? "coming up" : "going down");

    crm_xml_add(msg, PCMK_XA_ID, node_name);
    pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up);

    // Only an up node has a hosting connection to advertise
    if (node_up) {
        crm_xml_add(msg, PCMK__XA_CONNECTION_HOST,
                    controld_globals.our_nodename);
    }

    // NULL node means broadcast to all cluster nodes
    pcmk__cluster_send_message(NULL, pcmk_ipc_controld, msg);
    pcmk__xml_free(msg);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index 0cc19250e0..779552a267 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,765 +1,766 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
static GHashTable *te_targets = NULL;
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
/*!
 * \internal
 * \brief Arm the completion timer for a graph action
 *
 * \param[in]     graph   Transition graph (provides network delay padding)
 * \param[in,out] action  Action whose timer should be started
 */
static void
te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    guint delay_ms = action->timeout + graph->network_delay;

    action->timer = g_timeout_add(delay_ms, action_timer_callback,
                                  (void *) action);
    CRM_ASSERT(action->timer != 0);
}
/*!
 * \internal
 * \brief Execute a graph pseudo-action
 *
 * \param[in,out] graph   Transition graph being executed
 * \param[in,out] pseudo  Pseudo-action to execute
 *
 * \return Standard Pacemaker return code
 */
static int
execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
    const char *task = crm_element_value(pseudo->xml, PCMK_XA_OPERATION);

    /* send to peers as well? */
    if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) {
        // Fan the maintenance update out to every peer except ourselves
        GHashTableIter iter;
        pcmk__node_status_t *node = NULL;

        g_hash_table_iter_init(&iter, pcmk__peer_cache);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            xmlNode *cmd = NULL;

            if (pcmk__str_eq(controld_globals.our_nodename, node->name,
                             pcmk__str_casei)) {
                continue;
            }

            cmd = create_request(task, pseudo->xml, node->name,
-                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+                                 CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE);
            pcmk__cluster_send_message(node, pcmk_ipc_controld, cmd);
            pcmk__xml_free(cmd);
        }

        remote_ra_process_maintenance_nodes(pseudo->xml);
    } else {
        /* Check action for Pacemaker Remote node side effects */
        remote_ra_process_pseudo(pseudo->xml);
    }

    // Pseudo-actions have no real work, so they are confirmed immediately
    crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
              crm_element_value(pseudo->xml, PCMK__XA_OPERATION_KEY));
    te_action_confirmed(pseudo, graph);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Get a graph action's expected result code (default 0)
 *
 * \param[in] action  Graph action to check
 *
 * \return Target rc from the action's meta-attributes, or 0 if unset/invalid
 */
static int
get_target_rc(pcmk__graph_action_t *action)
{
    int target_rc = 0;
    const char *value = crm_meta_value(action->params,
                                       PCMK__META_OP_TARGET_RC);

    pcmk__scan_min_int(value, &target_rc, 0);
    return target_rc;
}
/*!
* \internal
* \brief Execute a cluster action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Cluster action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    char *counter = NULL;
    xmlNode *cmd = NULL;
    gboolean is_local = FALSE;
    const char *id = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;
    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    const pcmk__node_status_t *node = NULL;

    // The graph action must carry an ID, a task, and a target node
    id = pcmk__xe_id(action->xml);
    CRM_CHECK(!pcmk__str_empty(id), return EPROTO);

    task = crm_element_value(action->xml, PCMK_XA_OPERATION);
    CRM_CHECK(!pcmk__str_empty(task), return EPROTO);

    on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
    CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);

    /* Without an explicit router node, route to the target node itself,
     * except that CIB-mode lrm_delete actions are handled locally
     */
    router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
    if (router_node == NULL) {
        router_node = on_node;
        if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
            const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);

            if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
                router_node = controld_globals.our_nodename;
            }
        }
    }

    if (pcmk__str_eq(router_node, controld_globals.our_nodename,
                     pcmk__str_casei)) {
        is_local = TRUE;
    }

    // Honor the no-wait meta-attribute (confirm without waiting for a reply)
    value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_info("Handling controller request '%s' (%s on %s)%s%s",
             id, task, on_node, (is_local? " locally" : ""),
             (no_wait? " without waiting" : ""));

    if (is_local
        && pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
        /* defer until everything else completes */
        crm_info("Controller request '%s' is a local shutdown", id);
        graph->completion_action = pcmk__graph_shutdown;
        graph->abort_reason = "local shutdown";
        te_action_confirmed(action, graph);
        return pcmk_rc_ok;

    } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
        // Record that this peer is expected to go down
        pcmk__node_status_t *peer =
            pcmk__get_node(0, router_node, NULL,
                           pcmk__node_search_cluster_member);

        pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
    }

-    cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
+    cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD,
+                         CRM_SYSTEM_TENGINE);

    // Tag the request with the transition key so the reply can be matched
    counter = pcmk__transition_key(controld_globals.transition_graph->id,
                                   action->id, get_target_rc(action),
                                   controld_globals.te_uuid);
    crm_xml_add(cmd, PCMK__XA_TRANSITION_KEY, counter);

    node = pcmk__get_node(0, router_node, NULL,
                          pcmk__node_search_cluster_member);
    rc = pcmk__cluster_send_message(node, pcmk_ipc_controld, cmd);
    free(counter);
    pcmk__xml_free(cmd);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return ECOMM;

    } else if (no_wait) {
        te_action_confirmed(action, graph);

    } else {
        if (action->timeout <= 0) {
            // Fall back to the graph-wide network delay for the action timer
            crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
                    action->id, task, on_node, action->timeout, graph->network_delay);
            action->timeout = (int) graph->network_delay;
        }
        te_start_action_timer(graph, action);
    }
    return pcmk_rc_ok;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
{
    lrmd_event_data_t *op = NULL;
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    const char *reason = NULL;
    char *dynamic_reason = NULL;

    // Blame the local executor or the routing controller, as appropriate
    if (pcmk__str_eq(target, pcmk__cluster_local_node_name(),
                     pcmk__str_casei)) {
        reason = "Local executor did not return result in time";
    } else {
        const char *router_node = NULL;

        router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
        if (router_node == NULL) {
            router_node = target;
        }
        dynamic_reason = crm_strdup_printf("Controller on %s did not return "
                                           "result in time", router_node);
        reason = dynamic_reason;
    }

    op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
                                       PCMK_OCF_UNKNOWN_ERROR, reason);
    op->call_id = -1;   // No real executor call is associated with this event
    op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
                                         action->id, target_rc,
                                         controld_globals.te_uuid);
    /* NOTE(review): assumes pcmk__event_from_graph_action() copies reason
     * into the event, so freeing here is safe -- confirm
     */
    free(dynamic_reason);
    return op;
}
/*!
 * \internal
 * \brief Record a synthesized action result in the CIB
 *
 * Builds a node_state XML fragment containing a resource history entry for
 * the given event, and submits it to the CIB as a modification of the
 * status section.
 *
 * \param[in,out] action  Graph action the result is for
 * \param[in,out] op      Synthesized executor event with the result
 */
static void
controld_record_action_event(pcmk__graph_action_t *action,
                             lrmd_event_data_t *op)
{
    cib_t *cib_conn = controld_globals.cib_conn;
    xmlNode *state = NULL;
    xmlNode *rsc = NULL;
    xmlNode *action_rsc = NULL;
    int rc = pcmk_ok;
    const char *rsc_id = NULL;
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    const char *task_uuid = crm_element_value(action->xml,
                                              PCMK__XA_OPERATION_KEY);
    const char *target_uuid = crm_element_value(action->xml,
                                                PCMK__META_ON_NODE_UUID);
    int target_rc = get_target_rc(action);

    // Nothing to record if the action carries no resource element
    action_rsc = pcmk__xe_first_child(action->xml, PCMK_XE_PRIMITIVE, NULL,
                                      NULL);
    if (action_rsc == NULL) {
        return;
    }

    rsc_id = pcmk__xe_id(action_rsc);
    CRM_CHECK(rsc_id != NULL,
              crm_log_xml_err(action->xml, "Bad:action"); return);

    /*
      update the CIB

      <node_state id="hadev">
      <lrm>
      <lrm_resources>
      <lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
     */

    // Build the node_state fragment holding the history entry
    state = pcmk__xe_create(NULL, PCMK__XE_NODE_STATE);
    crm_xml_add(state, PCMK_XA_ID, target_uuid);
    crm_xml_add(state, PCMK_XA_UNAME, target);

    rsc = pcmk__xe_create(state, PCMK__XE_LRM);
    crm_xml_add(rsc, PCMK_XA_ID, target_uuid);

    rsc = pcmk__xe_create(rsc, PCMK__XE_LRM_RESOURCES);
    rsc = pcmk__xe_create(rsc, PCMK__XE_LRM_RESOURCE);
    crm_xml_add(rsc, PCMK_XA_ID, rsc_id);

    // Carry over the resource's agent identification
    crm_copy_xml_element(action_rsc, rsc, PCMK_XA_TYPE);
    crm_copy_xml_element(action_rsc, rsc, PCMK_XA_CLASS);
    crm_copy_xml_element(action_rsc, rsc, PCMK_XA_PROVIDER);

    pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
                             __func__);

    // Modify (not replace) the status section with the fragment
    rc = cib_conn->cmds->modify(cib_conn, PCMK_XE_STATUS, state, cib_none);
    fsa_register_cib_callback(rc, NULL, cib_action_updated);
    pcmk__xml_free(state);

    crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
              rc, action->id, task_uuid, target);
    pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
/*!
 * \internal
 * \brief Record a timed-out graph action in the CIB as a synthesized failure
 *
 * \param[in,out] action  Graph action that timed out
 */
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
    lrmd_event_data_t *op = NULL;
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    const char *task_uuid = crm_element_value(action->xml,
                                              PCMK__XA_OPERATION_KEY);
    int target_rc = get_target_rc(action);

    crm_warn("%s %d: %s on %s timed out",
             action->xml->name, action->id, task_uuid, target);

    // Synthesize a timeout result and write it to the CIB
    op = synthesize_timeout_event(action, target_rc);
    controld_record_action_event(action, op);
    lrmd_free_event(op);
}
/*!
* \internal
* \brief Execute a resource action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Resource action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    /* never overwrite stop actions in the CIB with
     * anything other than completed results
     *
     * Writing pending stops makes it look like the
     * resource is running again
     */
    xmlNode *cmd = NULL;
    xmlNode *rsc_op = NULL;

    gboolean rc = TRUE;
    gboolean no_wait = FALSE;
    gboolean is_local = FALSE;

    char *counter = NULL;
    const char *task = NULL;
    const char *value = NULL;
    const char *on_node = NULL;
    const char *router_node = NULL;
    const char *task_uuid = NULL;

    CRM_ASSERT(action != NULL);
    CRM_ASSERT(action->xml != NULL);

    pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);

    /* NOTE(review): task is still NULL at this point, so this error message
     * always falls back to "without task"
     */
    on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
    CRM_CHECK(!pcmk__str_empty(on_node),
              crm_err("Corrupted command(id=%s) %s: no node",
                      pcmk__xe_id(action->xml), pcmk__s(task, "without task"));
              return pcmk_rc_node_unknown);

    rsc_op = action->xml;
    task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
    task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);

    // Route to the target node itself unless an explicit router node is given
    router_node = crm_element_value(rsc_op, PCMK__XA_ROUTER_NODE);
    if (!router_node) {
        router_node = on_node;
    }

    // Tag the operation with the transition key so results can be matched
    counter = pcmk__transition_key(controld_globals.transition_graph->id,
                                   action->id, get_target_rc(action),
                                   controld_globals.te_uuid);
    crm_xml_add(rsc_op, PCMK__XA_TRANSITION_KEY, counter);

    if (pcmk__str_eq(router_node, controld_globals.our_nodename,
                     pcmk__str_casei)) {
        is_local = TRUE;
    }

    // Honor the no-wait meta-attribute (confirm without waiting for a result)
    value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
    if (crm_is_true(value)) {
        no_wait = TRUE;
    }

    crm_notice("Initiating %s operation %s%s on %s%s " QB_XS " action %d",
               task, task_uuid, (is_local? " locally" : ""), on_node,
               (no_wait? " without waiting" : ""), action->id);

    cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
-                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
+                         CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE);

    if (is_local) {
        /* shortcut local resource commands */
        ha_msg_input_t data = {
            .msg = cmd,
            .xml = rsc_op,
        };
        fsa_data_t msg = {
            .id = 0,
            .data = &data,
            .data_type = fsa_dt_ha_msg,
            .fsa_input = I_NULL,
            .fsa_cause = C_FSA_INTERNAL,
            .actions = A_LRM_INVOKE,
            .origin = __func__,
        };

        // Invoke the local executor path directly, bypassing messaging
        do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
                      I_NULL, &msg);

    } else {
        // Relay the command to the router node via the cluster layer
        const pcmk__node_status_t *node =
            pcmk__get_node(0, router_node, NULL,
                           pcmk__node_search_cluster_member);

        rc = pcmk__cluster_send_message(node, pcmk_ipc_execd, cmd);
    }

    free(counter);
    pcmk__xml_free(cmd);

    pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);

    if (rc == FALSE) {
        crm_err("Action %d failed: send", action->id);
        return ECOMM;

    } else if (no_wait) {
        /* Just mark confirmed. Don't bump the job count only to immediately
         * decrement it.
         */
        crm_info("Action %d confirmed - no wait", action->id);
        pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
        pcmk__update_graph(controld_globals.transition_graph, action);
        trigger_graph();

    } else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
        crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
                  action->id, task, task_uuid, on_node, action->timeout);

    } else {
        if (action->timeout <= 0) {
            // Fall back to the graph-wide network delay for the action timer
            crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
                    action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
            action->timeout = (int) graph->network_delay;
        }
        te_update_job_count(action, 1);
        te_start_action_timer(graph, action);
    }
    return pcmk_rc_ok;
}
// Per-node record of in-flight graph actions, used for job throttling
struct te_peer_s
{
    char *name;         // Node name (hash table key)
    int jobs;           // Number of in-flight actions on this node
    int migrate_jobs;   // How many of those are migration actions
};

// Free a struct te_peer_s (value destructor for the te_targets table)
static void te_peer_free(gpointer p)
{
    struct te_peer_s *peer = p;

    free(peer->name);
    free(peer);
}
/*!
 * \internal
 * \brief Zero the in-flight job counters for every known peer
 *
 * Creates the peer table on first use, then clears both the regular and
 * migration job counts for each tracked node.
 */
void te_reset_job_counts(void)
{
    GHashTableIter iter;
    struct te_peer_s *entry = NULL;

    if (te_targets == NULL) {
        te_targets = pcmk__strkey_table(NULL, te_peer_free);
    }

    g_hash_table_iter_init(&iter, te_targets);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &entry)) {
        entry->jobs = 0;
        entry->migrate_jobs = 0;
    }
}
/*!
 * \internal
 * \brief Adjust the in-flight job count recorded for a single node
 *
 * \param[in] target   Name of node whose count should change (may be NULL)
 * \param[in] offset   Amount to add to the node's job count
 * \param[in] migrate  If true, also adjust the node's migration job count
 */
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
    struct te_peer_s *entry = NULL;

    if ((target == NULL) || (te_targets == NULL)) {
        return;
    }

    entry = g_hash_table_lookup(te_targets, target);
    if (entry == NULL) {
        // First action involving this node: start tracking it
        entry = pcmk__assert_alloc(1, sizeof(struct te_peer_s));
        entry->name = pcmk__str_copy(target);
        g_hash_table_insert(te_targets, entry->name, entry);
    }

    entry->jobs += offset;
    if (migrate) {
        entry->migrate_jobs += offset;
    }
    crm_trace("jobs[%s] = %d", target, entry->jobs);
}
/*!
 * \internal
 * \brief Adjust in-flight job counts for the node(s) a resource action affects
 *
 * \param[in,out] action  Resource action being started or completed
 * \param[in]     offset  Amount to add to the affected node's job count(s)
 */
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
    const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
    const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);

    if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
        /* No limit on these */
        return;
    }

    /* if we have a router node, this means the action is performing
     * on a remote node. For now, we count all actions occurring on a
     * remote node against the job list on the cluster node hosting
     * the connection resources */
    target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);

    if ((target == NULL)
        && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
                                PCMK_ACTION_MIGRATE_FROM, NULL)) {
        // Migrations count against both the source and the target node
        const char *t1 = crm_meta_value(action->params,
                                        PCMK__META_MIGRATE_SOURCE);
        const char *t2 = crm_meta_value(action->params,
                                        PCMK__META_MIGRATE_TARGET);

        te_update_job_count_on(t1, offset, TRUE);
        te_update_job_count_on(t2, offset, TRUE);
        return;

    } else if (target == NULL) {
        // Non-remote, non-migration action: count against the target node
        target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    }

    te_update_job_count_on(target, offset, FALSE);
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed on a node
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
* \param[in] target Name of node where action should be executed
*
* \return true if action is allowed, otherwise false
*/
static bool
allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
                const char *target)
{
    int limit = 0;
    struct te_peer_s *r = NULL;
    const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
    const char *id = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);

    if (target == NULL) {
        /* No limit on these */
        return true;

    } else if (te_targets == NULL) {
        // Job tracking not initialized yet; defer everything
        return false;
    }

    r = g_hash_table_lookup(te_targets, target);
    limit = throttle_get_job_limit(target);

    if (r == NULL) {
        // First action involving this node: start tracking it
        r = pcmk__assert_alloc(1, sizeof(struct te_peer_s));
        r->name = pcmk__str_copy(target);
        g_hash_table_insert(te_targets, r->name, r);
    }

    if (limit <= r->jobs) {
        crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
                  target, limit, r->jobs, id);
        return false;

    } else if ((graph->migration_limit > 0)
               && (r->migrate_jobs >= graph->migration_limit)) {
        // Migration-specific limit applies only to migration actions
        if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
                                 PCMK_ACTION_MIGRATE_FROM, NULL)) {
            crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
                      target, graph->migration_limit, r->migrate_jobs, id);
            return false;
        }
    }

    /* Fixed previously garbled trace message, which read
     * "current jobs = %d limit= %d limit" (the word "limit" was duplicated)
     */
    crm_trace("Peer %s has not hit their limit yet: current jobs = %d, limit = %d",
              target, r->jobs, limit);

    return true;
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
*
* \return true if action is allowed, otherwise false
*/
static bool
graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
    const char *target = NULL;
    const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);

    if (action->type != pcmk__rsc_graph_action) {
        /* No limit on these */
        return true;
    }

    /* if we have a router node, this means the action is performing
     * on a remote node. For now, we count all actions occurring on a
     * remote node against the job list on the cluster node hosting
     * the connection resources */
    target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);

    if ((target == NULL)
        && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
                                PCMK_ACTION_MIGRATE_FROM, NULL)) {
        // Migrations must be allowed on both the source and the target node
        target = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE);
        if (!allowed_on_node(graph, action, target)) {
            return false;
        }
        target = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET);

    } else if (target == NULL) {
        target = crm_element_value(action->xml, PCMK__META_ON_NODE);
    }

    return allowed_on_node(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in,out] action Action to confirm
* \param[in,out] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
    if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
        bool is_resource_action = (action->type == pcmk__rsc_graph_action);

        // A completed resource action frees up a job slot on its node
        if (is_resource_action
            && (crm_element_value(action->xml, PCMK__META_ON_NODE) != NULL)) {
            te_update_job_count(action, -1);
        }
        pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
    }

    if (graph != NULL) {
        pcmk__update_graph(graph, action);
        trigger_graph();
    }
}
// Callbacks libpacemaker uses to execute each kind of graph action
static pcmk__graph_functions_t te_graph_fns = {
    execute_pseudo_action,
    execute_rsc_action,
    execute_cluster_action,
    controld_execute_fence_action,
    graph_action_allowed,
};

/*
 * \internal
 * \brief Register the transitioner's graph functions with \p libpacemaker
 */
void
controld_register_graph_functions(void)
{
    pcmk__set_graph_functions(&te_graph_fns);
}
/*!
 * \internal
 * \brief Handle completion of a transition graph
 *
 * Translates the graph's completion action into the appropriate FSA input
 * (or timer/scheduler trigger), then resets the graph's completion state
 * for the next transition.
 *
 * \param[in,out] graph  Completed transition graph
 */
void
notify_crmd(pcmk__graph_t *graph)
{
    const char *type = "unknown";
    enum crmd_fsa_input event = I_NULL;

    crm_debug("Processing transition completion in state %s",
              fsa_state2string(controld_globals.fsa_state));

    CRM_CHECK(graph->complete, graph->complete = true);

    switch (graph->completion_action) {
        case pcmk__graph_wait:
            type = "stop";
            if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
                event = I_TE_SUCCESS;
            }
            break;
        case pcmk__graph_done:
            type = "done";
            if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
                event = I_TE_SUCCESS;
            }
            break;

        case pcmk__graph_restart:
            type = "restart";
            if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
                // Let the periodic timer trigger recalculation if configured
                if (controld_get_period_transition_timer() > 0) {
                    controld_stop_transition_timer();
                    controld_start_transition_timer();
                } else {
                    event = I_PE_CALC;
                }

            } else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
                controld_set_fsa_action_flags(A_PE_INVOKE);
                controld_trigger_fsa();
            }
            break;

        case pcmk__graph_shutdown:
            type = "shutdown";
            if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
                event = I_STOP;

            } else {
                crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
                event = I_TERMINATE;
            }
            // Last case: no break needed
    }

    crm_debug("Transition %d status: %s - %s", graph->id, type,
              pcmk__s(graph->abort_reason, "unspecified reason"));

    // Reset completion state for the next transition
    graph->abort_reason = NULL;
    graph->completion_action = pcmk__graph_done;

    if (event != I_NULL) {
        register_fsa_input(C_FSA_INTERNAL, event, NULL);
    } else {
        controld_trigger_fsa();
    }
}
diff --git a/daemons/controld/controld_throttle.c b/daemons/controld/controld_throttle.c
index 3e45b1c0db..37a82d0b01 100644
--- a/daemons/controld/controld_throttle.c
+++ b/daemons/controld/controld_throttle.c
@@ -1,574 +1,575 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <ctype.h>
#include <dirent.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
/* These values don't need to be bits, but these particular values must be kept
 * for backward compatibility during rolling upgrades.
 */
enum throttle_state_e {
    throttle_none       = 0x0000,
    throttle_low        = 0x0001,
    throttle_med        = 0x0010,
    throttle_high       = 0x0100,
    throttle_extreme    = 0x1000,
};

// Latest throttle status reported by a given node
struct throttle_record_s {
    int max;                        // Maximum simultaneous jobs on the node
    enum throttle_state_e mode;     // Most recently reported load level
    char *node;                     // Node name
};

// Maximum number of simultaneous jobs on the local node
static int throttle_job_max = 0;

// Load target (from PCMK_OPT_LOAD_THRESHOLD, as a fraction rather than %)
static float throttle_load_target = 0.0;

// Multipliers applied to the load target to derive throttle thresholds
#define THROTTLE_FACTOR_LOW    1.2
#define THROTTLE_FACTOR_MEDIUM 1.6
#define THROTTLE_FACTOR_HIGH   2.0

// Map of node name -> struct throttle_record_s
static GHashTable *throttle_records = NULL;

// Timer for periodically re-evaluating (and broadcasting) the local load
static mainloop_timer_t *throttle_timer = NULL;
/*!
 * \internal
 * \brief Map a throttle mode to a human-readable description
 *
 * \param[in] mode  Throttle mode to describe
 *
 * \return String description of \p mode
 */
static const char *
load2str(enum throttle_state_e mode)
{
    if (mode == throttle_extreme) {
        return "extreme";
    }
    if (mode == throttle_high) {
        return "high";
    }
    if (mode == throttle_med) {
        return "medium";
    }
    if (mode == throttle_low) {
        return "low";
    }
    if (mode == throttle_none) {
        return "negligible";
    }
    return "undetermined";
}
#if HAVE_LINUX_PROCFS
/*!
* \internal
* \brief Return name of /proc file containing the CIB daemon's load statistics
*
* \return Newly allocated memory with file name on success, NULL otherwise
*
* \note It is the caller's responsibility to free the return value.
* This will return NULL if the daemon is being run via valgrind.
* This should be called only on Linux systems.
*/
static char *
find_cib_loadfile(void)
{
    pid_t pid = pcmk__procfs_pid_of("pacemaker-based");

    if (pid == 0) {
        // Daemon not found (for example, running under valgrind)
        return NULL;
    }
    return crm_strdup_printf("/proc/%lld/stat", (long long) pid);
}
/*!
 * \internal
 * \brief Sample recent CPU usage of the CIB daemon
 *
 * Reads utime+stime from the CIB daemon's /proc stat entry and, given a
 * previous sample, computes the fraction of one CPU used since then.
 *
 * \param[out] load  Where to store usage (0.0 on the first call or failure)
 *
 * \return TRUE if a sample was read (including the initial one), else FALSE
 */
static bool
throttle_cib_load(float *load)
{
    /*
       /proc/[pid]/stat
       Status information about the process. This is used by ps(1). It is defined in /usr/src/linux/fs/proc/array.c.
       The fields, in order, with their proper scanf(3) format specifiers, are:
       pid %d (1) The process ID.
       comm %s (2) The filename of the executable, in parentheses. This is visible whether or not the executable is swapped out.
       state %c (3) One character from the string "RSDZTW" where R is running, S is sleeping in an interruptible wait, D is waiting in uninterruptible disk sleep, Z is zombie, T is traced or stopped (on a signal), and W is paging.
       ppid %d (4) The PID of the parent.
       pgrp %d (5) The process group ID of the process.
       session %d (6) The session ID of the process.
       tty_nr %d (7) The controlling terminal of the process. (The minor device number is contained in the combination of bits 31 to 20 and 7 to 0; the major device number is in bits 15 to 8.)
       tpgid %d (8) The ID of the foreground process group of the controlling terminal of the process.
       flags %u (%lu before Linux 2.6.22)
       (9) The kernel flags word of the process. For bit meanings, see the PF_* defines in the Linux kernel source file include/linux/sched.h. Details depend on the kernel version.
       minflt %lu (10) The number of minor faults the process has made which have not required loading a memory page from disk.
       cminflt %lu (11) The number of minor faults that the process's waited-for children have made.
       majflt %lu (12) The number of major faults the process has made which have required loading a memory page from disk.
       cmajflt %lu (13) The number of major faults that the process's waited-for children have made.
       utime %lu (14) Amount of time that this process has been scheduled in user mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)). This includes guest time, guest_time (time spent running a virtual CPU, see below), so that applications that are not aware of the guest time field do not lose that time from their calculations.
       stime %lu (15) Amount of time that this process has been scheduled in kernel mode, measured in clock ticks (divide by sysconf(_SC_CLK_TCK)).
     */

    // Cached between calls so a usage delta can be computed
    static char *loadfile = NULL;
    static time_t last_call = 0;
    static long ticks_per_s = 0;
    static unsigned long last_utime, last_stime;

    char buffer[64*1024];
    FILE *stream = NULL;
    time_t now = time(NULL);

    if (load == NULL) {
        return FALSE;
    } else {
        *load = 0.0;
    }

    if (loadfile == NULL) {
        // First call (or retry after a read failure): locate the stat file
        last_call = 0;
        last_utime = 0;
        last_stime = 0;
        loadfile = find_cib_loadfile();
        if (loadfile == NULL) {
            crm_warn("Couldn't find CIB load file");
            return FALSE;
        }
        /* NOTE(review): if sysconf() fails, this is -1 (or possibly 0),
         * which would make the division below misbehave -- confirm upstream
         */
        ticks_per_s = sysconf(_SC_CLK_TCK);
        crm_trace("Found %s", loadfile);
    }

    stream = fopen(loadfile, "r");
    if (stream == NULL) {
        int rc = errno;

        crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
        free(loadfile); loadfile = NULL;    // Re-resolve on the next call
        return FALSE;
    }

    if (fgets(buffer, sizeof(buffer), stream)) {
        char *comm = pcmk__assert_alloc(1, 256);
        char state = 0;
        int rc = 0, pid = 0, ppid = 0, pgrp = 0, session = 0, tty_nr = 0, tpgid = 0;
        unsigned long flags = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0, utime = 0, stime = 0;

        // Parse the first 15 fields (through stime) of the stat line
        rc = sscanf(buffer, "%d %[^ ] %c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
                    &pid, comm, &state,
                    &ppid, &pgrp, &session, &tty_nr, &tpgid,
                    &flags, &minflt, &cminflt, &majflt, &cmajflt, &utime, &stime);
        free(comm);

        if (rc != 15) {
            crm_err("Only %d of 15 fields found in %s", rc, loadfile);
            fclose(stream);
            return FALSE;

        } else if (last_call > 0
                   && last_call < now
                   && last_utime <= utime
                   && last_stime <= stime) {
            // A previous sample exists: compute CPU fraction used since then
            time_t elapsed = now - last_call;
            unsigned long delta_utime = utime - last_utime;
            unsigned long delta_stime = stime - last_stime;

            *load = (delta_utime + delta_stime); /* Cast to a float before division */
            *load /= ticks_per_s;
            *load /= elapsed;
            crm_debug("cib load: %f (%lu ticks in %lds)", *load, delta_utime + delta_stime, (long)elapsed);

        } else {
            // First sample (or counter went backward): just record baseline
            crm_debug("Init %lu + %lu ticks at %ld (%lu tps)", utime, stime, (long)now, ticks_per_s);
        }

        // Remember this sample for the next call
        last_call = now;
        last_utime = utime;
        last_stime = stime;

        fclose(stream);
        return TRUE;
    }

    fclose(stream);
    return FALSE;
}
static bool
throttle_load_avg(float *load)
{
char buffer[256];
FILE *stream = NULL;
const char *loadfile = "/proc/loadavg";
if(load == NULL) {
return FALSE;
}
stream = fopen(loadfile, "r");
if(stream == NULL) {
int rc = errno;
crm_warn("Couldn't read %s: %s (%d)", loadfile, pcmk_rc_str(rc), rc);
return FALSE;
}
if(fgets(buffer, sizeof(buffer), stream)) {
char *nl = strstr(buffer, "\n");
/* Grab the 1-minute average, ignore the rest */
*load = strtof(buffer, NULL);
if(nl) { nl[0] = 0; }
fclose(stream);
return TRUE;
}
fclose(stream);
return FALSE;
}
/*!
* \internal
* \brief Check a load value against throttling thresholds
*
* \param[in] load Load value to check
* \param[in] desc Description of metric (for logging)
* \param[in] thresholds Low/medium/high/extreme thresholds
*
* \return Throttle mode corresponding to load value
*/
static enum throttle_state_e
throttle_check_thresholds(float load, const char *desc,
const float thresholds[4])
{
if (load > thresholds[3]) {
crm_notice("Extreme %s detected: %f", desc, load);
return throttle_extreme;
} else if (load > thresholds[2]) {
crm_notice("High %s detected: %f", desc, load);
return throttle_high;
} else if (load > thresholds[1]) {
crm_info("Moderate %s detected: %f", desc, load);
return throttle_med;
} else if (load > thresholds[0]) {
crm_debug("Noticeable %s detected: %f", desc, load);
return throttle_low;
}
crm_trace("Negligible %s detected: %f", desc, load);
return throttle_none;
}
/*!
 * \internal
 * \brief Convert a CPU load value into a throttle mode
 *
 * \param[in] load   Load value to evaluate
 * \param[in] desc   Description of metric (for logging)
 * \param[in] cores  Number of CPU cores (used to normalize thresholds)
 *
 * \return Throttle mode corresponding to \p load
 */
static enum throttle_state_e
throttle_handle_load(float load, const char *desc, int cores)
{
    static const float factors[] = {
        THROTTLE_FACTOR_LOW, THROTTLE_FACTOR_MEDIUM, THROTTLE_FACTOR_HIGH,
    };
    float thresholds[4];
    float normalize;

    if (cores == 1) {
        /* On a single core machine, a load of 1.0 is already too high */
        normalize = 0.6;
    } else {
        /* Normalize the load to be per-core */
        normalize = cores;
    }

    for (int lvl = 0; lvl < 3; lvl++) {
        thresholds[lvl] = throttle_load_target * normalize * factors[lvl];
    }
    thresholds[3] = load + 1.0; /* never extreme */

    return throttle_check_thresholds(load, desc, thresholds);
}
#endif // HAVE_LINUX_PROCFS
/*!
 * \internal
 * \brief Determine the current local load level
 *
 * Combines CIB daemon CPU usage and the system load average (when
 * HAVE_LINUX_PROCFS) into a single throttle mode.
 *
 * \return Most severe applicable throttle mode
 */
static enum throttle_state_e
throttle_mode(void)
{
    enum throttle_state_e mode = throttle_none;

#if HAVE_LINUX_PROCFS
    unsigned int cores;
    float load;
    float thresholds[4];

    cores = pcmk__procfs_num_cores();
    if (throttle_cib_load(&load)) {
        float cib_max_cpu = 0.95;

        /* The CIB is a single-threaded task and thus cannot consume
         * more than 100% of a CPU (and 1/cores of the overall system
         * load).
         *
         * On a many-cored system, the CIB might therefore be maxed out
         * (causing operations to fail or appear to fail) even though
         * the overall system load is still reasonable.
         *
         * Therefore, the 'normal' thresholds can not apply here, and we
         * need a special case.
         */
        if (cores == 1) {
            cib_max_cpu = 0.4;
        }
        if (throttle_load_target > 0.0 && throttle_load_target < cib_max_cpu) {
            cib_max_cpu = throttle_load_target;
        }

        thresholds[0] = cib_max_cpu * 0.8;
        thresholds[1] = cib_max_cpu * 0.9;
        thresholds[2] = cib_max_cpu;
        /* Can only happen on machines with a low number of cores */
        thresholds[3] = cib_max_cpu * 1.5;

        mode = throttle_check_thresholds(load, "CIB load", thresholds);
    }

    if (throttle_load_target <= 0) {
        /* If we ever make this a valid value, the cluster will at least behave as expected */
        return mode;
    }

    if (throttle_load_avg(&load)) {
        enum throttle_state_e cpu_load;

        cpu_load = throttle_handle_load(load, "CPU load", cores);
        if (cpu_load > mode) {
            // Report the more severe of the two measurements
            mode = cpu_load;
        }
        crm_debug("Current load is %f across %u core(s)", load, cores);
    }
#endif // HAVE_LINUX_PROCFS
    return mode;
}
/*!
 * \internal
 * \brief Send the local throttle mode (with job limit) to peer controllers
 *
 * \param[in] mode  Newly determined local load level
 *
 * \note A message is sent only when the mode differs from the last one sent.
 */
static void
throttle_send_command(enum throttle_state_e mode)
{
    xmlNode *xml = NULL;
    static enum throttle_state_e last = -1;

    if (mode != last) {
        crm_info("New throttle mode: %s load (was %s)",
                 load2str(mode), load2str(last));
        last = mode;

-        xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+        xml = create_request(CRM_OP_THROTTLE, NULL, NULL, CRM_SYSTEM_CRMD,
+                             CRM_SYSTEM_CRMD);

        crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MODE, mode);
        crm_xml_add_int(xml, PCMK__XA_CRM_LIMIT_MAX, throttle_job_max);

        // NOTE(review): NULL destination appears to broadcast -- confirm
        pcmk__cluster_send_message(NULL, pcmk_ipc_controld, xml);
        pcmk__xml_free(xml);
    }
}
// Timer callback: recompute local load and (if changed) broadcast it
static gboolean
throttle_timer_cb(gpointer data)
{
    throttle_send_command(throttle_mode());
    return TRUE;    // Keep the timer running
}

// Free a struct throttle_record_s (value destructor for throttle_records)
static void
throttle_record_free(gpointer p)
{
    struct throttle_record_s *r = p;
    free(r->node);
    free(r);
}
// Set the load target used to compute throttle thresholds
static void
throttle_set_load_target(float target)
{
    throttle_load_target = target;
}
/*!
* \internal
* \brief Update the maximum number of simultaneous jobs
*
* \param[in] preference Cluster-wide \c PCMK_OPT_NODE_ACTION_LIMIT from the
* CIB
*/
static void
throttle_update_job_max(const char *preference)
{
    const char *env_limit = pcmk__env_option(PCMK__ENV_NODE_ACTION_LIMIT);
    long long max = 0LL;

    if (env_limit != NULL) {
        preference = env_limit; // Per-node override
    }
    if (preference != NULL) {
        pcmk__scan_ll(preference, &max, 0LL);
    }

    if (max <= 0) {
        // Default is based on the number of cores detected
        throttle_job_max = 2 * pcmk__procfs_num_cores();
    } else if (max >= INT_MAX) {
        throttle_job_max = INT_MAX;
    } else {
        throttle_job_max = (int) max;
    }
}
/*!
 * \internal
 * \brief Initialize throttling: create record table and start report timer
 */
void
throttle_init(void)
{
    if (throttle_records == NULL) {
        throttle_records = pcmk__strkey_table(NULL, throttle_record_free);
        // Re-evaluate local load every 30 seconds
        throttle_timer = mainloop_timer_add("throttle", 30 * 1000, TRUE, throttle_timer_cb, NULL);
    }

    throttle_update_job_max(NULL);
    mainloop_timer_start(throttle_timer);
}
/*!
* \internal
* \brief Configure throttle options based on the CIB
*
* \param[in,out] options Name/value pairs for configured options
*/
void
controld_configure_throttle(GHashTable *options)
{
    const char *value = g_hash_table_lookup(options, PCMK_OPT_LOAD_THRESHOLD);

    if (value != NULL) {
        // The option is a percentage; store it as a fraction
        throttle_set_load_target(strtof(value, NULL) / 100.0);
    }

    value = g_hash_table_lookup(options, PCMK_OPT_NODE_ACTION_LIMIT);
    throttle_update_job_max(value);
}
/*!
 * \internal
 * \brief Tear down throttling: stop the report timer and free all records
 */
void
throttle_fini(void)
{
    if (throttle_timer != NULL) {
        mainloop_timer_del(throttle_timer);
        throttle_timer = NULL;
    }

    if (throttle_records != NULL) {
        g_hash_table_destroy(throttle_records);
        throttle_records = NULL;
    }
}
/*!
 * \internal
 * \brief Get the cluster-wide concurrent job limit
 *
 * \param[in] l  Configured batch limit (0 means unlimited)
 *
 * \return \p l, possibly reduced if any peer reported high or extreme load
 */
int
throttle_get_total_job_limit(int l)
{
    /* Cluster-wide limit */
    GHashTableIter iter;
    int limit = l;
    int peers = pcmk__cluster_num_active_nodes();
    struct throttle_record_s *r = NULL;

    g_hash_table_iter_init(&iter, throttle_records);
    while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &r)) {
        switch (r->mode) {
            case throttle_extreme:
                // Under extreme load, cap at a quarter of the active peers
                if ((limit == 0) || (limit > peers/4)) {
                    limit = QB_MAX(1, peers/4);
                }
                break;

            case throttle_high:
                // Under high load, cap at half of the active peers
                if ((limit == 0) || (limit > peers/2)) {
                    limit = QB_MAX(1, peers/2);
                }
                break;

            default:
                break;
        }
    }

    /* Previously an empty if (limit == l) {} branch was used to suppress
     * logging when nothing changed; use a positive condition instead.
     */
    if (limit != l) {
        if (l == 0) {
            crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d", limit);
        } else {
            crm_trace("Using " PCMK_OPT_BATCH_LIMIT "=%d instead of %d",
                      limit, l);
        }
    }
    return limit;
}
/*!
 * \internal
 * \brief Get the concurrent job limit for a given node
 *
 * \param[in] node  Name of node to check
 *
 * \return Maximum simultaneous jobs allowed on \p node, derived from the
 *         node's reported load level (always at least 1)
 */
int
throttle_get_job_limit(const char *node)
{
    int jobs = 1;
    struct throttle_record_s *r = NULL;

    r = g_hash_table_lookup(throttle_records, node);
    if (r == NULL) {
        // No report from this node yet: assume conservative local defaults
        r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
        r->node = pcmk__str_copy(node);
        r->mode = throttle_low;
        r->max = throttle_job_max;
        crm_trace("Defaulting to local values for unknown node %s", node);

        g_hash_table_insert(throttle_records, r->node, r);
    }

    switch (r->mode) {
        case throttle_extreme:
        case throttle_high:
            jobs = 1; /* At least one job must always be allowed */
            break;
        case throttle_med:
            jobs = QB_MAX(1, r->max / 4);
            break;
        case throttle_low:
            jobs = QB_MAX(1, r->max / 2);
            break;
        case throttle_none:
            jobs = QB_MAX(1, r->max);
            break;
        default:
            crm_err("Unknown throttle mode %.4x on %s", r->mode, node);
            break;
    }
    return jobs;
}
/*!
 * \internal
 * \brief Process a throttle message from a peer, updating its record
 *
 * \param[in] xml  Throttle message containing load mode and job max
 */
void
throttle_update(xmlNode *xml)
{
    int max = 0;
    int mode = 0;
    struct throttle_record_s *r = NULL;
    const char *from = crm_element_value(xml, PCMK__XA_SRC);

    crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MODE, &mode);
    crm_element_value_int(xml, PCMK__XA_CRM_LIMIT_MAX, &max);

    r = g_hash_table_lookup(throttle_records, from);
    if (r == NULL) {
        // First report from this node: start tracking it
        r = pcmk__assert_alloc(1, sizeof(struct throttle_record_s));
        r->node = pcmk__str_copy(from);
        g_hash_table_insert(throttle_records, r->node, r);
    }

    r->max = max;
    r->mode = (enum throttle_state_e) mode;

    crm_debug("Node %s has %s load and supports at most %d jobs; new job limit %d",
              from, load2str((enum throttle_state_e) mode), max,
              throttle_get_job_limit(from));
}
diff --git a/include/crm/common/ipc.h b/include/crm/common/ipc.h
index 5d91a2580f..9a0d5f1dcf 100644
--- a/include/crm/common/ipc.h
+++ b/include/crm/common/ipc.h
@@ -1,240 +1,238 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_IPC__H
#define PCMK__CRM_COMMON_IPC__H
#include <sys/uio.h>
#include <qb/qbipcc.h>
#include <crm/common/xml.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief IPC interface to Pacemaker daemons
*
* \ingroup core
*/
/*
* Message creation utilities
*
* These are used for both IPC messages and cluster layer messages. However,
* since this is public API, they stay in this header for backward
* compatibility.
*/
// @COMPAT Make internal when we can break API backward compatibility
//! \deprecated Do not use
-#define create_request(task, xml_data, host_to, sys_to, sys_from, uuid_from) \
- create_request_adv(task, xml_data, host_to, sys_to, sys_from, uuid_from, \
- __func__)
+#define create_request(task, xml_data, host_to, sys_to, sys_from) \
+ create_request_adv(task, xml_data, host_to, sys_to, sys_from, __func__)
// @COMPAT Make internal when we can break API backward compatibility
//! \deprecated Do not use
xmlNode *create_request_adv(const char *task, xmlNode *xml_data,
const char *host_to, const char *sys_to,
- const char *sys_from, const char *uuid_from,
- const char *origin);
+ const char *sender_system, const char *origin);
/*
* The library supports two methods of creating IPC connections. The older code
* allows connecting to any arbitrary IPC name. The newer code only allows
* connecting to one of the Pacemaker daemons.
*
* As daemons are converted to use the new model, the old functions should be
* considered deprecated for use with those daemons. Once all daemons are
* converted, the old functions should be officially deprecated as public API
* and eventually made internal API.
*/
/*
* Pacemaker daemon IPC
*/
/* @COMPAT This is also used internally for cluster message types, but it's not
* worth the hassle of redefining this public API just to change the name.
*/
//! Available IPC interfaces
enum pcmk_ipc_server {
pcmk_ipc_unknown, //!< Unknown or invalid
pcmk_ipc_attrd, //!< Attribute manager
pcmk_ipc_based, //!< CIB manager
pcmk_ipc_controld, //!< Controller
pcmk_ipc_execd, //!< Executor
pcmk_ipc_fenced, //!< Fencer
pcmk_ipc_pacemakerd, //!< Launcher
pcmk_ipc_schedulerd, //!< Scheduler
};
// NOTE: sbd (as of at least 1.5.2) uses this enum
//! Possible event types that an IPC event callback can be called for
enum pcmk_ipc_event {
pcmk_ipc_event_connect, //!< Result of asynchronous connection attempt
// NOTE: sbd (as of at least 1.5.2) uses this value
pcmk_ipc_event_disconnect, //!< Termination of IPC connection
// NOTE: sbd (as of at least 1.5.2) uses this value
pcmk_ipc_event_reply, //!< Daemon's reply to client IPC request
pcmk_ipc_event_notify, //!< Notification from daemon
};
//! How IPC replies should be dispatched
enum pcmk_ipc_dispatch {
pcmk_ipc_dispatch_main, //!< Attach IPC to GMainLoop for dispatch
pcmk_ipc_dispatch_poll, //!< Caller will poll and dispatch IPC
pcmk_ipc_dispatch_sync, //!< Sending a command will wait for any reply
};
// NOTE: sbd (as of at least 1.5.2) uses this
//! Client connection to Pacemaker IPC
typedef struct pcmk_ipc_api_s pcmk_ipc_api_t;
/*!
* \brief Callback function type for Pacemaker daemon IPC APIs
*
* \param[in,out] api IPC API connection
* \param[in] event_type The type of event that occurred
* \param[in] status Event status
* \param[in,out] event_data Event-specific data
* \param[in,out] user_data Caller data provided when callback was registered
*
* \note For connection and disconnection events, event_data may be NULL (for
* local IPC) or the name of the connected node (for remote IPC, for
* daemons that support that). For reply and notify events, event_data is
* defined by the specific daemon API.
*/
typedef void (*pcmk_ipc_callback_t)(pcmk_ipc_api_t *api,
enum pcmk_ipc_event event_type,
crm_exit_t status,
void *event_data, void *user_data);
// NOTE: sbd (as of at least 1.5.2) uses this
int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server);
// NOTE: sbd (as of at least 1.5.2) uses this
void pcmk_free_ipc_api(pcmk_ipc_api_t *api);
// NOTE: sbd (as of at least 1.5.2) uses this
int pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type);
void pcmk_disconnect_ipc(pcmk_ipc_api_t *api);
int pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms);
void pcmk_dispatch_ipc(pcmk_ipc_api_t *api);
// NOTE: sbd (as of at least 1.5.2) uses this
void pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb,
void *user_data);
const char *pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log);
bool pcmk_ipc_is_connected(pcmk_ipc_api_t *api);
int pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name,
uint32_t nodeid);
/*
* Generic IPC API (to eventually be deprecated as public API and made internal)
*/
/* *INDENT-OFF* */
enum crm_ipc_flags
{
crm_ipc_flags_none = 0x00000000,
crm_ipc_compressed = 0x00000001, /* Message has been compressed */
crm_ipc_proxied = 0x00000100, /* _ALL_ replies to proxied connections need to be sent as events */
crm_ipc_client_response = 0x00000200, /* A Response is expected in reply */
// These are options for Pacemaker's internal use only (pcmk__ipc_send_*())
crm_ipc_server_event = 0x00010000, /* Send an Event instead of a Response */
crm_ipc_server_free = 0x00020000, /* Free the iovec after sending */
crm_ipc_proxied_relay_response = 0x00040000, /* all replies to proxied connections are sent as events, this flag preserves whether the event should be treated as an actual event, or a response.*/
};
/* *INDENT-ON* */
typedef struct crm_ipc_s crm_ipc_t;
crm_ipc_t *crm_ipc_new(const char *name, size_t max_size);
bool crm_ipc_connect(crm_ipc_t * client);
void crm_ipc_close(crm_ipc_t * client);
void crm_ipc_destroy(crm_ipc_t * client);
void pcmk_free_ipc_event(struct iovec *event);
int crm_ipc_send(crm_ipc_t *client, const xmlNode *message,
enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply);
int crm_ipc_get_fd(crm_ipc_t * client);
bool crm_ipc_connected(crm_ipc_t * client);
int crm_ipc_ready(crm_ipc_t * client);
long crm_ipc_read(crm_ipc_t * client);
const char *crm_ipc_buffer(crm_ipc_t * client);
uint32_t crm_ipc_buffer_flags(crm_ipc_t * client);
const char *crm_ipc_name(crm_ipc_t * client);
unsigned int crm_ipc_default_buffer_size(void);
/*!
* \brief Check the authenticity of the IPC socket peer process (legacy)
*
* If everything goes well, peer's authenticity is verified by the means
* of comparing against provided referential UID and GID (either satisfies),
* and the result of this check can be deduced from the return value.
* As an exception, detected UID of 0 ("root") satisfies arbitrary
* provided referential daemon's credentials.
*
* \param[in] sock IPC related, connected Unix socket to check peer of
* \param[in] refuid referential UID to check against
* \param[in] refgid referential GID to check against
* \param[out] gotpid to optionally store obtained PID of the peer
* (not available on FreeBSD, special value of 1
* used instead, and the caller is required to
* special case this value respectively)
* \param[out] gotuid to optionally store obtained UID of the peer
* \param[out] gotgid to optionally store obtained GID of the peer
*
* \return 0 if IPC related socket's peer is not authentic given the
* referential credentials (see above), 1 if it is,
* negative value on error (generally expressing -errno unless
* it was zero even on nonhappy path, -pcmk_err_generic is
* returned then; no message is directly emitted)
*
* \note While this function is tolerant on what constitutes authorized
* IPC daemon process (its effective user matches UID=0 or \p refuid,
* or at least its group matches \p refgid), either or both (in case
* of UID=0) mismatches on the expected credentials of such peer
* process \e shall be investigated at the caller when value of 1
* gets returned there, since higher-than-expected privileges in
* respect to the expected/intended credentials possibly violate
* the least privilege principle and may pose an additional risk
* (i.e. such accidental inconsistency shall be eventually fixed).
*/
int crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
pid_t *gotpid, uid_t *gotuid, gid_t *gotgid);
// @COMPAT Make internal when we can break API backward compatibility
//! \deprecated Do not use
xmlNode *create_hello_message(const char *uuid, const char *client_name,
const char *major_version, const char *minor_version);
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/cluster/election.c b/lib/cluster/election.c
index c419609c3b..5d7dc0999c 100644
--- a/lib/cluster/election.c
+++ b/lib/cluster/election.c
@@ -1,727 +1,728 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <crm/common/xml.h>
#include <crm/common/mainloop.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/crm.h>
#define STORM_INTERVAL 2 /* in seconds */
struct election_s {
enum election_result state;
guint count; // How many times local node has voted
char *name; // Descriptive name for this election
char *uname; // Local node's name
GSourceFunc cb; // Function to call if election is won
GHashTable *voted; // Key = node name, value = how node voted
mainloop_timer_t *timeout; // When to abort if all votes not received
int election_wins; // Track wins, for storm detection
bool wrote_blackbox; // Write a storm blackbox at most once
time_t expires; // When storm detection period ends
time_t last_election_loss; // When dampening period ends
};
// Mark the election as won by the local node, run the win callback (if any),
// then discard all recorded votes and stop the timer
static void
election_complete(election_t *e)
{
    GSourceFunc win_cb = e->cb;

    e->state = election_won;
    if (win_cb != NULL) {
        win_cb(e);
    }
    election_reset(e);
}
// Timer callback: not all votes arrived in time, so declare the local node
// the winner of the election
static gboolean
election_timer_cb(gpointer user_data)
{
    election_t *election = (election_t *) user_data;

    crm_info("%s timed out, declaring local node as winner", election->name);
    election_complete(election);
    return FALSE; // One-shot: do not reschedule
}
/*!
 * \brief Get current state of an election
 *
 * \param[in] e  Election object (may be NULL)
 *
 * \return Current state of \p e, or election_error if \p e is NULL
 */
enum election_result
election_state(const election_t *e)
{
    if (e == NULL) {
        return election_error;
    }
    return e->state;
}
/*!
 * \brief Create a new election object
 *
 * Every node that wishes to participate in an election must create an
 * election object. Typically, this should be done once, at start-up. A
 * caller should only create a single election object.
 *
 * \param[in] name       Descriptive label for the election (for logging);
 *                       if NULL, a unique numbered name is generated
 * \param[in] uname      Local node's name (must not be NULL)
 * \param[in] period_ms  How long to wait for all peers to vote
 * \param[in] cb         Function to call if the local node wins the election
 *
 * \return Newly allocated election object on success, NULL on error
 * \note The caller is responsible for freeing the returned value using
 *       election_fini().
 */
election_t *
election_init(const char *name, const char *uname, guint period_ms, GSourceFunc cb)
{
    static guint count = 0; // For generating unique names when none is given
    election_t *election = NULL;

    CRM_CHECK(uname != NULL, return NULL);

    election = calloc(1, sizeof(election_t));
    if (election == NULL) {
        crm_perror(LOG_CRIT, "Cannot create election");
        return NULL;
    }

    election->uname = strdup(uname);
    if (election->uname == NULL) {
        crm_perror(LOG_CRIT, "Cannot create election");
        free(election);
        return NULL;
    }

    if (name != NULL) {
        election->name = crm_strdup_printf("election-%s", name);
    } else {
        election->name = crm_strdup_printf("election-%u", count++);
    }

    election->cb = cb;
    election->timeout = mainloop_timer_add(election->name, period_ms, FALSE,
                                           election_timer_cb, election);
    crm_trace("Created %s", election->name);
    return election;
}
/*!
 * \brief Disregard any previous vote by specified peer
 *
 * This discards any recorded vote from a specified peer. Election users
 * should call this whenever a voting peer becomes inactive.
 *
 * \param[in,out] e      Election object (may be NULL)
 * \param[in]     uname  Name of peer to disregard (may be NULL)
 */
void
election_remove(election_t *e, const char *uname)
{
    if ((e == NULL) || (uname == NULL) || (e->voted == NULL)) {
        return; // Nothing recorded, or nothing to remove it from
    }
    crm_trace("Discarding %s (no-)vote from lost peer %s", e->name, uname);
    g_hash_table_remove(e->voted, uname);
}
/*!
 * \brief Stop election timer and disregard all votes
 *
 * \param[in,out] e  Election object (may be NULL)
 */
void
election_reset(election_t *e)
{
    if (e == NULL) {
        return;
    }
    crm_trace("Resetting election %s", e->name);
    mainloop_timer_stop(e->timeout);
    if (e->voted != NULL) {
        crm_trace("Destroying voted cache with %d members", g_hash_table_size(e->voted));
        g_hash_table_destroy(e->voted);
        e->voted = NULL;
    }
}
/*!
 * \brief Free an election object
 *
 * Free all memory associated with an election object, stopping its
 * election timer (if running).
 *
 * \param[in,out] e  Election object (may be NULL)
 */
void
election_fini(election_t *e)
{
    if (e == NULL) {
        return;
    }
    election_reset(e); // Stops the timer and frees the vote table
    crm_trace("Destroying %s", e->name);
    mainloop_timer_del(e->timeout);
    free(e->uname);
    free(e->name);
    free(e);
}
// Start the election timer (no-op if no election object exists)
static void
election_timeout_start(election_t *e)
{
    if (e == NULL) {
        return;
    }
    mainloop_timer_start(e->timeout);
}
/*!
 * \brief Stop an election's timer, if running
 *
 * \param[in,out] e  Election object (may be NULL)
 */
void
election_timeout_stop(election_t *e)
{
    if (e == NULL) {
        return;
    }
    mainloop_timer_stop(e->timeout);
}
/*!
 * \brief Change an election's timeout (restarting timer if running)
 *
 * \param[in,out] e       Election object
 * \param[in]     period  New timeout
 */
void
election_timeout_set_period(election_t *e, guint period)
{
    if (e == NULL) {
        crm_err("No election defined");
        return;
    }
    mainloop_timer_set_period(e->timeout, period);
}
/* Get this process's CPU usage as a proxy for "uptime", caching the result
 * for STORM_INTERVAL seconds after each access to avoid repeated getrusage()
 * calls during an election flood.
 *
 * Returns 1 on success (with *output set to the cached or fresh CPU time),
 * or -1 on getrusage() failure (with *output zeroed and the cache
 * invalidated so the next call retries).
 */
static int
get_uptime(struct timeval *output)
{
    static time_t expires = 0;     // When the cached value goes stale
    static struct rusage info;     // Cached getrusage() result
    time_t tm_now = time(NULL);
    if (expires < tm_now) {
        // Cache expired (or never filled): refresh from the kernel
        int rc = 0;
        info.ru_utime.tv_sec = 0;
        info.ru_utime.tv_usec = 0;
        rc = getrusage(RUSAGE_SELF, &info);
        output->tv_sec = 0;
        output->tv_usec = 0;
        if (rc < 0) {
            crm_perror(LOG_ERR, "Could not calculate the current uptime");
            expires = 0;
            return -1;
        }
        crm_debug("Current CPU usage is: %lds, %ldus", (long)info.ru_utime.tv_sec,
                  (long)info.ru_utime.tv_usec);
    }
    expires = tm_now + STORM_INTERVAL;  /* N seconds after the last _access_ */
    output->tv_sec = info.ru_utime.tv_sec;
    output->tv_usec = info.ru_utime.tv_usec;
    return 1;
}
/* Compare the local node's uptime against a peer's.
 *
 * Returns 1 if we are "older" (win), -1 if the peer is older (lose),
 * 0 if equal. Seconds are compared first; microseconds break ties.
 */
static int
compare_age(struct timeval your_age)
{
    struct timeval our_age;

    get_uptime(&our_age); /* If an error occurred, our_age will be compared as {0,0} */

    if (our_age.tv_sec != your_age.tv_sec) {
        if (our_age.tv_sec > your_age.tv_sec) {
            crm_debug("Win: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
            return 1;
        }
        crm_debug("Lose: %ld vs %ld (seconds)", (long)our_age.tv_sec, (long)your_age.tv_sec);
        return -1;
    }

    if (our_age.tv_usec != your_age.tv_usec) {
        if (our_age.tv_usec > your_age.tv_usec) {
            crm_debug("Win: %ld.%06ld vs %ld.%06ld (usec)",
                      (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
            return 1;
        }
        crm_debug("Lose: %ld.%06ld vs %ld.%06ld (usec)",
                  (long)our_age.tv_sec, (long)our_age.tv_usec, (long)your_age.tv_sec, (long)your_age.tv_usec);
        return -1;
    }

    return 0;
}
/*!
* \brief Start a new election by offering local node's candidacy
*
* Broadcast a "vote" election message containing the local node's ID,
* (incremented) election counter, and uptime, and start the election timer.
*
* \param[in,out] e Election object
*
* \note Any nodes agreeing to the candidacy will send a "no-vote" reply, and if
* all active peers do so, or if the election times out, the local node
* wins the election. (If we lose to any peer vote, we will stop the
* timer, so a timeout means we did not lose -- either some peer did not
* vote, or we did not call election_check() in time.)
*/
void
election_vote(election_t *e)
{
struct timeval age;
xmlNode *vote = NULL;
pcmk__node_status_t *our_node = NULL;
if (e == NULL) {
crm_trace("Election vote requested, but no election available");
return;
}
our_node = pcmk__get_node(0, e->uname, NULL,
pcmk__node_search_cluster_member);
if (!pcmk__cluster_is_node_active(our_node)) {
crm_trace("Cannot vote in %s yet: local node not connected to cluster",
e->name);
return;
}
election_reset(e);
e->state = election_in_progress;
- vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ vote = create_request(CRM_OP_VOTE, NULL, NULL, CRM_SYSTEM_CRMD,
+ CRM_SYSTEM_CRMD);
e->count++;
crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->xml_id);
crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, e->count);
// Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds
get_uptime(&age);
crm_xml_add_timeval(vote, PCMK__XA_ELECTION_AGE_SEC,
PCMK__XA_ELECTION_AGE_NANO_SEC, &age);
pcmk__cluster_send_message(NULL, pcmk_ipc_controld, vote);
pcmk__xml_free(vote);
crm_debug("Started %s round %d", e->name, e->count);
election_timeout_start(e);
return;
}
/*!
* \brief Check whether local node has won an election
*
* If all known peers have sent no-vote messages, stop the election timer, set
* the election state to won, and call any registered win callback.
*
* \param[in,out] e Election object
*
* \return TRUE if local node has won, FALSE otherwise
* \note If all known peers have sent no-vote messages, but the election owner
* does not call this function, the election will not be won (and the
* callback will not be called) until the election times out.
* \note This should be called when election_count_vote() returns
* \c election_in_progress.
*/
bool
election_check(election_t *e)
{
    int voted_size = 0;   // Number of (no-)votes recorded so far
    int num_members = 0;  // Number of currently active cluster nodes
    if (e == NULL) {
        crm_trace("Election check requested, but no election available");
        return FALSE;
    }
    if (e->voted == NULL) {
        crm_trace("%s check requested, but no votes received yet", e->name);
        return FALSE;
    }
    voted_size = g_hash_table_size(e->voted);
    num_members = pcmk__cluster_num_active_nodes();
    /* in the case of #voted > #members, it is better to
     * wait for the timeout and give the cluster time to
     * stabilize
     */
    if (voted_size >= num_members) {
        /* we won and everyone has voted */
        election_timeout_stop(e);
        if (voted_size > num_members) {
            /* More votes than active members: log both the expected and
             * actual voters to help diagnose the discrepancy
             */
            GHashTableIter gIter;
            const pcmk__node_status_t *node = NULL;
            char *key = NULL;
            crm_warn("Received too many votes in %s", e->name);
            g_hash_table_iter_init(&gIter, pcmk__peer_cache);
            while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
                if (pcmk__cluster_is_node_active(node)) {
                    crm_warn("* expected vote: %s", node->name);
                }
            }
            g_hash_table_iter_init(&gIter, e->voted);
            while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
                crm_warn("* actual vote: %s", key);
            }
        }
        crm_info("%s won by local node", e->name);
        election_complete(e); // Sets state to won and runs the win callback
        return TRUE;
    } else {
        crm_debug("%s still waiting on %d of %d votes",
                  e->name, num_members - voted_size, num_members);
    }
    return FALSE;
}
#define LOSS_DAMPEN 2 /* in seconds */
// Fields parsed out of an election (vote or no-vote) message
struct vote {
    const char *op;             // Message task (CRM_OP_VOTE or CRM_OP_NOVOTE)
    const char *from;           // Name of node that sent the message
    const char *version;        // Sender's CRM feature set version
    const char *election_owner; // XML ID of node that started the election
    int election_id;            // Round counter assigned by the owner
    struct timeval age;         // Sender's uptime (vote messages only)
};
/*!
* \brief Unpack an election message
*
* \param[in] e Election object (for logging only)
* \param[in] message Election message XML
* \param[out] vote Parsed fields from message
*
* \return TRUE if election message and election are valid, FALSE otherwise
* \note The parsed struct's pointer members are valid only for the lifetime of
* the message argument.
*/
static bool
parse_election_message(const election_t *e, const xmlNode *message,
                       struct vote *vote)
{
    CRM_CHECK(message && vote, return FALSE);
    // Sentinel values so missing/unparsable attributes are caught below
    vote->election_id = -1;
    vote->age.tv_sec = -1;
    vote->age.tv_usec = -1;
    vote->op = crm_element_value(message, PCMK__XA_CRM_TASK);
    vote->from = crm_element_value(message, PCMK__XA_SRC);
    vote->version = crm_element_value(message, PCMK_XA_VERSION);
    vote->election_owner = crm_element_value(message, PCMK__XA_ELECTION_OWNER);
    crm_element_value_int(message, PCMK__XA_ELECTION_ID, &(vote->election_id));
    // All of the above fields are mandatory in any election message
    if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
        || (vote->election_owner == NULL) || (vote->election_id < 0)) {
        crm_warn("Invalid %s message from %s in %s ",
                 (vote->op? vote->op : "election"),
                 (vote->from? vote->from : "unspecified node"),
                 (e? e->name : "election"));
        return FALSE;
    }
    // Op-specific validation
    if (pcmk__str_eq(vote->op, CRM_OP_VOTE, pcmk__str_none)) {
        /* Only vote ops have uptime.
           Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is in microseconds.
         */
        crm_element_value_timeval(message, PCMK__XA_ELECTION_AGE_SEC,
                                  PCMK__XA_ELECTION_AGE_NANO_SEC, &(vote->age));
        if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
            crm_warn("Cannot count %s %s from %s because it is missing uptime",
                     (e? e->name : "election"), vote->op, vote->from);
            return FALSE;
        }
    } else if (!pcmk__str_eq(vote->op, CRM_OP_NOVOTE, pcmk__str_none)) {
        // Anything other than a vote or no-vote is not an election message
        crm_info("Cannot process %s message from %s because %s is not a known election op",
                 (e? e->name : "election"), vote->from, vote->op);
        return FALSE;
    }
    // Election validation
    if (e == NULL) {
        crm_info("Cannot count %s from %s because no election available",
                 vote->op, vote->from);
        return FALSE;
    }
    /* If the membership cache is NULL, we REALLY shouldn't be voting --
     * the question is how we managed to get here.
     */
    if (pcmk__peer_cache == NULL) {
        crm_info("Cannot count %s %s from %s because no peer information available",
                 e->name, vote->op, vote->from);
        return FALSE;
    }
    return TRUE;
}
// Remember how a peer voted, creating the vote table on first use
static void
record_vote(election_t *e, struct vote *vote)
{
    const char *voter = NULL;
    const char *ballot = NULL;

    CRM_ASSERT(e && vote && vote->from && vote->op);
    voter = vote->from;
    ballot = vote->op;

    if (e->voted == NULL) {
        // Lazily allocate the table mapping voter name -> vote op
        e->voted = pcmk__strkey_table(free, free);
    }
    pcmk__insert_dup(e->voted, voter, ballot);
}
static void
send_no_vote(pcmk__node_status_t *peer, struct vote *vote)
{
// @TODO probably shouldn't hardcode CRM_SYSTEM_CRMD and pcmk_ipc_controld
xmlNode *novote = create_request(CRM_OP_NOVOTE, NULL, vote->from,
- CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
+ CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD);
crm_xml_add(novote, PCMK__XA_ELECTION_OWNER, vote->election_owner);
crm_xml_add_int(novote, PCMK__XA_ELECTION_ID, vote->election_id);
pcmk__cluster_send_message(peer, pcmk_ipc_controld, novote);
pcmk__xml_free(novote);
}
/*!
* \brief Process an election message (vote or no-vote) from a peer
*
* \param[in,out] e Election object
* \param[in] message Election message XML from peer
* \param[in] can_win Whether local node is eligible to win
*
* \return Election state after new vote is considered
* \note If the peer message is a vote, and we prefer the peer to win, this will
* send a no-vote reply to the peer.
* \note The situations "we lost to this vote" from "this is a late no-vote
* after we've already lost" both return election_lost. If a caller needs
* to distinguish them, it should save the current state before calling
* this function, and then compare the result.
*/
enum election_result
election_count_vote(election_t *e, const xmlNode *message, bool can_win)
{
    int log_level = LOG_INFO;
    gboolean done = FALSE;      // Message fully handled; no reply needed
    gboolean we_lose = FALSE;   // Peer beats us; we must concede
    const char *reason = "unknown";
    bool we_are_owner = FALSE;  // Whether local node started this election round
    pcmk__node_status_t *our_node = NULL;
    pcmk__node_status_t *your_node = NULL;
    time_t tm_now = time(NULL);
    struct vote vote;
    CRM_CHECK(message != NULL, return election_error);
    if (parse_election_message(e, message, &vote) == FALSE) {
        return election_error;
    }
    your_node = pcmk__get_node(0, vote.from, NULL,
                               pcmk__node_search_cluster_member);
    our_node = pcmk__get_node(0, e->uname, NULL,
                              pcmk__node_search_cluster_member);
    we_are_owner = (our_node != NULL)
                   && pcmk__str_eq(our_node->xml_id, vote.election_owner,
                                   pcmk__str_none);
    // Decide the message's disposition: done, we_lose, or a win for us
    if (!can_win) {
        reason = "Not eligible";
        we_lose = TRUE;
    } else if (!pcmk__cluster_is_node_active(our_node)) {
        reason = "We are not part of the cluster";
        log_level = LOG_ERR;
        we_lose = TRUE;
    } else if (we_are_owner && (vote.election_id != e->count)) {
        // Message is for an earlier round of our own election; ignore it
        log_level = LOG_TRACE;
        reason = "Superseded";
        done = TRUE;
    } else if (!pcmk__cluster_is_node_active(your_node)) {
        /* Possibly we cached the message in the FSA queue at a point that it wasn't */
        reason = "Peer is not part of our cluster";
        log_level = LOG_WARNING;
        done = TRUE;
    } else if (pcmk__str_eq(vote.op, CRM_OP_NOVOTE, pcmk__str_none)
               || pcmk__str_eq(vote.from, e->uname, pcmk__str_none)) {
        /* Receiving our own broadcast vote, or a no-vote from peer, is a vote
         * for us to win
         */
        if (!we_are_owner) {
            crm_warn("Cannot count %s round %d %s from %s because we are not election owner (%s)",
                     e->name, vote.election_id, vote.op, vote.from,
                     vote.election_owner);
            return election_error;
        }
        if (e->state != election_in_progress) {
            // Should only happen if we already lost
            crm_debug("Not counting %s round %d %s from %s because no election in progress",
                      e->name, vote.election_id, vote.op, vote.from);
            return e->state;
        }
        record_vote(e, &vote);
        reason = "Recorded";
        done = TRUE;
    } else {
        // A peer vote requires a comparison to determine which node is better
        // Precedence: feature-set version, then uptime, then host name
        int age_result = compare_age(vote.age);
        int version_result = compare_version(vote.version, CRM_FEATURE_SET);
        if (version_result < 0) {
            reason = "Version";
            we_lose = TRUE;
        } else if (version_result > 0) {
            reason = "Version";
        } else if (age_result < 0) {
            reason = "Uptime";
            we_lose = TRUE;
        } else if (age_result > 0) {
            reason = "Uptime";
        } else if (strcasecmp(e->uname, vote.from) > 0) {
            reason = "Host name";
            we_lose = TRUE;
        } else {
            reason = "Host name";
        }
    }
    // Election storm detection: count wins within each STORM_INTERVAL window
    if (e->expires < tm_now) {
        // Window expired; start a fresh one
        e->election_wins = 0;
        e->expires = tm_now + STORM_INTERVAL;
    } else if (done == FALSE && we_lose == FALSE) {
        int peers = 1 + g_hash_table_size(pcmk__peer_cache);
        /* If every node has to vote down every other node, thats N*(N-1) total elections
         * Allow some leeway before _really_ complaining
         */
        e->election_wins++;
        if (e->election_wins > (peers * peers)) {
            crm_warn("%s election storm detected: %d wins in %d seconds",
                     e->name, e->election_wins, STORM_INTERVAL);
            e->election_wins = 0;
            e->expires = tm_now + STORM_INTERVAL;
            if (e->wrote_blackbox == FALSE) {
                /* It's questionable whether a black box (from every node in the
                 * cluster) would be truly helpful in diagnosing an election
                 * storm. It's also highly doubtful a production environment
                 * would get multiple election storms from distinct causes, so
                 * saving one blackbox per process lifetime should be
                 * sufficient. Alternatives would be to save a timestamp of the
                 * last blackbox write instead of a boolean, and write a new one
                 * if some amount of time has passed; or to save a storm count,
                 * write a blackbox on every Nth occurrence.
                 */
                crm_write_blackbox(0, NULL);
                e->wrote_blackbox = TRUE;
            }
        }
    }
    if (done) {
        do_crm_log(log_level + 1,
                   "Processed %s round %d %s (current round %d) from %s (%s)",
                   e->name, vote.election_id, vote.op, e->count, vote.from,
                   reason);
        return e->state;
    } else if (we_lose == FALSE) {
        /* We track the time of the last election loss to implement an election
         * dampening period, reducing the likelihood of an election storm. If
         * this node has lost within the dampening period, don't start a new
         * election, even if we win against a peer's vote -- the peer we lost to
         * should win again.
         *
         * @TODO This has a problem case: if an election winner immediately
         * leaves the cluster, and a new election is immediately called, all
         * nodes could lose, with no new winner elected. The ideal solution
         * would be to tie the election structure with the peer caches, which
         * would allow us to clear the dampening when the previous winner
         * leaves (and would allow other improvements as well).
         */
        if ((e->last_election_loss == 0)
            || ((tm_now - e->last_election_loss) > (time_t) LOSS_DAMPEN)) {
            do_crm_log(log_level, "%s round %d (owner node ID %s) pass: %s from %s (%s)",
                       e->name, vote.election_id, vote.election_owner, vote.op,
                       vote.from, reason);
            e->last_election_loss = 0;
            election_timeout_stop(e);
            /* Start a new election by voting down this, and other, peers */
            e->state = election_start;
            return e->state;
        } else {
            /* NOTE(review): ctime() is not reentrant and the pointer
             * arithmetic assumes its fixed 26-char format -- looks
             * intentional for log-only use, but confirm
             */
            char *loss_time = ctime(&e->last_election_loss);
            if (loss_time) {
                // Show only HH:MM:SS
                loss_time += 11;
                loss_time[8] = '\0';
            }
            crm_info("Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
                     e->name, vote.election_id, vote.election_owner, vote.from,
                     LOSS_DAMPEN, (loss_time? loss_time : "unknown"));
        }
    }
    // We lost (or are within the dampening period): concede to the peer
    e->last_election_loss = tm_now;
    do_crm_log(log_level, "%s round %d (owner node ID %s) lost: %s from %s (%s)",
               e->name, vote.election_id, vote.election_owner, vote.op,
               vote.from, reason);
    election_reset(e);
    send_no_vote(your_node, &vote);
    e->state = election_lost;
    return e->state;
}
/*!
 * \brief Reset any election dampening currently in effect
 *
 * \param[in,out] e  Election object to clear (may be NULL)
 */
void
election_clear_dampening(election_t *e)
{
    /* Guard against NULL for consistency with the rest of the election API
     * (election_state(), election_reset(), election_fini(), etc. all
     * tolerate a NULL election object)
     */
    if (e != NULL) {
        e->last_election_loss = 0;
    }
}
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 3d17f9f4e6..e396a92fa2 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -1,1687 +1,1687 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#if defined(HAVE_UCRED) || defined(HAVE_SOCKPEERCRED)
#include <sys/socket.h>
#elif defined(HAVE_GETPEERUCRED)
#include <ucred.h>
#endif
#include <stdio.h>
#include <sys/types.h>
#include <errno.h>
#include <bzlib.h>
#include <crm/crm.h> /* indirectly: pcmk_err_generic */
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include "crmcommon_private.h"
static int is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock,
uid_t refuid, gid_t refgid, pid_t *gotpid,
uid_t *gotuid, gid_t *gotgid);
/*!
* \brief Create a new object for using Pacemaker daemon IPC
*
* \param[out] api Where to store new IPC object
* \param[in] server Which Pacemaker daemon the object is for
*
* \return Standard Pacemaker result code
*
* \note The caller is responsible for freeing *api using pcmk_free_ipc_api().
* \note This is intended to supersede crm_ipc_new() but currently only
* supports the controller, pacemakerd, and schedulerd IPC API.
*/
int
pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
{
    if (api == NULL) {
        return EINVAL;
    }
    *api = calloc(1, sizeof(pcmk_ipc_api_t));
    if (*api == NULL) {
        return errno;
    }
    (*api)->server = server;
    // An unknown server has no IPC name; reject it early
    if (pcmk_ipc_name(*api, false) == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return EOPNOTSUPP;
    }
    (*api)->ipc_size_max = 0;
    // Set server methods and max_size (if not default)
    switch (server) {
        case pcmk_ipc_attrd:
            (*api)->cmds = pcmk__attrd_api_methods();
            break;
        case pcmk_ipc_based:
            (*api)->ipc_size_max = 512 * 1024; // 512KB
            break;
        case pcmk_ipc_controld:
            (*api)->cmds = pcmk__controld_api_methods();
            break;
        case pcmk_ipc_execd:
            break;
        case pcmk_ipc_fenced:
            break;
        case pcmk_ipc_pacemakerd:
            (*api)->cmds = pcmk__pacemakerd_api_methods();
            break;
        case pcmk_ipc_schedulerd:
            (*api)->cmds = pcmk__schedulerd_api_methods();
            // @TODO max_size could vary by client, maybe take as argument?
            (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB
            break;
        default: // pcmk_ipc_unknown
            pcmk_free_ipc_api(*api);
            *api = NULL;
            return EINVAL;
    }
    /* Servers with no methods set above (based, execd, fenced) always fail
     * here -- they are not yet supported by this API (see function doc)
     */
    if ((*api)->cmds == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return ENOMEM;
    }
    (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false),
                              (*api)->ipc_size_max);
    if ((*api)->ipc == NULL) {
        pcmk_free_ipc_api(*api);
        *api = NULL;
        return ENOMEM;
    }
    // If daemon API has its own data to track, allocate it
    if ((*api)->cmds->new_data != NULL) {
        if ((*api)->cmds->new_data(*api) != pcmk_rc_ok) {
            pcmk_free_ipc_api(*api);
            *api = NULL;
            return ENOMEM;
        }
    }
    crm_trace("Created %s API IPC object", pcmk_ipc_name(*api, true));
    return pcmk_rc_ok;
}
// Release daemon-specific method table and data attached to an IPC API object
static void
free_daemon_specific_data(pcmk_ipc_api_t *api)
{
    if ((api == NULL) || (api->cmds == NULL)) {
        return;
    }
    if ((api->cmds->free_data != NULL) && (api->api_data != NULL)) {
        api->cmds->free_data(api->api_data);
        api->api_data = NULL;
    }
    free(api->cmds);
    api->cmds = NULL;
}
/*!
* \internal
* \brief Call an IPC API event callback, if one is registed
*
* \param[in,out] api IPC API connection
* \param[in] event_type The type of event that occurred
* \param[in] status Event status
* \param[in,out] event_data Event-specific data
*/
void
pcmk__call_ipc_callback(pcmk_ipc_api_t *api, enum pcmk_ipc_event event_type,
                        crm_exit_t status, void *event_data)
{
    if (api == NULL) {
        return;
    }
    if (api->cb != NULL) {
        // Forward the event along with the caller-registered user data
        api->cb(api, event_type, status, event_data, api->user_data);
    }
}
/*!
 * \internal
 * \brief Clean up after an IPC disconnect
 *
 * \param[in,out] user_data  IPC API connection that disconnected
 *
 * \note This function can be used as a main loop IPC destroy callback.
 */
static void
ipc_post_disconnect(gpointer user_data)
{
    pcmk_ipc_api_t *api = user_data;

    crm_info("Disconnected from %s", pcmk_ipc_name(api, true));

    // Perform any daemon-specific handling needed
    if ((api->cmds != NULL) && (api->cmds->post_disconnect != NULL)) {
        api->cmds->post_disconnect(api);
    }

    /* Call client's registered event callback (must happen before api->ipc is
     * cleared below, while the object is still fully populated)
     */
    pcmk__call_ipc_callback(api, pcmk_ipc_event_disconnect, CRM_EX_DISCONNECT,
                            NULL);

    /* If this is being called from a running main loop, mainloop_gio_destroy()
     * will free ipc and mainloop_io immediately after calling this function.
     * If this is called from a stopped main loop, these will leak, so the best
     * practice is to close the connection before stopping the main loop.
     */
    api->ipc = NULL;
    api->mainloop_io = NULL;

    if (api->free_on_disconnect) {
        /* pcmk_free_ipc_api() has already been called, but did not free api
         * or api->cmds because this function needed them. Do that now.
         */
        free_daemon_specific_data(api);
        crm_trace("Freeing IPC API object after disconnect");
        free(api);
    }
}
/*!
 * \brief Free the contents of an IPC API object
 *
 * \param[in,out] api  IPC API object to free (NULL is a no-op)
 *
 * \note If the connection is attached to a main loop, the object may actually
 *       be freed later, by ipc_post_disconnect() via the main loop's destroy
 *       callback (see free_on_disconnect handling below).
 */
void
pcmk_free_ipc_api(pcmk_ipc_api_t *api)
{
    bool free_on_disconnect = false;

    if (api == NULL) {
        return;
    }
    crm_debug("Releasing %s IPC API", pcmk_ipc_name(api, true));

    if (api->ipc != NULL) {
        if (api->mainloop_io != NULL) {
            /* We need to keep the api pointer itself around, because it is the
             * user data for the IPC client destroy callback. That will be
             * triggered by the pcmk_disconnect_ipc() call below, but it might
             * happen later in the main loop (if still running).
             *
             * This flag tells the destroy callback to free the object. It can't
             * do that unconditionally, because the application might call this
             * function after a disconnect that happened by other means.
             */
            free_on_disconnect = api->free_on_disconnect = true;
        }
        pcmk_disconnect_ipc(api); // Frees api if free_on_disconnect is true
    }

    // Only free here if the destroy callback isn't (or wasn't) responsible
    if (!free_on_disconnect) {
        free_daemon_specific_data(api);
        crm_trace("Freeing IPC API object");
        free(api);
    }
}
/*!
 * \brief Get the IPC name used with an IPC API connection
 *
 * \param[in] api      IPC API connection
 * \param[in] for_log  If true, return human-friendly name instead of IPC name
 *
 * \return IPC API's human-friendly or connection name, or if none is available,
 *         "Pacemaker" if for_log is true and NULL if for_log is false
 */
const char *
pcmk_ipc_name(const pcmk_ipc_api_t *api, bool for_log)
{
    if (api == NULL) {
        return for_log? "Pacemaker" : NULL;
    }
    if (for_log) {
        const char *log_name = pcmk__server_log_name(api->server);

        return pcmk__s(log_name, "Pacemaker");
    }
    if ((api->server == pcmk_ipc_based) || (api->server == pcmk_ipc_execd)
        || (api->server == pcmk_ipc_fenced)) {
        // These servers do not have pcmk_ipc_api_t implementations yet
        return NULL;
    }
    return pcmk__server_ipc_name(api->server);
}
/*!
 * \brief Check whether an IPC API connection is active
 *
 * \param[in,out] api  IPC API connection
 *
 * \return true if IPC is connected, false otherwise
 */
bool
pcmk_ipc_is_connected(pcmk_ipc_api_t *api)
{
    if (api == NULL) {
        return false;
    }
    return crm_ipc_connected(api->ipc);
}
/*!
 * \internal
 * \brief Call the daemon-specific API's dispatch function
 *
 * Perform daemon-specific handling of IPC reply dispatch. It is the daemon
 * method's responsibility to call the client's registered event callback, as
 * well as allocate and free any event data.
 *
 * \param[in,out] api      IPC API connection
 * \param[in,out] message  IPC reply XML to dispatch
 *
 * \return Daemon method's result, or false if the daemon has no dispatch method
 */
static bool
call_api_dispatch(pcmk_ipc_api_t *api, xmlNode *message)
{
    crm_log_xml_trace(message, "ipc-received");
    if ((api->cmds == NULL) || (api->cmds->dispatch == NULL)) {
        return false;
    }
    return api->cmds->dispatch(api, message);
}
/*!
 * \internal
 * \brief Dispatch previously read IPC data
 *
 * \param[in]     buffer  Data read from IPC
 * \param[in,out] api     IPC object
 *
 * \return Standard Pacemaker return code. In particular:
 *
 * pcmk_rc_ok: There are no more messages expected from the server. Quit
 *             reading.
 * EINPROGRESS: There are more messages expected from the server. Keep reading.
 *
 * All other values indicate an error.
 */
static int
dispatch_ipc_data(const char *buffer, pcmk_ipc_api_t *api)
{
    xmlNode *msg = NULL;
    int rc = pcmk_rc_ok;

    if (buffer == NULL) {
        crm_warn("Empty message received from %s IPC",
                 pcmk_ipc_name(api, true));
        return ENOMSG;
    }

    msg = pcmk__xml_parse(buffer);
    if (msg == NULL) {
        crm_warn("Malformed message received from %s IPC",
                 pcmk_ipc_name(api, true));
        return EPROTO;
    }

    // Daemon method reports whether the server has more messages coming
    if (call_api_dispatch(api, msg)) {
        rc = EINPROGRESS;
    }
    pcmk__xml_free(msg);
    return rc;
}
/*!
 * \internal
 * \brief Dispatch data read from IPC source
 *
 * \param[in]     buffer     Data read from IPC
 * \param[in]     length     Number of bytes of data in buffer (ignored)
 * \param[in,out] user_data  IPC object
 *
 * \return Always 0 (meaning connection is still required)
 *
 * \note This function can be used as a main loop IPC dispatch callback.
 */
static int
dispatch_ipc_source_data(const char *buffer, ssize_t length, gpointer user_data)
{
    pcmk_ipc_api_t *api = user_data;

    CRM_CHECK(api != NULL, return 0);

    // Return value is ignored here; the main loop keeps the connection anyway
    dispatch_ipc_data(buffer, api);
    return 0;
}
/*!
 * \brief Check whether an IPC connection has data available (without main loop)
 *
 * \param[in]  api         IPC API connection
 * \param[in]  timeout_ms  If less than 0, poll indefinitely; if 0, poll once
 *                         and return immediately; otherwise, poll for up to
 *                         this many milliseconds
 *
 * \return Standard Pacemaker return code
 *
 * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call
 *       this function to check whether IPC data is available. Return values of
 *       interest include pcmk_rc_ok meaning data is available, and EAGAIN
 *       meaning no data is available; all other values indicate errors.
 * \todo This does not allow the caller to poll multiple file descriptors at
 *       once. If there is demand for that, we could add a wrapper for
 *       pcmk__ipc_fd(api->ipc), so the caller can call poll() themselves.
 */
int
pcmk_poll_ipc(const pcmk_ipc_api_t *api, int timeout_ms)
{
    int rc = pcmk_rc_ok;
    struct pollfd fds = { 0, };

    if ((api == NULL) || (api->dispatch_type != pcmk_ipc_dispatch_poll)) {
        return EINVAL;
    }

    rc = pcmk__ipc_fd(api->ipc, &(fds.fd));
    if (rc != pcmk_rc_ok) {
        crm_debug("Could not obtain file descriptor for %s IPC: %s",
                  pcmk_ipc_name(api, true), pcmk_rc_str(rc));
        return rc;
    }

    fds.events = POLLIN;
    rc = poll(&fds, 1, timeout_ms);
    if (rc > 0) {
        return pcmk_rc_ok;      // Data is available to read
    }
    if (rc == 0) {
        return EAGAIN;          // Timed out with nothing to read
    }

    /* Some UNIX systems return negative and set EAGAIN for failure to
     * allocate memory; standardize the return code in that case
     */
    return (errno == EAGAIN)? ENOMEM : errno;
}
/*!
 * \brief Dispatch available messages on an IPC connection (without main loop)
 *
 * \param[in,out] api  IPC API connection (NULL is a no-op)
 *
 * \note Callers of pcmk_connect_ipc() using pcmk_ipc_dispatch_poll should call
 *       this function when IPC data is available.
 */
void
pcmk_dispatch_ipc(pcmk_ipc_api_t *api)
{
    if (api == NULL) {
        return;
    }
    // Read and dispatch every message currently queued on the connection
    while (crm_ipc_ready(api->ipc) > 0) {
        if (crm_ipc_read(api->ipc) > 0) {
            dispatch_ipc_data(crm_ipc_buffer(api->ipc), api);
        }
    }
}
/*!
 * \internal
 * \brief Connect IPC and attach it to the main loop
 *
 * \param[in,out] api  IPC API connection
 *
 * \return Standard Pacemaker return code
 */
static int
connect_with_main_loop(pcmk_ipc_api_t *api)
{
    struct ipc_client_callbacks callbacks = {
        .dispatch = dispatch_ipc_source_data,
        .destroy = ipc_post_disconnect,
    };
    int rc = pcmk__add_mainloop_ipc(api->ipc, G_PRIORITY_DEFAULT, api,
                                    &callbacks, &(api->mainloop_io));

    if (rc == pcmk_rc_ok) {
        crm_debug("Connected to %s IPC (attached to main loop)",
                  pcmk_ipc_name(api, true));
        /* After this point, api->mainloop_io owns api->ipc, so api->ipc
         * should not be explicitly freed.
         */
    }
    return rc;
}
/*!
 * \internal
 * \brief Connect IPC without attaching it to a main loop
 *
 * \param[in,out] api  IPC API connection
 *
 * \return Standard Pacemaker return code
 */
static int
connect_without_main_loop(pcmk_ipc_api_t *api)
{
    int rc = pcmk__connect_generic_ipc(api->ipc);

    if (rc == pcmk_rc_ok) {
        crm_debug("Connected to %s IPC (without main loop)",
                  pcmk_ipc_name(api, true));
    } else {
        crm_ipc_close(api->ipc);
    }
    return rc;
}
/*!
 * \internal
 * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors)
 *
 * \param[in,out] api            IPC API instance
 * \param[in]     dispatch_type  How IPC replies should be dispatched
 * \param[in]     attempts       How many times to try (in case of soft error);
 *                               must be at least 1
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type,
                  int attempts)
{
    int rc = pcmk_rc_ok;

    if ((api == NULL) || (attempts < 1)) {
        return EINVAL;
    }

    // Create the underlying IPC object on first use (or after a full teardown)
    if (api->ipc == NULL) {
        api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), api->ipc_size_max);
        if (api->ipc == NULL) {
            return ENOMEM;
        }
    }

    // Connecting is idempotent: an already-connected API is a success
    if (crm_ipc_connected(api->ipc)) {
        crm_trace("Already connected to %s", pcmk_ipc_name(api, true));
        return pcmk_rc_ok;
    }

    api->dispatch_type = dispatch_type;

    crm_debug("Attempting connection to %s (up to %d time%s)",
              pcmk_ipc_name(api, true), attempts, pcmk__plural_s(attempts));
    for (int remaining = attempts - 1; remaining >= 0; --remaining) {
        switch (dispatch_type) {
            case pcmk_ipc_dispatch_main:
                rc = connect_with_main_loop(api);
                break;

            case pcmk_ipc_dispatch_sync:
            case pcmk_ipc_dispatch_poll:
                rc = connect_without_main_loop(api);
                break;
        }

        // EAGAIN/EALREADY are considered soft errors worth retrying
        if ((remaining == 0) || ((rc != EAGAIN) && (rc != EALREADY))) {
            break; // Result is final
        }

        // Retry after soft error (interrupted by signal, etc.)
        // Back off a little more on each successive attempt
        pcmk__sleep_ms((attempts - remaining) * 500);
        crm_debug("Re-attempting connection to %s (%d attempt%s remaining)",
                  pcmk_ipc_name(api, true), remaining,
                  pcmk__plural_s(remaining));
    }

    if (rc != pcmk_rc_ok) {
        return rc;
    }

    // Let the daemon-specific API do any post-connection setup it needs
    if ((api->cmds != NULL) && (api->cmds->post_connect != NULL)) {
        rc = api->cmds->post_connect(api);
        if (rc != pcmk_rc_ok) {
            crm_ipc_close(api->ipc);
        }
    }
    return rc;
}
/*!
 * \brief Connect to a Pacemaker daemon via IPC
 *
 * \param[in,out] api            IPC API instance
 * \param[in]     dispatch_type  How IPC replies should be dispatched
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type)
{
    // Allow one retry after a soft error before giving up
    int rc = pcmk__connect_ipc(api, dispatch_type, 2);

    if (rc == pcmk_rc_ok) {
        return pcmk_rc_ok;
    }
    crm_err("Connection to %s failed: %s",
            pcmk_ipc_name(api, true), pcmk_rc_str(rc));
    return rc;
}
/*!
 * \brief Disconnect an IPC API instance
 *
 * \param[in,out] api  IPC API connection
 *
 * \note If the connection is attached to a main loop, this function should be
 *       called before quitting the main loop, to ensure that all memory is
 *       freed.
 * \warning If api->free_on_disconnect is set, \p api itself may be freed
 *          before this function returns (via ipc_post_disconnect()).
 */
void
pcmk_disconnect_ipc(pcmk_ipc_api_t *api)
{
    if ((api == NULL) || (api->ipc == NULL)) {
        return;
    }
    switch (api->dispatch_type) {
        case pcmk_ipc_dispatch_main:
            {
                mainloop_io_t *mainloop_io = api->mainloop_io;

                // Make sure no code with access to api can use these again
                api->mainloop_io = NULL;
                api->ipc = NULL;

                // Destroy callback (ipc_post_disconnect) fires from here
                mainloop_del_ipc_client(mainloop_io);
                // After this point api might have already been freed
            }
            break;

        case pcmk_ipc_dispatch_poll:
        case pcmk_ipc_dispatch_sync:
            {
                crm_ipc_t *ipc = api->ipc;

                // Make sure no code with access to api can use ipc again
                api->ipc = NULL;

                // This should always be the case already, but to be safe
                api->free_on_disconnect = false;

                crm_ipc_close(ipc);
                crm_ipc_destroy(ipc);
                // Run disconnect handling manually (no main loop to do it)
                ipc_post_disconnect(api);
            }
            break;
    }
}
/*!
 * \brief Register a callback for IPC API events
 *
 * \param[in,out] api        IPC API connection
 * \param[in]     cb         Callback to register (may be NULL to unregister)
 * \param[in]     user_data  Caller data to pass to callback
 *
 * \note This function may be called multiple times to update the callback
 *       and/or user data. The caller remains responsible for freeing
 *       user_data in any case (after the IPC is disconnected, if the
 *       user data is still registered with the IPC).
 */
void
pcmk_register_ipc_callback(pcmk_ipc_api_t *api, pcmk_ipc_callback_t cb,
                           void *user_data)
{
    if (api == NULL) {
        return;
    }
    api->cb = cb;
    api->user_data = user_data;
}
/*!
 * \internal
 * \brief Send an XML request across an IPC API connection
 *
 * \param[in,out] api      IPC API connection
 * \param[in]     request  XML request to send
 *
 * \return Standard Pacemaker return code
 *
 * \note Daemon-specific IPC API functions should call this function to send
 *       requests, because it handles different dispatch types appropriately.
 */
int
pcmk__send_ipc_request(pcmk_ipc_api_t *api, const xmlNode *request)
{
    int rc;
    xmlNode *reply = NULL;
    enum crm_ipc_flags flags = crm_ipc_flags_none;

    if ((api == NULL) || (api->ipc == NULL) || (request == NULL)) {
        return EINVAL;
    }
    crm_log_xml_trace(request, "ipc-sent");

    // Synchronous dispatch requires waiting for a reply
    if ((api->dispatch_type == pcmk_ipc_dispatch_sync)
        && (api->cmds != NULL)
        && (api->cmds->reply_expected != NULL)
        && (api->cmds->reply_expected(api, request))) {
        flags = crm_ipc_client_response;
    }

    // The 0 here means a default timeout of 5 seconds
    rc = crm_ipc_send(api->ipc, request, flags, 0, &reply);

    if (rc < 0) {
        return pcmk_legacy2rc(rc);  // Negative legacy rc -> standard code
    } else if (rc == 0) {
        return ENODATA;             // Nothing was sent
    }

    // With synchronous dispatch, we dispatch any reply now
    if (reply != NULL) {
        bool more = call_api_dispatch(api, reply);

        pcmk__xml_free(reply);

        /* Keep draining the connection as long as the dispatch method says
         * more messages are expected from the server
         */
        while (more) {
            rc = crm_ipc_read(api->ipc);

            // @TODO Possible to get -EAGAIN but no response in a reasonable time?

            if (rc == -EAGAIN) {
                continue;                   // Nothing ready yet; try again
            } else if (rc == -ENOMSG || rc == pcmk_ok) {
                return pcmk_rc_ok;          // No further messages
            } else if (rc < 0) {
                return -rc;                 // Read error (system errno)
            }

            rc = dispatch_ipc_data(crm_ipc_buffer(api->ipc), api);

            if (rc == pcmk_rc_ok) {
                more = false;               // Server is done
            } else if (rc == EINPROGRESS) {
                more = true;                // Server has more to send
            } else {
                continue;                   // Dispatch error; keep reading
            }
        }
    }
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Create the XML for an IPC request to purge a node from the peer cache
 *
 * \param[in] api        IPC API connection
 * \param[in] node_name  If not NULL, name of node to purge
 * \param[in] nodeid     If not 0, node ID of node to purge
 *
 * \return Newly allocated IPC request XML, or NULL if the server is unknown
 *         or has no purge syntax
 *
 * \note The controller, fencer, and pacemakerd use the same request syntax, but
 *       the attribute manager uses a different one. The CIB manager doesn't
 *       have any syntax for it. The executor and scheduler don't connect to the
 *       cluster layer and thus don't have or need any syntax for it.
 *
 * \todo Modify the attribute manager to accept the common syntax (as well
 *       as its current one, for compatibility with older clients). Modify
 *       the CIB manager to accept and honor the common syntax. Modify the
 *       executor and scheduler to accept the syntax (immediately returning
 *       success), just for consistency. Modify this function to use the
 *       common syntax with all daemons if their version supports it.
 */
static xmlNode *
create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name,
                          uint32_t nodeid)
{
    xmlNode *request = NULL;
    const char *client = crm_system_name? crm_system_name : "client";

    switch (api->server) {
        case pcmk_ipc_attrd:
            // Attribute manager uses its own peer-removal syntax
            request = pcmk__xe_create(NULL, __func__);
            crm_xml_add(request, PCMK__XA_T, PCMK__VALUE_ATTRD);
            crm_xml_add(request, PCMK__XA_SRC, crm_system_name);
            crm_xml_add(request, PCMK_XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
            pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true);
            pcmk__xe_add_node(request, node_name, nodeid);
            break;

        case pcmk_ipc_controld:
        case pcmk_ipc_fenced:
        case pcmk_ipc_pacemakerd:
            // These daemons share the common CRM_OP_RM_NODE_CACHE syntax
            request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL,
                                     pcmk_ipc_name(api, false), client);
            if (nodeid > 0) {
                crm_xml_add_ll(request, PCMK_XA_ID, (long long) nodeid);
            }
            crm_xml_add(request, PCMK_XA_UNAME, node_name);
            break;

        case pcmk_ipc_based:
        case pcmk_ipc_execd:
        case pcmk_ipc_schedulerd:
            break;  // No purge syntax for these servers

        default:    // pcmk_ipc_unknown (shouldn't be possible)
            return NULL;
    }
    return request;
}
/*!
 * \brief Ask a Pacemaker daemon to purge a node from its peer cache
 *
 * \param[in,out] api        IPC API connection
 * \param[in]     node_name  If not NULL, name of node to purge
 * \param[in]     nodeid     If not 0, node ID of node to purge
 *
 * \return Standard Pacemaker return code
 *
 * \note At least one of node_name or nodeid must be specified.
 */
int
pcmk_ipc_purge_node(pcmk_ipc_api_t *api, const char *node_name, uint32_t nodeid)
{
    int rc = 0;
    xmlNode *request = NULL;

    // Require a connection and at least one way to identify the node
    if ((api == NULL) || ((node_name == NULL) && (nodeid == 0))) {
        return EINVAL;
    }

    request = create_purge_node_request(api, node_name, nodeid);
    if (request == NULL) {
        return EOPNOTSUPP;  // Server has no purge request syntax
    }
    rc = pcmk__send_ipc_request(api, request);
    pcmk__xml_free(request);

    crm_debug("%s peer cache purge of node %s[%lu]: rc=%d",
              pcmk_ipc_name(api, true), node_name, (unsigned long) nodeid, rc);
    return rc;
}
/*
 * Generic IPC API (to eventually be deprecated as public API and made internal)
 */

// Legacy IPC client connection state (see crm_ipc_new())
struct crm_ipc_s {
    struct pollfd pfd;          // file descriptor and poll state (fd < 0 when invalid)
    unsigned int max_buf_size;  // maximum bytes we can send or receive over IPC
    unsigned int buf_size;      // size of allocated buffer
    int msg_size;               // bytes of last received message, or negative errno
    int need_reply;             // whether a reply to an earlier request is still pending
    char *buffer;               // receive buffer (pcmk__ipc_header_t followed by payload)
    char *server_name;          // server IPC name being connected to
    qb_ipcc_connection_t *ipc;  // libqb connection handle (NULL when closed)
};
/*!
 * \brief Create a new (legacy) object for using Pacemaker daemon IPC
 *
 * \param[in] name      IPC system name to connect to
 * \param[in] max_size  Use a maximum IPC buffer size of at least this size
 *
 * \return Newly allocated IPC object on success, NULL otherwise
 *
 * \note The caller is responsible for freeing the result using
 *       crm_ipc_destroy().
 * \note This should be considered deprecated for use with daemons supported by
 *       pcmk_new_ipc_api().
 */
crm_ipc_t *
crm_ipc_new(const char *name, size_t max_size)
{
    crm_ipc_t *client = calloc(1, sizeof(crm_ipc_t));

    if (client == NULL) {
        crm_err("Could not create IPC connection: %s", strerror(errno));
        return NULL;
    }

    client->server_name = strdup(name);
    if (client->server_name == NULL) {
        crm_err("Could not create %s IPC connection: %s",
                name, strerror(errno));
        goto fail_free_client;
    }

    client->buf_size = pcmk__ipc_buffer_size(max_size);
    client->buffer = malloc(client->buf_size);
    if (client->buffer == NULL) {
        crm_err("Could not create %s IPC connection: %s",
                name, strerror(errno));
        goto fail_free_name;
    }

    /* Clients initiating connection pick the max buf size */
    client->max_buf_size = client->buf_size;
    client->pfd.fd = -1;        // No descriptor until connected
    client->pfd.events = POLLIN;
    client->pfd.revents = 0;
    return client;

fail_free_name:
    free(client->server_name);
fail_free_client:
    free(client);
    return NULL;
}
/*!
 * \internal
 * \brief Connect a generic (not daemon-specific) IPC object
 *
 * \param[in,out] ipc  Generic IPC object to connect
 *
 * \return Standard Pacemaker return code (the connection is closed again on
 *         any failure after qb_ipcc_connect() succeeds)
 */
int
pcmk__connect_generic_ipc(crm_ipc_t *ipc)
{
    uid_t cl_uid = 0;
    gid_t cl_gid = 0;
    pid_t found_pid = 0;
    uid_t found_uid = 0;
    gid_t found_gid = 0;
    int rc = pcmk_rc_ok;

    if (ipc == NULL) {
        return EINVAL;
    }

    ipc->need_reply = FALSE;
    ipc->ipc = qb_ipcc_connect(ipc->server_name, ipc->buf_size);
    if (ipc->ipc == NULL) {
        return errno;
    }

    rc = qb_ipcc_fd_get(ipc->ipc, &ipc->pfd.fd);
    if (rc < 0) { // -errno
        crm_ipc_close(ipc);
        return -rc;
    }

    // Determine the expected daemon user/group for authentication
    rc = pcmk_daemon_user(&cl_uid, &cl_gid);
    rc = pcmk_legacy2rc(rc);
    if (rc != pcmk_rc_ok) {
        crm_ipc_close(ipc);
        return rc;
    }

    // Verify the server process is really running as the daemon user (or root)
    rc = is_ipc_provider_expected(ipc->ipc, ipc->pfd.fd, cl_uid, cl_gid,
                                  &found_pid, &found_uid, &found_gid);
    if (rc != pcmk_rc_ok) {
        if (rc == pcmk_rc_ipc_unauthorized) {
            crm_info("%s IPC provider authentication failed: process %lld has "
                     "uid %lld (expected %lld) and gid %lld (expected %lld)",
                     ipc->server_name,
                     (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
                     (long long) found_uid, (long long) cl_uid,
                     (long long) found_gid, (long long) cl_gid);
        }
        crm_ipc_close(ipc);
        return rc;
    }

    // Grow our buffer if the server negotiated a larger one
    ipc->max_buf_size = qb_ipcc_get_buffer_size(ipc->ipc);
    if (ipc->max_buf_size > ipc->buf_size) {
        free(ipc->buffer);
        ipc->buffer = calloc(ipc->max_buf_size, sizeof(char));
        if (ipc->buffer == NULL) {
            rc = errno;
            crm_ipc_close(ipc);
            return rc;
        }
        ipc->buf_size = ipc->max_buf_size;
    }

    return pcmk_rc_ok;
}
/*!
 * \brief Establish an IPC connection to a Pacemaker component
 *
 * \param[in,out] client  Connection instance obtained from crm_ipc_new()
 *
 * \return true on success, false otherwise (in which case errno will be set;
 *         specifically, in case of discovering the remote side is not
 *         authentic, its value is set to ECONNABORTED).
 */
bool
crm_ipc_connect(crm_ipc_t *client)
{
    int rc = pcmk__connect_generic_ipc(client);

    if (rc == pcmk_rc_ok) {
        return true;
    }

    // Map the standard return code onto this function's errno-based contract
    if ((client != NULL) && (client->ipc == NULL)) {
        // Never got as far as an open connection
        errno = (rc > 0)? rc : ENOTCONN;
        crm_debug("Could not establish %s IPC connection: %s (%d)",
                  client->server_name, pcmk_rc_str(errno), errno);
    } else if (rc == pcmk_rc_ipc_unauthorized) {
        crm_err("%s IPC provider authentication failed",
                (client == NULL)? "Pacemaker" : client->server_name);
        errno = ECONNABORTED;
    } else {
        crm_perror(LOG_ERR,
                   "Could not verify authenticity of %s IPC provider",
                   (client == NULL)? "Pacemaker" : client->server_name);
        errno = ENOTCONN;
    }
    return false;
}
/*!
 * \brief Close an IPC connection (without freeing the client object)
 *
 * \param[in,out] client  IPC connection to close (NULL is a no-op)
 */
void
crm_ipc_close(crm_ipc_t *client)
{
    qb_ipcc_connection_t *ipc = NULL;

    if ((client == NULL) || (client->ipc == NULL)) {
        return;
    }
    // Clear the handle before disconnecting so it can't be reused
    ipc = client->ipc;
    client->ipc = NULL;
    qb_ipcc_disconnect(ipc);
}
/*!
 * \brief Free an IPC client object created by crm_ipc_new()
 *
 * \param[in,out] client  IPC client to free (NULL is a no-op)
 *
 * \note This deliberately does not close an active connection (see below).
 */
void
crm_ipc_destroy(crm_ipc_t *client)
{
    if (client == NULL) {
        return;
    }
    if ((client->ipc != NULL) && qb_ipcc_is_connected(client->ipc)) {
        crm_notice("Destroying active %s IPC connection",
                   client->server_name);
        /* Calling crm_ipc_close(client) here would be basically unsafe:
         *
         * If this connection was attached to mainloop and mainloop is active,
         * the 'disconnected' callback will end up back here and we'll end
         * up free'ing the memory twice - something that can still happen
         * even without this if we destroy a connection and it closes before
         * we call exit
         */
    } else {
        crm_trace("Destroying inactive %s IPC connection",
                  client->server_name);
    }
    free(client->buffer);
    free(client->server_name);
    free(client);
}
/*!
 * \internal
 * \brief Get the file descriptor for a generic IPC object
 *
 * \param[in,out] ipc  Generic IPC object to get file descriptor for
 * \param[out]    fd   Where to store file descriptor
 *
 * \return Standard Pacemaker return code
 */
int
pcmk__ipc_fd(crm_ipc_t *ipc, int *fd)
{
    if ((ipc == NULL) || (fd == NULL)) {
        return EINVAL;
    }
    if ((ipc->ipc != NULL) && (ipc->pfd.fd >= 0)) {
        *fd = ipc->pfd.fd;
        return pcmk_rc_ok;
    }
    return ENOTCONN;    // Not connected, or descriptor marked invalid
}
/*!
 * \brief Get the file descriptor for an IPC client (legacy interface)
 *
 * \param[in,out] client  IPC client to get file descriptor for
 *
 * \return File descriptor on success, -EINVAL otherwise (with errno set)
 */
int
crm_ipc_get_fd(crm_ipc_t *client)
{
    int fd = -1;

    if (pcmk__ipc_fd(client, &fd) == pcmk_rc_ok) {
        return fd;
    }
    crm_err("Could not obtain file descriptor for %s IPC",
            ((client == NULL)? "unspecified" : client->server_name));
    errno = EINVAL;
    return -EINVAL;
}
/*!
 * \brief Check whether a legacy IPC client is currently connected
 *
 * \param[in,out] client  IPC client to check (NULL is treated as disconnected)
 *
 * \return TRUE if connected, FALSE otherwise
 */
bool
crm_ipc_connected(crm_ipc_t *client)
{
    if (client == NULL) {
        crm_trace("No client");
        return FALSE;
    }
    if (client->ipc == NULL) {
        crm_trace("No connection");
        return FALSE;
    }
    if (client->pfd.fd < 0) {
        crm_trace("Bad descriptor");
        return FALSE;
    }
    if (!qb_ipcc_is_connected(client->ipc)) {
        // Invalidate the descriptor so later checks short-circuit
        client->pfd.fd = -EINVAL;
        return FALSE;
    }
    return TRUE;
}
/*!
 * \brief Check whether an IPC connection is ready to be read
 *
 * \param[in,out] client  Connection to check
 *
 * \return Positive value if ready to be read, 0 if not ready, -errno on error
 */
int
crm_ipc_ready(crm_ipc_t *client)
{
    int num_ready = 0;

    CRM_ASSERT(client != NULL);

    if (!crm_ipc_connected(client)) {
        return -ENOTCONN;
    }

    // Non-blocking poll: just report whether data is waiting right now
    client->pfd.revents = 0;
    num_ready = poll(&(client->pfd), 1, 0);
    if (num_ready < 0) {
        return -errno;
    }
    return num_ready;
}
/*!
 * \internal
 * \brief Decompress the message in a client's receive buffer, if compressed
 *
 * \param[in,out] client  IPC client whose buffer holds a just-received message
 *
 * \return Standard Pacemaker return code
 *
 * \note On success with a compressed message, client->buffer is replaced with
 *       a larger buffer containing the original header plus the uncompressed
 *       payload, and client->buf_size is updated accordingly.
 */
static int
crm_ipc_decompress(crm_ipc_t * client)
{
    pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer;

    if (header->size_compressed) {
        int rc = 0;
        // +1 leaves room for the NUL terminator checked below
        unsigned int size_u = 1 + header->size_uncompressed;
        /* never let buf size fall below our max size required for ipc reads. */
        unsigned int new_buf_size = QB_MAX((sizeof(pcmk__ipc_header_t) + size_u), client->max_buf_size);
        char *uncompressed = pcmk__assert_alloc(1, new_buf_size);

        crm_trace("Decompressing message data %u bytes into %u bytes",
                  header->size_compressed, size_u);

        // Decompress the payload, leaving space before it for the header
        rc = BZ2_bzBuffToBuffDecompress(uncompressed + sizeof(pcmk__ipc_header_t), &size_u,
                                        client->buffer + sizeof(pcmk__ipc_header_t), header->size_compressed, 1, 0);
        rc = pcmk__bzlib2rc(rc);

        if (rc != pcmk_rc_ok) {
            crm_err("Decompression failed: %s " QB_XS " rc=%d",
                    pcmk_rc_str(rc), rc);
            free(uncompressed);
            return rc;
        }

        /*
         * This assert no longer holds true.  For an identical msg, some clients may
         * require compression, and others may not. If that same msg (event) is sent
         * to multiple clients, it could result in some clients receiving a compressed
         * msg even though compression was not explicitly required for them.
         *
         * CRM_ASSERT((header->size_uncompressed + sizeof(pcmk__ipc_header_t)) >= ipc_buffer_max);
         */
        CRM_ASSERT(size_u == header->size_uncompressed);

        memcpy(uncompressed, client->buffer, sizeof(pcmk__ipc_header_t));       /* Preserve the header */
        header = (pcmk__ipc_header_t *)(void*)uncompressed;

        // Swap the decompressed buffer in for the original
        free(client->buffer);
        client->buf_size = new_buf_size;
        client->buffer = uncompressed;
    }

    // Payload must be NUL-terminated (it is parsed as an XML string)
    CRM_ASSERT(client->buffer[sizeof(pcmk__ipc_header_t) + header->size_uncompressed - 1] == 0);
    return pcmk_rc_ok;
}
/*!
 * \brief Read a message from an IPC connection into the client's buffer
 *
 * \param[in,out] client  IPC client to read from
 *
 * \return Size of the (uncompressed) message payload on success,
 *         -EAGAIN if no message was ready, -EBADMSG on an invalid header,
 *         a negative legacy rc on decompression failure, or -ENOMSG otherwise
 */
long
crm_ipc_read(crm_ipc_t * client)
{
    pcmk__ipc_header_t *header = NULL;

    CRM_ASSERT(client != NULL);
    CRM_ASSERT(client->ipc != NULL);
    CRM_ASSERT(client->buffer != NULL);

    client->buffer[0] = 0;
    // Non-blocking receive (timeout 0) of the next queued event
    client->msg_size = qb_ipcc_event_recv(client->ipc, client->buffer,
                                          client->buf_size, 0);
    if (client->msg_size >= 0) {
        int rc = crm_ipc_decompress(client);

        if (rc != pcmk_rc_ok) {
            return pcmk_rc2legacy(rc);
        }

        // Decompression may have replaced client->buffer; re-read the header
        header = (pcmk__ipc_header_t *)(void*)client->buffer;
        if (!pcmk__valid_ipc_header(header)) {
            return -EBADMSG;
        }

        crm_trace("Received %s IPC event %d size=%u rc=%d text='%.100s'",
                  client->server_name, header->qb.id, header->qb.size,
                  client->msg_size,
                  client->buffer + sizeof(pcmk__ipc_header_t));

    } else {
        crm_trace("No message received from %s IPC: %s",
                  client->server_name, pcmk_strerror(client->msg_size));

        if (client->msg_size == -EAGAIN) {
            return -EAGAIN;
        }
    }

    if (!crm_ipc_connected(client) || client->msg_size == -ENOTCONN) {
        crm_err("Connection to %s IPC failed", client->server_name);
    }

    if (header) {
        /* Data excluding the header */
        return header->size_uncompressed;
    }
    return -ENOMSG;
}
/*!
 * \brief Get a pointer to the payload of the last message received
 *
 * \param[in,out] client  IPC client to get buffer from
 *
 * \return Start of message payload (immediately after the IPC header)
 */
const char *
crm_ipc_buffer(crm_ipc_t *client)
{
    CRM_ASSERT(client != NULL);
    return &(client->buffer[sizeof(pcmk__ipc_header_t)]);
}
/*!
 * \brief Get the header flags of the last message received
 *
 * \param[in,out] client  IPC client to get flags from
 *
 * \return Header flags, or 0 if no buffer is allocated
 */
uint32_t
crm_ipc_buffer_flags(crm_ipc_t *client)
{
    CRM_ASSERT(client != NULL);

    if (client->buffer != NULL) {
        pcmk__ipc_header_t *header = (pcmk__ipc_header_t *)(void*)client->buffer;

        return header->flags;
    }
    return 0;
}
/*!
 * \brief Get the server name an IPC client was created for
 *
 * \param[in,out] client  IPC client to get name of
 *
 * \return Server IPC name passed to crm_ipc_new()
 */
const char *
crm_ipc_name(crm_ipc_t *client)
{
    CRM_ASSERT(client != NULL);
    return client->server_name;
}
/*!
 * \internal
 * \brief Wait for and receive the reply to a given IPC request
 *
 * \param[in,out] client      IPC client that sent the request
 * \param[in]     request_id  ID of the request whose reply is expected
 * \param[in]     ms_timeout  Roughly how long to keep waiting (milliseconds)
 * \param[out]    bytes       Where to store bytes received (or negative errno)
 *
 * \return Standard Pacemaker return code
 */
static int
internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout,
                       ssize_t *bytes)
{
    // +1 second rounds the millisecond timeout up to whole seconds
    time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);
    int rc = pcmk_rc_ok;

    /* get the reply */
    crm_trace("Waiting on reply to %s IPC message %d",
              client->server_name, request_id);
    do {
        // Block up to 1 second per receive attempt
        *bytes = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, 1000);
        if (*bytes > 0) {
            pcmk__ipc_header_t *hdr = NULL;

            rc = crm_ipc_decompress(client);
            if (rc != pcmk_rc_ok) {
                return rc;
            }

            // Match the reply to our request by libqb message ID
            hdr = (pcmk__ipc_header_t *)(void*)client->buffer;
            if (hdr->qb.id == request_id) {
                /* Got it */
                break;
            } else if (hdr->qb.id < request_id) {
                // Stale reply to an earlier request; log and keep waiting
                xmlNode *bad = pcmk__xml_parse(crm_ipc_buffer(client));

                crm_err("Discarding old reply %d (need %d)", hdr->qb.id, request_id);
                crm_log_xml_notice(bad, "OldIpcReply");

            } else {
                // Reply ID from the future should be impossible
                xmlNode *bad = pcmk__xml_parse(crm_ipc_buffer(client));

                crm_err("Discarding newer reply %d (need %d)", hdr->qb.id, request_id);
                crm_log_xml_notice(bad, "ImpossibleReply");
                CRM_ASSERT(hdr->qb.id <= request_id);
            }
        } else if (!crm_ipc_connected(client)) {
            crm_err("%s IPC provider disconnected while waiting for message %d",
                    client->server_name, request_id);
            break;
        }

    } while (time(NULL) < timeout);

    if (*bytes < 0) {
        rc = (int) -*bytes; // System errno
    }
    return rc;
}
/*!
 * \brief Send an IPC XML message
 *
 * \param[in,out] client      Connection to IPC server
 * \param[in]     message     XML message to send
 * \param[in]     flags       Bitmask of crm_ipc_flags
 * \param[in]     ms_timeout  Give up if not sent within this much time
 *                            (5 seconds if 0, or no timeout if negative)
 * \param[out]    reply       Reply from server (or NULL if none)
 *
 * \return Negative errno on error, otherwise size of reply received in bytes
 *         if reply was needed, otherwise number of bytes sent
 */
int
crm_ipc_send(crm_ipc_t *client, const xmlNode *message,
             enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply)
{
    int rc = 0;
    ssize_t qb_rc = 0;
    ssize_t bytes = 0;
    struct iovec *iov;
    static uint32_t id = 0;             // Monotonic request ID across all calls
    static int factor = 8;              // Threshold (in tenths) for size warnings
    pcmk__ipc_header_t *header;

    if (client == NULL) {
        crm_notice("Can't send IPC request without connection (bug?): %.100s",
                   message);
        return -ENOTCONN;

    } else if (!crm_ipc_connected(client)) {
        /* Don't even bother */
        crm_notice("Can't send %s IPC requests: Connection closed",
                   client->server_name);
        return -ENOTCONN;
    }

    if (ms_timeout == 0) {
        ms_timeout = 5000;      // Default timeout of 5 seconds
    }

    /* If an earlier request timed out waiting for its reply, try once more to
     * drain that reply before allowing any new sends
     */
    if (client->need_reply) {
        qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout);
        if (qb_rc < 0) {
            crm_warn("Sending %s IPC disabled until pending reply received",
                     client->server_name);
            return -EALREADY;

        } else {
            crm_notice("Sending %s IPC re-enabled after pending reply received",
                       client->server_name);
            client->need_reply = FALSE;
        }
    }

    id++;
    CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */
    rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes);
    if (rc != pcmk_rc_ok) {
        crm_warn("Couldn't prepare %s IPC request: %s " QB_XS " rc=%d",
                 client->server_name, pcmk_rc_str(rc), rc);
        return pcmk_rc2legacy(rc);
    }

    header = iov[0].iov_base;
    pcmk__set_ipc_flags(header->flags, client->server_name, flags);

    if (pcmk_is_set(flags, crm_ipc_proxied)) {
        /* Don't look for a synchronous response */
        pcmk__clear_ipc_flags(flags, "client", crm_ipc_client_response);
    }

    // Warn (at escalating thresholds) when compressed messages approach limit
    if(header->size_compressed) {
        if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) {
            crm_notice("Compressed message exceeds %d0%% of configured IPC "
                       "limit (%u bytes); consider setting PCMK_ipc_buffer to "
                       "%u or higher",
                       factor, client->max_buf_size, 2 * client->max_buf_size);
            factor++;
        }
    }

    crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout",
              client->server_name, header->qb.id, header->qb.size, ms_timeout);

    // Finite timeout, or fire-and-forget: send, then optionally wait for reply
    if ((ms_timeout > 0) || !pcmk_is_set(flags, crm_ipc_client_response)) {

        time_t timeout = time(NULL) + 1 + (ms_timeout / 1000);

        do {
            /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return
             * an error if it's not connected?
             */
            if (!crm_ipc_connected(client)) {
                goto send_cleanup;
            }

            qb_rc = qb_ipcc_sendv(client->ipc, iov, 2);
        } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout));

        rc = (int) qb_rc; // Negative of system errno, or bytes sent
        if (qb_rc <= 0) {
            goto send_cleanup;

        } else if (!pcmk_is_set(flags, crm_ipc_client_response)) {
            crm_trace("Not waiting for reply to %s IPC request %d",
                      client->server_name, header->qb.id);
            goto send_cleanup;
        }

        rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes);
        if (rc != pcmk_rc_ok) {
            /* We didn't get the reply in time, so disable future sends for now.
             * The only alternative would be to close the connection since we
             * don't know how to detect and discard out-of-sequence replies.
             *
             * @TODO Implement out-of-sequence detection
             */
            client->need_reply = TRUE;
        }
        rc = (int) bytes; // Negative system errno, or size of reply received

    } else {
        // No timeout, and client response needed
        do {
            qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer,
                                       client->buf_size, -1);
        } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client));
        rc = (int) qb_rc; // Negative system errno, or size of reply received
    }

    if (rc > 0) {
        pcmk__ipc_header_t *hdr = (pcmk__ipc_header_t *)(void*)client->buffer;

        crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s",
                  rc, hdr->qb.id, client->server_name, header->qb.id,
                  crm_ipc_buffer(client));

        if (reply) {
            *reply = pcmk__xml_parse(crm_ipc_buffer(client));
        }

    } else {
        crm_trace("No reply to %s IPC %d: rc=%d",
                  client->server_name, header->qb.id, rc);
    }

  send_cleanup:
    if (!crm_ipc_connected(client)) {
        crm_notice("Couldn't send %s IPC request %d: Connection closed "
                   QB_XS " rc=%d", client->server_name, header->qb.id, rc);

    } else if (rc == -ETIMEDOUT) {
        crm_warn("%s IPC request %d failed: %s after %dms " QB_XS " rc=%d",
                 client->server_name, header->qb.id, pcmk_strerror(rc),
                 ms_timeout, rc);
        crm_write_blackbox(0, NULL);

    } else if (rc <= 0) {
        crm_warn("%s IPC request %d failed: %s " QB_XS " rc=%d",
                 client->server_name, header->qb.id,
                 ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc);
    }

    pcmk_free_ipc_event(iov);
    return rc;
}
/*!
 * \brief Ensure an IPC provider has expected user or group
 *
 * \param[in]  qb_ipc  libqb client connection if available
 * \param[in]  sock    Connected Unix socket for IPC
 * \param[in]  refuid  Expected user ID
 * \param[in]  refgid  Expected group ID
 * \param[out] gotpid  If not NULL, where to store provider's actual process ID
 *                     (or 1 on platforms where ID is not available)
 * \param[out] gotuid  If not NULL, where to store provider's actual user ID
 * \param[out] gotgid  If not NULL, where to store provider's actual group ID
 *
 * \return Standard Pacemaker return code
 * \note An actual user ID of 0 (root) will always be considered authorized,
 *       regardless of the expected values provided. The caller can use the
 *       output arguments to be stricter than this function.
 * \note Each platform-specific method below is tried in turn until one
 *       succeeds; EOPNOTSUPP is returned if none is available.
 */
static int
is_ipc_provider_expected(qb_ipcc_connection_t *qb_ipc, int sock,
                         uid_t refuid, gid_t refgid,
                         pid_t *gotpid, uid_t *gotuid, gid_t *gotgid)
{
    int rc = EOPNOTSUPP;
    pid_t found_pid = 0;
    uid_t found_uid = 0;
    gid_t found_gid = 0;

#ifdef HAVE_QB_IPCC_AUTH_GET
    // Preferred: ask libqb for the peer's credentials directly
    if (qb_ipc != NULL) {
        rc = qb_ipcc_auth_get(qb_ipc, &found_pid, &found_uid, &found_gid);
        rc = -rc; // libqb returns 0 or -errno
        if (rc == pcmk_rc_ok) {
            goto found;
        }
    }
#endif

#ifdef HAVE_UCRED
    // Linux: SO_PEERCRED with struct ucred
    {
        struct ucred ucred;
        socklen_t ucred_len = sizeof(ucred);

        if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED, &ucred, &ucred_len) < 0) {
            rc = errno;
        } else if (ucred_len != sizeof(ucred)) {
            rc = EOPNOTSUPP;
        } else {
            found_pid = ucred.pid;
            found_uid = ucred.uid;
            found_gid = ucred.gid;
            goto found;
        }
    }
#endif

#ifdef HAVE_SOCKPEERCRED
    // OpenBSD: SO_PEERCRED with struct sockpeercred
    {
        struct sockpeercred sockpeercred;
        socklen_t sockpeercred_len = sizeof(sockpeercred);

        if (getsockopt(sock, SOL_SOCKET, SO_PEERCRED,
                       &sockpeercred, &sockpeercred_len) < 0) {
            rc = errno;
        } else if (sockpeercred_len != sizeof(sockpeercred)) {
            rc = EOPNOTSUPP;
        } else {
            found_pid = sockpeercred.pid;
            found_uid = sockpeercred.uid;
            found_gid = sockpeercred.gid;
            goto found;
        }
    }
#endif

#ifdef HAVE_GETPEEREID // For example, FreeBSD
    // getpeereid() provides no PID, so report the special placeholder
    if (getpeereid(sock, &found_uid, &found_gid) < 0) {
        rc = errno;
    } else {
        found_pid = PCMK__SPECIAL_PID;
        goto found;
    }
#endif

#ifdef HAVE_GETPEERUCRED
    // Solaris/illumos: getpeerucred()
    {
        ucred_t *ucred = NULL;

        if (getpeerucred(sock, &ucred) < 0) {
            rc = errno;
        } else {
            found_pid = ucred_getpid(ucred);
            found_uid = ucred_geteuid(ucred);
            found_gid = ucred_getegid(ucred);
            ucred_free(ucred);
            goto found;
        }
    }
#endif

    return rc; // If we get here, nothing succeeded

found:
    if (gotpid != NULL) {
        *gotpid = found_pid;
    }
    if (gotuid != NULL) {
        *gotuid = found_uid;
    }
    if (gotgid != NULL) {
        *gotgid = found_gid;
    }
    // Root is always authorized; otherwise either UID or GID must match
    if ((found_uid != 0) && (found_uid != refuid) && (found_gid != refgid)) {
        return pcmk_rc_ipc_unauthorized;
    }
    return pcmk_rc_ok;
}
/*!
 * \brief Legacy wrapper for checking IPC peer credentials on a socket
 *
 * \return 1 if authorized, 0 if unauthorized, or a negative legacy return
 *         code on error (this function predates standard return codes)
 */
int
crm_ipc_is_authentic_process(int sock, uid_t refuid, gid_t refgid,
                             pid_t *gotpid, uid_t *gotuid, gid_t *gotgid)
{
    int rc = is_ipc_provider_expected(NULL, sock, refuid, refgid,
                                      gotpid, gotuid, gotgid);

    // Translate the standard code into this function's historical convention
    switch (rc) {
        case pcmk_rc_ok:
            return 1;
        case pcmk_rc_ipc_unauthorized:
            return 0;
        default:
            return pcmk_rc2legacy(rc);
    }
}
/*!
 * \internal
 * \brief Check whether an IPC server is running with authorized credentials
 *
 * Connect to the named IPC endpoint, verify the server's user/group against
 * the expected values, and report the server's process ID if requested.
 *
 * \param[in]  name    IPC server (endpoint) name to connect to
 * \param[in]  refuid  Expected user ID of server
 * \param[in]  refgid  Expected group ID of server
 * \param[out] gotpid  If not NULL, where to store server's process ID
 *
 * \return Standard Pacemaker return code (pcmk_rc_ipc_unresponsive if no
 *         connection could be established)
 */
int
pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
                                      gid_t refgid, pid_t *gotpid)
{
    static char last_asked_name[PATH_MAX / 2] = ""; /* log spam prevention */
    int fd;
    int rc = pcmk_rc_ipc_unresponsive;
    int auth_rc = 0;
    int32_t qb_rc;
    pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
    qb_ipcc_connection_t *c;
#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
    struct pollfd pollfd = { 0, };
    int poll_rc;

    // Connect asynchronously so we can enforce our own timeout via poll()
    c = qb_ipcc_connect_async(name, 0,
                              &(pollfd.fd));
#else
    c = qb_ipcc_connect(name, 0);
#endif
    if (c == NULL) {
        crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
        rc = pcmk_rc_ipc_unresponsive;
        goto bail;
    }
#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
    // Wait up to 2s for the connection to become ready, retrying on EINTR
    pollfd.events = POLLIN;
    do {
        poll_rc = poll(&pollfd, 1, 2000);
    } while ((poll_rc == -1) && (errno == EINTR));

    /* If poll() failed, given that disconnect function is not registered yet,
     * qb_ipcc_disconnect() won't clean up the socket. In any case, call
     * qb_ipcc_connect_continue() here so that it may fail and do the cleanup
     * for us.
     */
    if (qb_ipcc_connect_continue(c) != 0) {
        crm_info("Could not connect to %s IPC: %s", name,
                 (poll_rc == 0)?"timeout":strerror(errno));
        rc = pcmk_rc_ipc_unresponsive;
        c = NULL; // qb_ipcc_connect_continue cleaned up for us
        goto bail;
    }
#endif
    // Need the underlying socket descriptor to query peer credentials
    qb_rc = qb_ipcc_fd_get(c, &fd);
    if (qb_rc != 0) {
        rc = (int) -qb_rc; // System errno
        crm_err("Could not get fd from %s IPC: %s " QB_XS " rc=%d",
                name, pcmk_rc_str(rc), rc);
        goto bail;
    }

    auth_rc = is_ipc_provider_expected(c, fd, refuid, refgid,
                                       &found_pid, &found_uid, &found_gid);
    if (auth_rc == pcmk_rc_ipc_unauthorized) {
        crm_err("Daemon (IPC %s) effectively blocked with unauthorized"
                " process %lld (uid: %lld, gid: %lld)",
                name, (long long) PCMK__SPECIAL_PID_AS_0(found_pid),
                (long long) found_uid, (long long) found_gid);
        rc = pcmk_rc_ipc_unauthorized;
        goto bail;
    }
    if (auth_rc != pcmk_rc_ok) {
        rc = auth_rc;
        crm_err("Could not get peer credentials from %s IPC: %s "
                QB_XS " rc=%d", name, pcmk_rc_str(rc), rc);
        goto bail;
    }

    if (gotpid != NULL) {
        *gotpid = found_pid;
    }
    rc = pcmk_rc_ok;

    /* Log (once per server name) when the server's credentials differ from
     * the expected ones, even though it was accepted as authorized above
     * (for example, because it runs as root)
     */
    if ((found_uid != refuid || found_gid != refgid)
        && strncmp(last_asked_name, name, sizeof(last_asked_name))) {
        if ((found_uid == 0) && (refuid != 0)) {
            crm_warn("Daemon (IPC %s) runs as root, whereas the expected"
                     " credentials are %lld:%lld, hazard of violating"
                     " the least privilege principle",
                     name, (long long) refuid, (long long) refgid);
        } else {
            crm_notice("Daemon (IPC %s) runs as %lld:%lld, whereas the"
                       " expected credentials are %lld:%lld, which may"
                       " mean a different set of privileges than expected",
                       name, (long long) found_uid, (long long) found_gid,
                       (long long) refuid, (long long) refgid);
        }
        memccpy(last_asked_name, name, '\0', sizeof(last_asked_name));
    }

bail:
    if (c != NULL) {
        qb_ipcc_disconnect(c);
    }
    return rc;
}
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 75b79e2f42..7f53f1cbf7 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -1,663 +1,670 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdint.h> // uint32_t
#include <stdio.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/ipc_controld.h>
#include "crmcommon_private.h"
// Private data for a controller IPC API connection
struct controld_api_private_s {
    char *client_uuid;              // Client's identifier (currently its PID; see new_data())
    unsigned int replies_expected;  // Number of controller replies not yet received
};
/*!
 * \internal
 * \brief Get a string representation of a controller API reply type
 *
 * \param[in] reply  Controller API reply type
 *
 * \return String representation of \p reply ("unknown" if unrecognized)
 */
const char *
pcmk__controld_api_reply2str(enum pcmk_controld_api_reply reply)
{
    if (reply == pcmk_controld_reply_reprobe) {
        return "reprobe";
    }
    if (reply == pcmk_controld_reply_info) {
        return "info";
    }
    if (reply == pcmk_controld_reply_resource) {
        return "resource";
    }
    if (reply == pcmk_controld_reply_ping) {
        return "ping";
    }
    if (reply == pcmk_controld_reply_nodes) {
        return "nodes";
    }
    return "unknown";
}
/*!
 * \internal
 * \brief Allocate private data for a controller IPC API connection
 *
 * \param[in,out] api  Controller connection to initialize
 *
 * \return Standard Pacemaker return code
 */
static int
new_data(pcmk_ipc_api_t *api)
{
    struct controld_api_private_s *private = NULL;

    api->api_data = calloc(1, sizeof(struct controld_api_private_s));
    if (api->api_data == NULL) {
        /* The C standard does not guarantee that calloc() sets errno on
         * failure, so return ENOMEM explicitly rather than whatever errno
         * happens to contain
         */
        return ENOMEM;
    }
    private = api->api_data;

    /* This is set to the PID because that's how it was always done, but PIDs
     * are not unique because clients can be remote. The value appears to be
     * unused other than as part of PCMK__XA_CRM_SYS_FROM in IPC requests, which
     * is only compared against the internal system names (CRM_SYSTEM_TENGINE,
     * etc.), so it shouldn't be a problem.
     */
    private->client_uuid = pcmk__getpid_s();

    /* @TODO Implement a call ID model similar to the CIB, executor, and fencer
     * IPC APIs, so that requests and replies can be matched, and
     * duplicate replies can be discarded.
     */
    return pcmk_rc_ok;
}
// Free the private data allocated by new_data()
static void
free_data(void *data)
{
    struct controld_api_private_s *private = data;

    free(private->client_uuid);
    free(private);
}
/*!
 * \internal
 * \brief Register this client with the controller after connecting
 *
 * \param[in,out] api  Controller connection
 *
 * \return Standard Pacemaker return code
 */
static int
post_connect(pcmk_ipc_api_t *api)
{
    /* The controller currently requires clients to register via a hello
     * request, but does not reply back.
     */
    struct controld_api_private_s *private = api->api_data;
    const char *client_name = crm_system_name? crm_system_name : "client";
    xmlNode *hello;
    int rc;

    hello = create_hello_message(private->client_uuid, client_name,
                                 PCMK__CONTROLD_API_MAJOR,
                                 PCMK__CONTROLD_API_MINOR);
    rc = pcmk__send_ipc_request(api, hello);
    pcmk__xml_free(hello);
    if (rc != pcmk_rc_ok) {
        /* rc is an int, so it must be logged with %d (the previous %s here
         * was a format-string/argument mismatch, which is undefined behavior)
         */
        crm_info("Could not send IPC hello to %s: %s " QB_XS " rc=%d",
                 pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc);
    } else {
        crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true));
    }
    return rc;
}
// Parse a CRM_OP_NODE_INFO reply's message data into the API reply struct
static void
set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
    data->reply_type = pcmk_controld_reply_info;
    if (msg_data == NULL) {
        return; // Reply had no message data; leave node info zeroed
    }
    data->data.node_info.have_quorum =
        pcmk__xe_attr_is_true(msg_data, PCMK_XA_HAVE_QUORUM);
    data->data.node_info.is_remote =
        pcmk__xe_attr_is_true(msg_data, PCMK_XA_REMOTE_NODE);

    /* Integer node_info.id is currently valid only for Corosync nodes.
     *
     * @TODO: Improve handling after pcmk__node_status_t is refactored to handle
     * layer-specific data better.
     */
    crm_element_value_int(msg_data, PCMK_XA_ID, &(data->data.node_info.id));

    /* NOTE(review): these string members appear to point into the reply XML,
     * so presumably they are valid only as long as msg_data is -- confirm
     * against crm_element_value()'s contract
     */
    data->data.node_info.uuid = crm_element_value(msg_data, PCMK_XA_ID);
    data->data.node_info.uname = crm_element_value(msg_data, PCMK_XA_UNAME);
    data->data.node_info.state = crm_element_value(msg_data, PCMK_XA_CRMD);
}
// Parse a CRM_OP_PING reply's message data into the API reply struct
static void
set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
    data->reply_type = pcmk_controld_reply_ping;
    if (msg_data != NULL) {
        data->data.ping.sys_from = crm_element_value(msg_data,
                                                     PCMK__XA_CRM_SUBSYSTEM);
        data->data.ping.fsa_state = crm_element_value(msg_data,
                                                      PCMK__XA_CRMD_STATE);
        data->data.ping.result = crm_element_value(msg_data, PCMK_XA_RESULT);
    }
}
// Parse a PCMK__CONTROLD_CMD_NODES reply's message data into the reply struct
static void
set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
    pcmk_controld_api_node_t *node_info;

    data->reply_type = pcmk_controld_reply_nodes;

    // Build a list entry for each <node> child of the message data
    for (xmlNode *node = pcmk__xe_first_child(msg_data, PCMK_XE_NODE, NULL,
                                              NULL);
         node != NULL; node = pcmk__xe_next_same(node)) {
        long long id_ll = 0;

        node_info = pcmk__assert_alloc(1, sizeof(pcmk_controld_api_node_t));
        crm_element_value_ll(node, PCMK_XA_ID, &id_ll);
        if (id_ll > 0) {
            node_info->id = id_ll;
        }
        node_info->uname = crm_element_value(node, PCMK_XA_UNAME);
        node_info->state = crm_element_value(node, PCMK__XA_IN_CCM);

        // Prepending is O(1); list ends up in reverse document order
        data->data.nodes = g_list_prepend(data->data.nodes, node_info);
    }
}
// Whether the controller is expected to reply to a given request
static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
    const char *command = crm_element_value(request, PCMK__XA_CRM_TASK);

    // We only need to handle commands that API functions can send
    return pcmk__str_any_of(command,
                            PCMK__CONTROLD_CMD_NODES, CRM_OP_LRM_DELETE,
                            CRM_OP_LRM_FAIL, CRM_OP_NODE_INFO, CRM_OP_PING,
                            CRM_OP_REPROBE, CRM_OP_RM_NODE_CACHE, NULL);
}
/* Process an incoming controller message (ack or reply), invoking the API's
 * event callback with parsed reply data; \return whether more replies are
 * expected
 */
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
    struct controld_api_private_s *private = api->api_data;
    crm_exit_t status = CRM_EX_OK;
    xmlNode *wrapper = NULL;
    xmlNode *msg_data = NULL;
    const char *value = NULL;
    pcmk_controld_api_reply_t reply_data = {
        pcmk_controld_reply_unknown, NULL, NULL,
    };

    if (pcmk__xe_is(reply, PCMK__XE_ACK)) {
        /* ACKs are trivial responses that do not count toward expected replies,
         * and do not have all the fields that validation requires, so skip that
         * processing.
         */
        return private->replies_expected > 0;
    }

    if (private->replies_expected > 0) {
        private->replies_expected--;
    }

    // Do some basic validation of the reply
    value = crm_element_value(reply, PCMK__XA_SUBT);
    if (pcmk__str_eq(value, PCMK__VALUE_REQUEST, pcmk__str_none)) {
        /* @COMPAT Controllers <3.0.0 set PCMK__XA_SUBT to PCMK__VALUE_REQUEST
         * for certain replies. Once we no longer support Pacemaker Remote nodes
         * connecting to cluster nodes <3.0.0, or rolling upgrades from <3.0.0,
         * we can drop this check.
         */
        crm_trace("Received a reply that was marked as a request "
                  "(bug unless sent by a controller <3.0.0)");
    } else if (!pcmk__str_eq(value, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
        crm_info("Unrecognizable message from controller: "
                 "invalid message type '%s'", pcmk__s(value, ""));
        status = CRM_EX_PROTOCOL;
        goto done;
    }

    if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) {
        crm_info("Unrecognizable message from controller: no reference");
        status = CRM_EX_PROTOCOL;
        goto done;
    }

    value = crm_element_value(reply, PCMK__XA_CRM_TASK);
    if (pcmk__str_empty(value)) {
        crm_info("Unrecognizable message from controller: no command name");
        status = CRM_EX_PROTOCOL;
        goto done;
    }

    // Parse useful info from reply
    reply_data.feature_set = crm_element_value(reply, PCMK_XA_VERSION);
    reply_data.host_from = crm_element_value(reply, PCMK__XA_SRC);

    // Message data is wrapped inside a PCMK__XE_CRM_XML element
    wrapper = pcmk__xe_first_child(reply, PCMK__XE_CRM_XML, NULL, NULL);
    msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);

    // Dispatch on the command name to build command-specific reply data
    if (!strcmp(value, CRM_OP_REPROBE)) {
        reply_data.reply_type = pcmk_controld_reply_reprobe;
    } else if (!strcmp(value, CRM_OP_NODE_INFO)) {
        set_node_info_data(&reply_data, msg_data);
    } else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
        reply_data.reply_type = pcmk_controld_reply_resource;
        reply_data.data.resource.node_state = msg_data;
    } else if (!strcmp(value, CRM_OP_PING)) {
        set_ping_data(&reply_data, msg_data);
    } else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
        set_nodes_data(&reply_data, msg_data);
    } else {
        crm_info("Unrecognizable message from controller: unknown command '%s'",
                 value);
        status = CRM_EX_PROTOCOL;
    }

done:
    pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);

    // Free any reply data that was allocated
    if (pcmk__str_eq(value, PCMK__CONTROLD_CMD_NODES, pcmk__str_casei)) {
        g_list_free_full(reply_data.data.nodes, free);
    }
    return false; // No further replies needed
}
// Allocate and populate the controller IPC method table (NULL if out of memory)
pcmk__ipc_methods_t *
pcmk__controld_api_methods(void)
{
    pcmk__ipc_methods_t *methods = calloc(1, sizeof(pcmk__ipc_methods_t));

    if (methods == NULL) {
        return NULL;
    }
    methods->new_data = new_data;
    methods->free_data = free_data;
    methods->post_connect = post_connect;
    methods->reply_expected = reply_expected;
    methods->dispatch = dispatch;
    return methods;
}
/*!
* \internal
* \brief Create XML for a controller IPC request
*
* \param[in] api Controller connection
* \param[in] op Controller IPC command name
* \param[in] node Node name to set as destination host
* \param[in] msg_data XML to attach to request as message data
*
* \return Newly allocated XML for request
*/
static xmlNode *
create_controller_request(const pcmk_ipc_api_t *api, const char *op,
const char *node, xmlNode *msg_data)
{
struct controld_api_private_s *private = NULL;
const char *sys_to = NULL;
+ char *sender_system = NULL;
+ xmlNode *request = NULL;
if (api == NULL) {
return NULL;
}
private = api->api_data;
if ((node == NULL) && !strcmp(op, CRM_OP_PING)) {
sys_to = CRM_SYSTEM_DC;
} else {
sys_to = CRM_SYSTEM_CRMD;
}
- return create_request(op, msg_data, node, sys_to,
- (crm_system_name? crm_system_name : "client"),
- private->client_uuid);
+ sender_system = crm_strdup_printf("%s_%s", private->client_uuid,
+ pcmk__s(crm_system_name, "client"));
+ request = create_request(op, msg_data, node, sys_to, sender_system);
+ free(sender_system);
+ return request;
}
/* Send a fully built request to the controller, tracking whether a reply is
 * expected; \return standard Pacemaker return code
 */
static int
send_controller_request(pcmk_ipc_api_t *api, const xmlNode *request,
                        bool reply_is_expected)
{
    struct controld_api_private_s *private = api->api_data;

    // Without a reference ID, a reply could never be matched to this request
    if (crm_element_value(request, PCMK_XA_REFERENCE) == NULL) {
        return EINVAL;
    }
    if (reply_is_expected) {
        private->replies_expected++;
    }
    return pcmk__send_ipc_request(api, request);
}
// Build the message data element for a CRM_OP_REPROBE request
static xmlNode *
create_reprobe_message_data(const char *target_node, const char *router_node)
{
    xmlNode *msg_data = pcmk__xe_create(NULL, "data_for_" CRM_OP_REPROBE);

    crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);

    // Record the router only when it differs from the target
    if ((router_node != NULL)
        && !pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
        crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
    }
    return msg_data;
}
/*!
 * \brief Send a reprobe controller operation
 *
 * \param[in,out] api          Controller connection
 * \param[in]     target_node  Name of node to reprobe
 * \param[in]     router_node  Router node for host
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_reprobe.
 */
int
pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node,
                          const char *router_node)
{
    int rc = pcmk_rc_ok;
    xmlNode *msg_data = NULL;
    xmlNode *request = NULL;

    if (api == NULL) {
        return EINVAL;
    }
    if (router_node == NULL) {
        // Default to routing via the target itself
        router_node = target_node;
    }
    crm_debug("Sending %s IPC request to reprobe %s via %s",
              pcmk_ipc_name(api, true), pcmk__s(target_node, "local node"),
              pcmk__s(router_node, "local node"));
    msg_data = create_reprobe_message_data(target_node, router_node);
    request = create_controller_request(api, CRM_OP_REPROBE, router_node,
                                        msg_data);
    rc = send_controller_request(api, request, true);
    pcmk__xml_free(msg_data);
    pcmk__xml_free(request);
    return rc;
}
/*!
 * \brief Send a "node info" controller operation
 *
 * \param[in,out] api     Controller connection
 * \param[in]     nodeid  ID of node to get info for (or 0 for local node)
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_info.
 */
int
pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = create_controller_request(api, CRM_OP_NODE_INFO, NULL,
                                                 NULL);

    if (request == NULL) {
        return EINVAL;
    }
    if (nodeid > 0) {
        // Ask about the specified node rather than the local one
        crm_xml_add_ll(request, PCMK_XA_ID, nodeid);
    }
    rc = send_controller_request(api, request, true);
    pcmk__xml_free(request);
    return rc;
}
/*!
 * \brief Ask the controller for status
 *
 * \param[in,out] api        Controller connection
 * \param[in]     node_name  Name of node whose status is desired (NULL for DC)
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_ping.
 */
int
pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name)
{
    int rc = pcmk_rc_ok;
    xmlNode *request = create_controller_request(api, CRM_OP_PING, node_name,
                                                 NULL);

    if (request == NULL) {
        return EINVAL;
    }
    rc = send_controller_request(api, request, true);
    pcmk__xml_free(request);
    return rc;
}
/*!
 * \brief Ask the controller for cluster information
 *
 * \param[in,out] api  Controller connection
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_nodes.
 */
int
pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api)
{
    int rc = EINVAL;
    xmlNode *request = create_controller_request(api, PCMK__CONTROLD_CMD_NODES,
                                                 NULL, NULL);

    if (request != NULL) {
        rc = send_controller_request(api, request, true);
        pcmk__xml_free(request);
    }
    return rc;
}
/* Build and send a resource-related controller operation (used for
 * CRM_OP_LRM_FAIL and CRM_OP_LRM_DELETE); \return standard Pacemaker
 * return code
 */
static int
controller_resource_op(pcmk_ipc_api_t *api, const char *op,
                       const char *target_node, const char *router_node,
                       bool cib_only, const char *rsc_id,
                       const char *rsc_long_id, const char *standard,
                       const char *provider, const char *type)
{
    int rc = pcmk_rc_ok;
    char *key;
    xmlNode *request, *msg_data, *xml_rsc, *params;

    if (api == NULL) {
        return EINVAL;
    }
    if (router_node == NULL) {
        // Default to routing via the target itself
        router_node = target_node;
    }

    msg_data = pcmk__xe_create(NULL, PCMK__XE_RSC_OP);

    /* The controller logs the transition key from resource op requests, so we
     * need to have *something* for it.
     * @TODO don't use "crm-resource"
     */
    key = pcmk__transition_key(0, getpid(), 0,
                               "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx");
    crm_xml_add(msg_data, PCMK__XA_TRANSITION_KEY, key);
    free(key);

    crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);
    if (!pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
        crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
    }
    if (cib_only) {
        // Indicate that only the CIB needs to be cleaned
        crm_xml_add(msg_data, PCMK__XA_MODE, PCMK__VALUE_CIB);
    }

    // Describe the resource (ID plus agent standard/provider/type)
    xml_rsc = pcmk__xe_create(msg_data, PCMK_XE_PRIMITIVE);
    crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
    crm_xml_add(xml_rsc, PCMK__XA_LONG_ID, rsc_long_id);
    crm_xml_add(xml_rsc, PCMK_XA_CLASS, standard);
    crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, provider);
    crm_xml_add(xml_rsc, PCMK_XA_TYPE, type);

    params = pcmk__xe_create(msg_data, PCMK__XE_ATTRIBUTES);
    crm_xml_add(params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);

    // The controller parses the timeout from the request
    key = crm_meta_name(PCMK_META_TIMEOUT);
    crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg
    free(key);

    request = create_controller_request(api, op, router_node, msg_data);
    rc = send_controller_request(api, request, true);
    pcmk__xml_free(msg_data);
    pcmk__xml_free(request);
    return rc;
}
/*!
 * \brief Ask the controller to fail a resource
 *
 * \param[in,out] api          Controller connection
 * \param[in]     target_node  Name of node resource is on
 * \param[in]     router_node  Router node for target
 * \param[in]     rsc_id       ID of resource to fail
 * \param[in]     rsc_long_id  Long ID of resource (if any)
 * \param[in]     standard     Standard of resource
 * \param[in]     provider     Provider of resource (if any)
 * \param[in]     type         Type of resource to fail
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_resource.
 */
int
pcmk_controld_api_fail(pcmk_ipc_api_t *api,
                       const char *target_node, const char *router_node,
                       const char *rsc_id, const char *rsc_long_id,
                       const char *standard, const char *provider,
                       const char *type)
{
    crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s",
              pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
              pcmk__s(rsc_long_id, "no other names"),
              pcmk__s(target_node, "unspecified node"),
              pcmk__s(router_node, "unspecified node"));
    // Thin wrapper: all the real work happens in controller_resource_op()
    return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node,
                                  router_node, false, rsc_id, rsc_long_id,
                                  standard, provider, type);
}
/*!
 * \brief Ask the controller to refresh a resource
 *
 * \param[in,out] api          Controller connection
 * \param[in]     target_node  Name of node resource is on
 * \param[in]     router_node  Router node for target
 * \param[in]     rsc_id       ID of resource to refresh
 * \param[in]     rsc_long_id  Long ID of resource (if any)
 * \param[in]     standard     Standard of resource
 * \param[in]     provider     Provider of resource (if any)
 * \param[in]     type         Type of resource
 * \param[in]     cib_only     If true, clean resource from CIB only
 *
 * \return Standard Pacemaker return code
 * \note Event callback will get a reply of type pcmk_controld_reply_resource.
 */
int
pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node,
                          const char *router_node,
                          const char *rsc_id, const char *rsc_long_id,
                          const char *standard, const char *provider,
                          const char *type, bool cib_only)
{
    crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s",
              pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
              pcmk__s(rsc_long_id, "no other names"),
              pcmk__s(target_node, "unspecified node"),
              pcmk__s(router_node, "unspecified node"));
    // Thin wrapper: all the real work happens in controller_resource_op()
    return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node,
                                  router_node, cib_only, rsc_id, rsc_long_id,
                                  standard, provider, type);
}
/*!
* \brief Get the number of IPC replies currently expected from the controller
*
* \param[in] api Controller IPC API connection
*
* \return Number of replies expected
*/
unsigned int
pcmk_controld_api_replies_expected(const pcmk_ipc_api_t *api)
{
struct controld_api_private_s *private = api->api_data;
return private->replies_expected;
}
/*!
* \brief Create XML for a controller IPC "hello" message
*
* \deprecated This function is deprecated as part of the public C API.
*/
// \todo make this static to this file when breaking API backward compatibility
xmlNode *
create_hello_message(const char *uuid, const char *client_name,
const char *major_version, const char *minor_version)
{
xmlNode *hello_node = NULL;
xmlNode *hello = NULL;
+ char *sender_system = NULL;
if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name)
|| pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"missing information",
client_name? client_name : "unknown client",
uuid? uuid : "unknown");
return NULL;
}
hello_node = pcmk__xe_create(NULL, PCMK__XE_OPTIONS);
crm_xml_add(hello_node, PCMK__XA_MAJOR_VERSION, major_version);
crm_xml_add(hello_node, PCMK__XA_MINOR_VERSION, minor_version);
crm_xml_add(hello_node, PCMK__XA_CLIENT_NAME, client_name);
// @TODO Nothing uses this. Drop, or keep for debugging?
crm_xml_add(hello_node, PCMK__XA_CLIENT_UUID, uuid);
- hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid);
+ sender_system = crm_strdup_printf("%s_%s", uuid, client_name);
+ hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, sender_system);
+ free(sender_system);
+ pcmk__xml_free(hello_node);
if (hello == NULL) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"Request creation failed", client_name, uuid);
return NULL;
}
- pcmk__xml_free(hello_node);
crm_trace("Created hello message from %s (UUID %s)", client_name, uuid);
return hello;
}
diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c
index 7377838b29..f238b9ab27 100644
--- a/lib/common/ipc_pacemakerd.c
+++ b/lib/common/ipc_pacemakerd.c
@@ -1,321 +1,323 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/ipc_pacemakerd.h>
#include "crmcommon_private.h"
// Private data for a pacemakerd IPC API connection
typedef struct pacemakerd_api_private_s {
    enum pcmk_pacemakerd_state state;  // Last known pacemakerd state
    char *client_uuid;                 // Client's identifier (its PID; see new_data())
} pacemakerd_api_private_t;
/* String equivalents of enum pcmk_pacemakerd_state, indexed by the enum
 * values (so entry order here must match the enum's order)
 */
static const char *pacemakerd_state_str[] = {
    PCMK__VALUE_INIT,
    PCMK__VALUE_STARTING_DAEMONS,
    PCMK__VALUE_WAIT_FOR_PING,
    PCMK__VALUE_RUNNING,
    PCMK__VALUE_SHUTTING_DOWN,
    PCMK__VALUE_SHUTDOWN_COMPLETE,
    PCMK_VALUE_REMOTE,
};
/* Map a pacemakerd state string to its enum value
 * (pcmk_pacemakerd_state_invalid if NULL or unrecognized)
 */
enum pcmk_pacemakerd_state
pcmk_pacemakerd_api_daemon_state_text2enum(const char *state)
{
    if (state != NULL) {
        // Scan the state-string table for a match
        for (int lpc = pcmk_pacemakerd_state_init;
             lpc <= pcmk_pacemakerd_state_max; lpc++) {
            if (pcmk__str_eq(state, pacemakerd_state_str[lpc],
                             pcmk__str_none)) {
                return lpc;
            }
        }
    }
    return pcmk_pacemakerd_state_invalid;
}
// Map a pacemakerd state enum value to its string ("invalid" if out of range)
const char *
pcmk_pacemakerd_api_daemon_state_enum2text(
    enum pcmk_pacemakerd_state state)
{
    if ((state < pcmk_pacemakerd_state_init)
        || (state > pcmk_pacemakerd_state_max)) {
        return "invalid";
    }
    return pacemakerd_state_str[state];
}
/*!
 * \internal
 * \brief Return a friendly string representation of a \p pacemakerd state
 *
 * \param[in] state  \p pacemakerd state
 *
 * \return A user-friendly string representation of \p state, or
 *         <tt>"Invalid pacemakerd state"</tt>
 */
const char *
pcmk__pcmkd_state_enum2friendly(enum pcmk_pacemakerd_state state)
{
    if (state == pcmk_pacemakerd_state_init) {
        return "Initializing pacemaker";
    }
    if (state == pcmk_pacemakerd_state_starting_daemons) {
        return "Pacemaker daemons are starting";
    }
    if (state == pcmk_pacemakerd_state_wait_for_ping) {
        return "Waiting for startup trigger from SBD";
    }
    if (state == pcmk_pacemakerd_state_running) {
        return "Pacemaker is running";
    }
    if (state == pcmk_pacemakerd_state_shutting_down) {
        return "Pacemaker daemons are shutting down";
    }
    if (state == pcmk_pacemakerd_state_shutdown_complete) {
        /* Assuming pacemakerd won't process messages while in
         * shutdown_complete state unless reporting to SBD
         */
        return "Pacemaker daemons are shut down (reporting to SBD)";
    }
    if (state == pcmk_pacemakerd_state_remote) {
        return "pacemaker-remoted is running (on a Pacemaker Remote node)";
    }
    return "Invalid pacemakerd state";
}
/*!
 * \internal
 * \brief Get a string representation of a \p pacemakerd API reply type
 *
 * \param[in] reply  \p pacemakerd API reply type
 *
 * \return String representation of \p reply ("unknown" if unrecognized)
 */
const char *
pcmk__pcmkd_api_reply2str(enum pcmk_pacemakerd_api_reply reply)
{
    if (reply == pcmk_pacemakerd_reply_ping) {
        return "ping";
    }
    if (reply == pcmk_pacemakerd_reply_shutdown) {
        return "shutdown";
    }
    return "unknown";
}
/*!
 * \internal
 * \brief Allocate private data for a pacemakerd IPC API connection
 *
 * \param[in,out] api  Connection to initialize
 *
 * \return Standard Pacemaker return code
 */
static int
new_data(pcmk_ipc_api_t *api)
{
    struct pacemakerd_api_private_s *private = NULL;

    api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s));
    if (api->api_data == NULL) {
        /* The C standard does not guarantee that calloc() sets errno on
         * failure, so return ENOMEM explicitly rather than whatever errno
         * happens to contain
         */
        return ENOMEM;
    }
    private = api->api_data;
    private->state = pcmk_pacemakerd_state_invalid;
    /* other as with cib, controld, ... we are addressing pacemakerd just
       from the local node -> pid is unique and thus sufficient as an ID
     */
    private->client_uuid = pcmk__getpid_s();
    return pcmk_rc_ok;
}
// Free the private data allocated by new_data()
static void
free_data(void *data)
{
    struct pacemakerd_api_private_s *private = data;

    free(private->client_uuid);
    free(private);
}
/* Reset the tracked pacemakerd state after connecting;
 * \return standard Pacemaker return code
 */
static int
post_connect(pcmk_ipc_api_t *api)
{
    struct pacemakerd_api_private_s *private = api->api_data;

    if (private == NULL) {
        return EINVAL;
    }
    private->state = pcmk_pacemakerd_state_invalid;
    return pcmk_rc_ok;
}
// Reset the tracked pacemakerd state when the connection goes away
static void
post_disconnect(pcmk_ipc_api_t *api)
{
    struct pacemakerd_api_private_s *private = api->api_data;

    if (private != NULL) {
        private->state = pcmk_pacemakerd_state_invalid;
    }
}
// Whether pacemakerd is expected to reply to a given request
static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
    const char *command = crm_element_value(request, PCMK__XA_CRM_TASK);

    // We only need to handle commands that functions in this file can send
    return (command != NULL)
           && pcmk__str_any_of(command, CRM_OP_PING, CRM_OP_QUIT, NULL);
}
/* Process an incoming pacemakerd message (ack or reply), invoking the API's
 * event callback with parsed reply data; \return whether more replies are
 * expected
 */
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
    crm_exit_t status = CRM_EX_OK;
    xmlNode *wrapper = NULL;
    xmlNode *msg_data = NULL;
    pcmk_pacemakerd_api_reply_t reply_data = {
        pcmk_pacemakerd_reply_unknown
    };
    const char *value = NULL;
    long long value_ll = 0;

    if (pcmk__xe_is(reply, PCMK__XE_ACK)) {
        long long int ack_status = 0;

        pcmk__scan_ll(crm_element_value(reply, PCMK_XA_STATUS), &ack_status,
                      CRM_EX_OK);
        // An ack of CRM_EX_INDETERMINATE means a real reply is still coming
        return ack_status == CRM_EX_INDETERMINATE;
    }

    // Do some basic validation of the reply
    value = crm_element_value(reply, PCMK__XA_SUBT);
    if (!pcmk__str_eq(value, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
        crm_info("Unrecognizable message from %s: "
                 "message type '%s' not '" PCMK__VALUE_RESPONSE "'",
                 pcmk_ipc_name(api, true), pcmk__s(value, ""));
        status = CRM_EX_PROTOCOL;
        goto done;
    }
    if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) {
        crm_info("Unrecognizable message from %s: no reference",
                 pcmk_ipc_name(api, true));
        status = CRM_EX_PROTOCOL;
        goto done;
    }

    value = crm_element_value(reply, PCMK__XA_CRM_TASK);

    // Parse useful info from reply
    wrapper = pcmk__xe_first_child(reply, PCMK__XE_CRM_XML, NULL, NULL);
    msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
    crm_element_value_ll(msg_data, PCMK_XA_CRM_TIMESTAMP, &value_ll);

    if (pcmk__str_eq(value, CRM_OP_PING, pcmk__str_none)) {
        reply_data.reply_type = pcmk_pacemakerd_reply_ping;
        reply_data.data.ping.state =
            pcmk_pacemakerd_api_daemon_state_text2enum(
                crm_element_value(msg_data, PCMK__XA_PACEMAKERD_STATE));
        reply_data.data.ping.status =
            pcmk__str_eq(crm_element_value(msg_data, PCMK_XA_RESULT), "ok",
                         pcmk__str_casei)?pcmk_rc_ok:pcmk_rc_error;
        reply_data.data.ping.last_good = (value_ll < 0)? 0 : (time_t) value_ll;
        reply_data.data.ping.sys_from =
            crm_element_value(msg_data, PCMK__XA_CRM_SUBSYSTEM);
    } else if (pcmk__str_eq(value, CRM_OP_QUIT, pcmk__str_none)) {
        const char *op_status = crm_element_value(msg_data, PCMK__XA_OP_STATUS);

        reply_data.reply_type = pcmk_pacemakerd_reply_shutdown;

        /* The status attribute may be absent; calling atoi(NULL) is undefined
         * behavior, so substitute "0" (the value atoi() would yield for any
         * non-numeric string) in that case
         */
        reply_data.data.shutdown.status = atoi(pcmk__s(op_status, "0"));
    } else {
        crm_info("Unrecognizable message from %s: unknown command '%s'",
                 pcmk_ipc_name(api, true), pcmk__s(value, ""));
        status = CRM_EX_PROTOCOL;
        goto done;
    }

done:
    pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
    return false;
}
// Allocate and populate the pacemakerd IPC method table (NULL if out of memory)
pcmk__ipc_methods_t *
pcmk__pacemakerd_api_methods(void)
{
    pcmk__ipc_methods_t *methods = calloc(1, sizeof(pcmk__ipc_methods_t));

    if (methods == NULL) {
        return NULL;
    }
    methods->new_data = new_data;
    methods->free_data = free_data;
    methods->post_connect = post_connect;
    methods->reply_expected = reply_expected;
    methods->dispatch = dispatch;
    methods->post_disconnect = post_disconnect;
    return methods;
}
/* Build and send a pacemakerd IPC request for \p task (e.g. CRM_OP_PING,
 * CRM_OP_QUIT); returns a standard Pacemaker return code.
 *
 * NOTE(review): this hunk migrates create_request() from separate
 * (sys_from, uuid_from) arguments to a single pre-formatted
 * "<uuid>_<system>" sender string (see create_request_adv in messages.c).
 */
static int
do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *task)
{
pacemakerd_api_private_t *private;
+ char *sender_system = NULL;
xmlNode *cmd;
int rc;
if (api == NULL) {
return EINVAL;
}
private = api->api_data;
CRM_ASSERT(private != NULL);
- cmd = create_request(task, NULL, NULL, CRM_SYSTEM_MCP,
- pcmk__ipc_sys_name(ipc_name, "client"),
- private->client_uuid);
+ sender_system = crm_strdup_printf("%s_%s", private->client_uuid,
+ pcmk__ipc_sys_name(ipc_name, "client"));
+ cmd = create_request(task, NULL, NULL, CRM_SYSTEM_MCP, sender_system);
+ free(sender_system);
if (cmd) {
rc = pcmk__send_ipc_request(api, cmd);
if (rc != pcmk_rc_ok) {
crm_debug("Couldn't send request to %s: %s rc=%d",
pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc);
}
pcmk__xml_free(cmd);
} else {
rc = ENOMSG;
}
return rc;
}
/*!
 * \brief Send a ping (CRM_OP_PING) to pacemakerd
 *
 * \param[in,out] api       Pacemakerd IPC API instance
 * \param[in]     ipc_name  Client name to include in the request
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name)
{
return do_pacemakerd_api_call(api, ipc_name, CRM_OP_PING);
}
/*!
 * \brief Ask pacemakerd to shut down (CRM_OP_QUIT)
 *
 * \param[in,out] api       Pacemakerd IPC API instance
 * \param[in]     ipc_name  Client name to include in the request
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_pacemakerd_api_shutdown(pcmk_ipc_api_t *api, const char *ipc_name)
{
return do_pacemakerd_api_call(api, ipc_name, CRM_OP_QUIT);
}
diff --git a/lib/common/ipc_schedulerd.c b/lib/common/ipc_schedulerd.c
index 61f0900646..cc2ad42a62 100644
--- a/lib/common/ipc_schedulerd.c
+++ b/lib/common/ipc_schedulerd.c
@@ -1,184 +1,186 @@
/*
* Copyright 2021-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdlib.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/ipc_schedulerd.h>
#include "crmcommon_private.h"
// Private data attached to a schedulerd IPC API instance
typedef struct schedulerd_api_private_s {
char *client_uuid; // Unique client ID (set to the PID string in new_data())
} schedulerd_api_private_t;
// Allocate API-private data; \return standard Pacemaker return code
static int
new_data(pcmk_ipc_api_t *api)
{
    api->api_data = calloc(1, sizeof(struct schedulerd_api_private_s));

    if (api->api_data != NULL) {
        struct schedulerd_api_private_s *data = api->api_data;

        /* See comments in ipc_pacemakerd.c. */
        data->client_uuid = pcmk__getpid_s();
        return pcmk_rc_ok;
    }
    return errno; // calloc() failed
}
// Release the private data allocated by new_data()
static void
free_data(void *data)
{
    struct schedulerd_api_private_s *private = data;

    free(private->client_uuid);
    free(private);
}
// \return Standard Pacemaker return code
static int
post_connect(pcmk_ipc_api_t *api)
{
    // Nothing to send on connect; just verify private data was allocated
    return (api->api_data == NULL)? EINVAL : pcmk_rc_ok;
}
// \return true if \p request is a command that expects a reply
static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
    // We only need to handle commands that functions in this file can send
    const char *task = crm_element_value(request, PCMK__XA_CRM_TASK);

    return (task != NULL) && pcmk__str_any_of(task, CRM_OP_PECALC, NULL);
}
/* Parse one message from the scheduler and deliver the result to the
 * registered IPC callback. Always returns false (no further dispatching
 * is requested from the caller).
 */
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
crm_exit_t status = CRM_EX_OK;
xmlNode *wrapper = NULL;
xmlNode *msg_data = NULL;
pcmk_schedulerd_api_reply_t reply_data = {
pcmk_schedulerd_reply_unknown
};
const char *value = NULL;
// Protocol ACKs carry nothing the client needs to see
if (pcmk__xe_is(reply, PCMK__XE_ACK)) {
return false;
}
// Anything other than a response with a reference is a protocol error
value = crm_element_value(reply, PCMK__XA_SUBT);
if (!pcmk__str_eq(value, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
crm_info("Unrecognizable message from schedulerd: "
"message type '%s' not '" PCMK__VALUE_RESPONSE "'",
pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) {
crm_info("Unrecognizable message from schedulerd: no reference");
status = CRM_EX_PROTOCOL;
goto done;
}
// Parse useful info from reply
wrapper = pcmk__xe_first_child(reply, PCMK__XE_CRM_XML, NULL, NULL);
msg_data = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
value = crm_element_value(reply, PCMK__XA_CRM_TASK);
// The only reply we expect is a transition graph for CRM_OP_PECALC
if (pcmk__str_eq(value, CRM_OP_PECALC, pcmk__str_none)) {
reply_data.reply_type = pcmk_schedulerd_reply_graph;
reply_data.data.graph.reference = crm_element_value(reply,
PCMK_XA_REFERENCE);
reply_data.data.graph.input = crm_element_value(reply,
PCMK__XA_CRM_TGRAPH_IN);
reply_data.data.graph.tgraph = msg_data;
} else {
crm_info("Unrecognizable message from schedulerd: "
"unknown command '%s'", pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
done:
// Deliver the (possibly error) result to the registered callback
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
return false;
}
/*!
 * \internal
 * \brief Build the method table for the schedulerd IPC API
 *
 * \return Newly allocated method table, or NULL if allocation failed
 */
pcmk__ipc_methods_t *
pcmk__schedulerd_api_methods(void)
{
    pcmk__ipc_methods_t *methods = calloc(1, sizeof(pcmk__ipc_methods_t));

    if (methods == NULL) {
        return NULL;
    }
    methods->new_data = new_data;
    methods->free_data = free_data;
    methods->post_connect = post_connect;
    methods->reply_expected = reply_expected;
    methods->dispatch = dispatch;
    return methods;
}
/* Build and send a schedulerd IPC request for \p task, storing the request's
 * reference string in \p *ref (caller frees); returns a standard Pacemaker
 * return code.
 *
 * NOTE(review): same create_request() signature migration as in
 * ipc_pacemakerd.c — sender is now a single "<uuid>_<system>" string.
 * The strdup() below is safe because pcmk__new_message_as() always sets
 * PCMK_XA_REFERENCE on requests (see messages.c).
 */
static int
do_schedulerd_api_call(pcmk_ipc_api_t *api, const char *task, xmlNode *cib, char **ref)
{
schedulerd_api_private_t *private;
xmlNode *cmd = NULL;
int rc;
+ char *sender_system = NULL;
if (!pcmk_ipc_is_connected(api)) {
return ENOTCONN;
}
private = api->api_data;
CRM_ASSERT(private != NULL);
- cmd = create_request(task, cib, NULL, CRM_SYSTEM_PENGINE,
- crm_system_name? crm_system_name : "client",
- private->client_uuid);
+ sender_system = crm_strdup_printf("%s_%s", private->client_uuid,
+ pcmk__s(crm_system_name, "client"));
+ cmd = create_request(task, cib, NULL, CRM_SYSTEM_PENGINE, sender_system);
+ free(sender_system);
if (cmd) {
rc = pcmk__send_ipc_request(api, cmd);
if (rc != pcmk_rc_ok) {
crm_debug("Couldn't send request to schedulerd: %s rc=%d",
pcmk_rc_str(rc), rc);
}
*ref = strdup(crm_element_value(cmd, PCMK_XA_REFERENCE));
pcmk__xml_free(cmd);
} else {
rc = ENOMSG;
}
return rc;
}
/*!
 * \brief Ask the scheduler to calculate a transition graph for \p cib
 *
 * \param[in,out] api  Schedulerd IPC API instance
 * \param[in]     cib  CIB to feed to the scheduler
 * \param[out]    ref  Request reference string (caller must free)
 *
 * \return Standard Pacemaker return code
 */
int
pcmk_schedulerd_api_graph(pcmk_ipc_api_t *api, xmlNode *cib, char **ref)
{
return do_schedulerd_api_call(api, CRM_OP_PECALC, cib, ref);
}
diff --git a/lib/common/messages.c b/lib/common/messages.c
index 4111f92e1f..4a8a3ce4c1 100644
--- a/lib/common/messages.c
+++ b/lib/common/messages.c
@@ -1,292 +1,278 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <time.h> // time()
#include <sys/types.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
/*!
* \internal
* \brief Create message XML (for IPC or the cluster layer)
*
* Create standard, generic XML that can be used as a message sent via IPC or
* the cluster layer. Currently, not all IPC and cluster layer messaging uses
* this, but it should (eventually, keeping backward compatibility in mind).
*
* \param[in] origin Name of function that called this one (required)
* \param[in] server Server whose protocol defines message semantics
* \param[in] reply_to If NULL, create message as a request with a
* generated message ID, otherwise create message
* as a reply to this message ID
* \param[in] sender_system Sender's subsystem (required; this is an
* arbitrary string that may have meaning between
* the sender and recipient)
* \param[in] recipient_node If not NULL, add as message's recipient node
* (NULL typically indicates a broadcast message)
* \param[in] recipient_system If not NULL, add as message's recipient
* subsystem (this is an arbitrary string that may
* have meaning between the sender and recipient)
* \param[in] task Add as message's task (required)
* \param[in] data If not NULL, copy as message's data (callers
* should not add attributes to the returned
* message element, but instead pass any desired
* information here, though this is not always
* honored currently)
*
* \return Newly created message XML
*
* \note This function should usually not be called directly, but via the
* pcmk__new_message() wrapper.
* \note The caller is responsible for freeing the return value using
* \c pcmk__xml_free().
*/
xmlNode *
pcmk__new_message_as(const char *origin, enum pcmk_ipc_server server,
                     const char *reply_to, const char *sender_system,
                     const char *recipient_node, const char *recipient_system,
                     const char *task, xmlNode *data)
{
    static unsigned int message_counter = 0U;

    xmlNode *message = NULL;
    char *generated_id = NULL;
    const char *subtype = PCMK__VALUE_RESPONSE;

    CRM_CHECK(!pcmk__str_empty(origin)
              && !pcmk__str_empty(sender_system)
              && !pcmk__str_empty(task),
              return NULL);

    if (reply_to == NULL) {
        /* No message to reply to, so this is a request; generate a unique
         * message ID from the task, sender, timestamp, and a counter
         */
        subtype = PCMK__VALUE_REQUEST;
        generated_id = crm_strdup_printf("%s-%s-%llu-%u", task, sender_system,
                                         (unsigned long long) time(NULL),
                                         message_counter++);
        reply_to = generated_id;
    }

    message = pcmk__xe_create(NULL, PCMK__XE_MESSAGE);
    pcmk__xe_set_props(message,
                       PCMK_XA_ORIGIN, origin,
                       PCMK__XA_T, pcmk__server_message_type(server),
                       PCMK__XA_SUBT, subtype,
                       PCMK_XA_VERSION, CRM_FEATURE_SET,
                       PCMK_XA_REFERENCE, reply_to,
                       PCMK__XA_CRM_SYS_FROM, sender_system,
                       PCMK__XA_CRM_HOST_TO, recipient_node,
                       PCMK__XA_CRM_SYS_TO, recipient_system,
                       PCMK__XA_CRM_TASK, task,
                       NULL);

    if (data != NULL) {
        // Caller data is copied beneath a wrapper element, never attached
        xmlNode *wrapper = pcmk__xe_create(message, PCMK__XE_CRM_XML);

        pcmk__xml_copy(wrapper, data);
    }

    free(generated_id);
    return message;
}
/*!
* \brief Create a Pacemaker request (for IPC or cluster layer)
*
* \param[in] task What to set as the request's task
* \param[in] msg_data What to add as the request's data contents
* \param[in] host_to What to set as the request's destination host
* \param[in] sys_to What to set as the request's destination system
- * \param[in] sys_from If not NULL, set as request's origin system
- * \param[in] uuid_from If not NULL, use in request's origin system
+ * \param[in] sender_system Sender's subsystem (required; this is an
+ * arbitrary string that may have meaning between
+ * the sender and recipient)
* \param[in] origin Name of function that called this one
*
* \return XML of new request
*
- * \note One of sys_from or uuid_from must be non-NULL
* \note This function should not be called directly, but via the
* create_request() wrapper.
* \note The caller is responsible for freeing the return value using
* \c pcmk__xml_free().
*/
/* NOTE(review): this hunk replaces the (sys_from, uuid_from) pair with a
 * single pre-formatted sender_system string; callers that previously relied
 * on the "<uuid>_<system>" concatenation now build it themselves (see
 * ipc_pacemakerd.c and ipc_schedulerd.c in this same patch).
 */
xmlNode *
create_request_adv(const char *task, xmlNode *msg_data,
const char *host_to, const char *sys_to,
- const char *sys_from, const char *uuid_from,
- const char *origin)
+ const char *sender_system, const char *origin)
{
- char *true_from = NULL;
- xmlNode *request = NULL;
-
- if (uuid_from != NULL) {
- true_from = crm_strdup_printf("%s_%s", uuid_from,
- (sys_from? sys_from : "none"));
- } else if (sys_from != NULL) {
- true_from = strdup(sys_from);
- } else {
- crm_err("Cannot create IPC request: No originating system specified");
- }
- request = pcmk__new_message_as(origin, pcmk_ipc_controld, NULL, true_from,
- host_to, sys_to, task, msg_data);
- free(true_from);
- return request;
+ return pcmk__new_message_as(origin, pcmk_ipc_controld, NULL, sender_system,
+ host_to, sys_to, task, msg_data);
}
/*!
* \internal
* \brief Create a Pacemaker reply (for IPC or cluster layer)
*
* \param[in] origin Name of function that called this one
* \param[in] original_request XML of request being replied to
* \param[in] data If not NULL, copy as reply's data (callers
* should not add attributes to the returned
* message element, but instead pass any desired
* information here, though this is not always
* honored currently)
*
* \return Newly created reply XML
*
* \note This function should not be called directly, but via the
* pcmk__new_reply() wrapper.
* \note The caller is responsible for freeing the return value using
* \c pcmk__xml_free().
*/
xmlNode *
pcmk__new_reply_as(const char *origin, const xmlNode *original_request,
                   xmlNode *data)
{
    // Pull everything we need out of the original request
    const char *msg_type = crm_element_value(original_request, PCMK__XA_T);
    const char *subtype = crm_element_value(original_request, PCMK__XA_SUBT);
    const char *task = crm_element_value(original_request, PCMK__XA_CRM_TASK);
    const char *host_from = crm_element_value(original_request, PCMK__XA_SRC);
    const char *sys_from = crm_element_value(original_request,
                                             PCMK__XA_CRM_SYS_FROM);
    const char *sys_to = crm_element_value(original_request,
                                           PCMK__XA_CRM_SYS_TO);
    const char *reference = crm_element_value(original_request,
                                              PCMK_XA_REFERENCE);
    enum pcmk_ipc_server server = pcmk__parse_server(msg_type);

    if (server == pcmk_ipc_unknown) {
        /* @COMPAT Not all requests currently specify a message type, so use a
         * default that preserves past behavior.
         *
         * @TODO Ensure all requests specify a message type, drop this check
         * after we no longer support rolling upgrades or Pacemaker Remote
         * connections involving versions before that.
         */
        server = pcmk_ipc_controld;
    }

    if (subtype == NULL) {
        crm_warn("Cannot reply to invalid message: No message type specified");
        return NULL;
    }

    if (strcmp(subtype, PCMK__VALUE_REQUEST) != 0) {
        /* Replies should only be generated for request messages, but it's possible
         * we expect replies to other messages right now so this can't be enforced.
         */
        crm_trace("Creating a reply for a non-request original message");
    }

    // Since this is a reply, we reverse the sender and recipient info
    return pcmk__new_message_as(origin, server, reference, sys_to,
                                host_from, sys_from, task, data);
}
/*!
* \internal
* \brief Register handlers for server commands
*
* \param[in] handlers Array of handler functions for supported server commands
* (the final entry must have a NULL command name, and if
* it has a handler it will be used as the default handler
* for unrecognized commands)
*
* \return Newly created hash table with commands and handlers
* \note The caller is responsible for freeing the return value with
* g_hash_table_destroy().
*/
GHashTable *
pcmk__register_handlers(const pcmk__server_command_t handlers[])
{
    GHashTable *commands = g_hash_table_new(g_str_hash, g_str_equal);

    if (handlers == NULL) {
        return commands; // no handlers to register
    }

    // The terminating entry has a NULL command name
    int i = 0;

    for (; handlers[i].command != NULL; ++i) {
        g_hash_table_insert(commands, (gpointer) handlers[i].command,
                            handlers[i].handler);
    }
    if (handlers[i].handler != NULL) {
        // g_str_hash() can't handle NULL, so use empty string for default
        g_hash_table_insert(commands, (gpointer) "", handlers[i].handler);
    }
    return commands;
}
/*!
* \internal
* \brief Process an incoming request
*
* \param[in,out] request Request to process
* \param[in] handlers Command table created by pcmk__register_handlers()
*
* \return XML to send as reply (or NULL if no reply is needed)
*/
xmlNode *
pcmk__process_request(pcmk__request_t *request, GHashTable *handlers)
{
    xmlNode *(*handler_fn)(pcmk__request_t *request) = NULL;

    CRM_CHECK((request != NULL) && (request->op != NULL) && (handlers != NULL),
              return NULL);

    if (pcmk_is_set(request->flags, pcmk__request_sync)
        && (request->ipc_client != NULL)) {
        // A sync request must match the client's currently pending request
        CRM_CHECK(request->ipc_client->request_id == request->ipc_id,
                  return NULL);
    }

    handler_fn = g_hash_table_lookup(handlers, request->op);
    if (handler_fn == NULL) {
        // Fall back to the default handler (registered under "")
        handler_fn = g_hash_table_lookup(handlers, "");
    }
    if (handler_fn == NULL) {
        crm_info("Ignoring %s request from %s %s with no handler",
                 request->op, pcmk__request_origin_type(request),
                 pcmk__request_origin(request));
        return NULL;
    }
    return handler_fn(request);
}
/*!
* \internal
* \brief Free memory used within a request (but not the request itself)
*
* \param[in,out] request Request to reset
*/
void
pcmk__reset_request(pcmk__request_t *request)
{
free(request->op);
request->op = NULL; // avoid dangling pointer / double free on reuse
pcmk__reset_result(&(request->result));
}
diff --git a/tools/crm_node.c b/tools/crm_node.c
index ba64b02720..a134e1317b 100644
--- a/tools/crm_node.c
+++ b/tools/crm_node.c
@@ -1,875 +1,875 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/types.h>
#include <crm/crm.h>
#include <crm/common/cmdline_internal.h>
#include <crm/common/output_internal.h>
#include <crm/common/mainloop.h>
#include <crm/common/xml.h>
#include <crm/cib.h>
#include <crm/cib/internal.h>
#include <crm/common/ipc_controld.h>
#include <crm/common/attrs_internal.h>
#include <pacemaker-internal.h>
#define SUMMARY "crm_node - Tool for displaying low-level node information"
// Command-line options (populated by the GOption callbacks below)
struct {
gboolean corosync; // unused; kept for the deprecated -C flag
gboolean dangerous_cmd; // set for destructive commands (-R)
gboolean force_flag; // --force
char command; // selected command letter ('i', 'l', 'n', 'p', 'q', 'N', 'R')
int nodeid; // node ID argument for -N
char *target_uname; // node name argument for -R
} options = {
.command = '\0',
.force_flag = FALSE
};
gboolean command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
gboolean remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error);
static GError *error = NULL;
static GMainLoop *mainloop = NULL;
static crm_exit_t exit_code = CRM_EX_OK;
static pcmk__output_t *out = NULL;
#define INDENT " "
// Mutually exclusive commands (each callback records the chosen command)
static GOptionEntry command_entries[] = {
    { "cluster-id", 'i', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
      "Display this node's cluster id",
      NULL },
    { "list", 'l', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
      "Display all known members (past and present) of this cluster",
      NULL },
    { "name", 'n', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
      "Display the name used by the cluster for this node",
      NULL },
    { "partition", 'p', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
      "Display the members of this partition",
      NULL },
    { "quorum", 'q', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, command_cb,
      "Display a 1 if our partition has quorum, 0 if not",
      NULL },
    { "name-for-id", 'N', 0, G_OPTION_ARG_CALLBACK, name_cb,
      "Display the name used by the cluster for the node with the specified ID",
      "ID" },

    /* Fixed: the help text previously left the parenthesis opened at "(the
     * node must already..." unclosed.
     */
    { "remove", 'R', 0, G_OPTION_ARG_CALLBACK, remove_cb,
      "(Advanced) Remove the (stopped) node with the specified name from Pacemaker's\n"
      INDENT "configuration and caches (the node must already have been removed from\n"
      INDENT "the underlying cluster stack configuration)",
      "NAME" },

    { NULL }
};
// Additional (non-command) options
static GOptionEntry addl_entries[] = {
{ "force", 'f', 0, G_OPTION_ARG_NONE, &options.force_flag,
NULL,
NULL },
#if SUPPORT_COROSYNC
/* Unused and deprecated */
{ "corosync", 'C', G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_NONE, &options.corosync,
NULL,
NULL },
#endif
// @TODO add timeout option for when IPC replies are needed
{ NULL }
};
// Output formats supported by this tool (default, plain text, and XML)
static pcmk__supported_format_t formats[] = {
PCMK__SUPPORTED_FORMAT_NONE,
PCMK__SUPPORTED_FORMAT_TEXT,
PCMK__SUPPORTED_FORMAT_XML,
{ NULL, NULL, NULL }
};
/* GOption callback for the simple (argument-less) commands; records which
 * command was selected in options.command.
 */
gboolean
command_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
    static const struct {
        const char *short_opt;
        const char *long_opt;
        char command;
    } option_map[] = {
        { "-i", "--cluster-id", 'i' },
        { "-l", "--list",       'l' },
        { "-n", "--name",       'n' },
        { "-p", "--partition",  'p' },
        { "-q", "--quorum",     'q' },
    };

    for (size_t lpc = 0; lpc < (sizeof(option_map) / sizeof(option_map[0]));
         lpc++) {
        if (pcmk__str_eq(option_map[lpc].short_opt, option_name,
                         pcmk__str_casei)
            || pcmk__str_eq(option_map[lpc].long_opt, option_name,
                            pcmk__str_casei)) {
            options.command = option_map[lpc].command;
            return TRUE;
        }
    }
    g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "Unknown param passed to command_cb: %s", option_name);
    return FALSE;
}
// GOption callback for -N/--name-for-id (look up node name by numeric ID)
gboolean
name_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
options.command = 'N';
pcmk__scan_min_int(optarg, &(options.nodeid), 0); // parse ID, clamp at 0
return TRUE;
}
// GOption callback for -R/--remove (purge a stopped node everywhere)
gboolean
remove_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) {
if (optarg == NULL) {
g_set_error(error, PCMK__EXITC_ERROR, CRM_EX_INVALID_PARAM, "-R option requires an argument");
return FALSE;
}
options.command = 'R';
options.dangerous_cmd = TRUE; // removal is destructive
pcmk__str_update(&options.target_uname, optarg);
return TRUE;
}
// Text formatter for "node-id": print the numeric ID on its own line
PCMK__OUTPUT_ARGS("node-id", "uint32_t")
static int
node_id_default(pcmk__output_t *out, va_list args) {
uint32_t node_id = va_arg(args, uint32_t);
out->info(out, "%" PRIu32, node_id);
return pcmk_rc_ok;
}
// XML formatter for "node-id": emit a <node-info> element with the ID
PCMK__OUTPUT_ARGS("node-id", "uint32_t")
static int
node_id_xml(pcmk__output_t *out, va_list args) {
uint32_t node_id = va_arg(args, uint32_t);
char *id_s = crm_strdup_printf("%" PRIu32, node_id);
pcmk__output_create_xml_node(out, PCMK_XE_NODE_INFO,
PCMK_XA_NODEID, id_s,
NULL);
free(id_s);
return pcmk_rc_ok;
}
// Text formatter for "simple-node-list": one "<id> <name> <state>" line each
PCMK__OUTPUT_ARGS("simple-node-list", "GList *")
static int
simple_node_list_default(pcmk__output_t *out, va_list args)
{
    for (const GList *iter = va_arg(args, GList *); iter != NULL;
         iter = iter->next) {
        const pcmk_controld_api_node_t *node = iter->data;

        out->info(out, "%" PRIu32 " %s %s", node->id,
                  pcmk__s(node->uname, ""), pcmk__s(node->state, ""));
    }
    return pcmk_rc_ok;
}
// XML formatter for "simple-node-list": <nodes> containing one <node> each
PCMK__OUTPUT_ARGS("simple-node-list", "GList *")
static int
simple_node_list_xml(pcmk__output_t *out, va_list args)
{
    GList *nodes = va_arg(args, GList *);

    out->begin_list(out, NULL, NULL, PCMK_XE_NODES);
    for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
        const pcmk_controld_api_node_t *node = iter->data;
        char *id_text = crm_strdup_printf("%" PRIu32, node->id);

        pcmk__output_create_xml_node(out, PCMK_XE_NODE,
                                     PCMK_XA_ID, id_text,
                                     PCMK_XA_NAME, node->uname,
                                     PCMK_XA_STATE, node->state,
                                     NULL);
        free(id_text);
    }
    out->end_list(out);
    return pcmk_rc_ok;
}
// Text formatter for "node-name": print just the name (ID is unused here)
PCMK__OUTPUT_ARGS("node-name", "uint32_t", "const char *")
static int
node_name_default(pcmk__output_t *out, va_list args) {
uint32_t node_id G_GNUC_UNUSED = va_arg(args, uint32_t);
const char *node_name = va_arg(args, const char *);
out->info(out, "%s", node_name);
return pcmk_rc_ok;
}
// XML formatter for "node-name": emit <node-info> with both ID and name
PCMK__OUTPUT_ARGS("node-name", "uint32_t", "const char *")
static int
node_name_xml(pcmk__output_t *out, va_list args) {
uint32_t node_id = va_arg(args, uint32_t);
const char *node_name = va_arg(args, const char *);
char *id_s = crm_strdup_printf("%" PRIu32, node_id);
pcmk__output_create_xml_node(out, PCMK_XE_NODE_INFO,
PCMK_XA_NODEID, id_s,
PCMK_XA_UNAME, node_name,
NULL);
free(id_s);
return pcmk_rc_ok;
}
// Text formatter for "partition-list": one space-separated line of members
PCMK__OUTPUT_ARGS("partition-list", "GList *")
static int
partition_list_default(pcmk__output_t *out, va_list args)
{
    GList *nodes = va_arg(args, GList *);
    GString *members = NULL;

    for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
        const pcmk_controld_api_node_t *node = iter->data;

        // Only current members belong to the partition
        if (pcmk__str_eq(node->state, "member", pcmk__str_none)) {
            pcmk__add_separated_word(&members, 128, pcmk__s(node->uname, ""),
                                     " ");
        }
    }

    if (members == NULL) {
        return pcmk_rc_no_output; // no members, nothing to print
    }
    out->info(out, "%s", members->str);
    g_string_free(members, TRUE);
    return pcmk_rc_ok;
}
// XML formatter for "partition-list": <nodes> with one <node> per member
PCMK__OUTPUT_ARGS("partition-list", "GList *")
static int
partition_list_xml(pcmk__output_t *out, va_list args)
{
    GList *nodes = va_arg(args, GList *);

    out->begin_list(out, NULL, NULL, PCMK_XE_NODES);
    for (const GList *iter = nodes; iter != NULL; iter = iter->next) {
        const pcmk_controld_api_node_t *node = iter->data;
        char *id_text = NULL;

        // Skip nodes that are not current members
        if (!pcmk__str_eq(node->state, "member", pcmk__str_none)) {
            continue;
        }

        id_text = crm_strdup_printf("%" PRIu32, node->id);
        pcmk__output_create_xml_node(out, PCMK_XE_NODE,
                                     PCMK_XA_ID, id_text,
                                     PCMK_XA_NAME, node->uname,
                                     PCMK_XA_STATE, node->state,
                                     NULL);
        free(id_text);
    }
    out->end_list(out);
    return pcmk_rc_ok;
}
// Text formatter for "quorum": print 1 (quorate) or 0 (not)
PCMK__OUTPUT_ARGS("quorum", "bool")
static int
quorum_default(pcmk__output_t *out, va_list args) {
bool have_quorum = va_arg(args, int); // bool promotes to int in varargs
out->info(out, "%d", have_quorum);
return pcmk_rc_ok;
}
// XML formatter for "quorum": emit <cluster-info quorum="true|false"/>
PCMK__OUTPUT_ARGS("quorum", "bool")
static int
quorum_xml(pcmk__output_t *out, va_list args) {
bool have_quorum = va_arg(args, int); // bool promotes to int in varargs
pcmk__output_create_xml_node(out, PCMK_XE_CLUSTER_INFO,
PCMK_XA_QUORUM, pcmk__btoa(have_quorum),
NULL);
return pcmk_rc_ok;
}
// Map of (message name, format) -> formatter function
static pcmk__message_entry_t fmt_functions[] = {
{ "node-id", "default", node_id_default },
{ "node-id", "xml", node_id_xml },
{ "node-name", "default", node_name_default },
{ "node-name", "xml", node_name_xml },
{ "partition-list", "default", partition_list_default },
{ "partition-list", "xml", partition_list_xml },
{ "quorum", "default", quorum_default },
{ "quorum", "xml", quorum_xml },
{ "simple-node-list", "default", simple_node_list_default },
{ "simple-node-list", "xml", simple_node_list_xml },
{ NULL, NULL, NULL }
};
// GCompareFunc: order nodes by name (numeric-aware, case-insensitive)
static gint
sort_node(gconstpointer a, gconstpointer b)
{
    const pcmk_controld_api_node_t *node_a = a;
    const pcmk_controld_api_node_t *node_b = b;

    return pcmk__numeric_strcasecmp(pcmk__s(node_a->uname, ""),
                                    pcmk__s(node_b->uname, ""));
}
/* IPC event callback for the controller connection. Handles the node-list
 * reply for -l/-p, then disconnects and quits the main loop. Sets the
 * globals exit_code and error.
 */
static void
controller_event_cb(pcmk_ipc_api_t *controld_api,
enum pcmk_ipc_event event_type, crm_exit_t status,
void *event_data, void *user_data)
{
pcmk_controld_api_reply_t *reply = event_data;
switch (event_type) {
case pcmk_ipc_event_disconnect:
// exit_code is still CRM_EX_DISCONNECT only if no reply was processed
if (exit_code == CRM_EX_DISCONNECT) { // Unexpected
g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
"Lost connection to controller");
}
goto done;
break;
case pcmk_ipc_event_reply:
break;
default:
// Ignore other events (e.g. connect notifications)
return;
}
if (status != CRM_EX_OK) {
exit_code = status;
g_set_error(&error, PCMK__EXITC_ERROR, status,
"Bad reply from controller: %s",
crm_exit_str(status));
goto done;
}
// Only a node-list reply is expected (from pcmk_controld_api_list_nodes())
if (reply->reply_type != pcmk_controld_reply_nodes) {
g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_INDETERMINATE,
"Unknown reply type %d from controller",
reply->reply_type);
goto done;
}
reply->data.nodes = g_list_sort(reply->data.nodes, sort_node);
if (options.command == 'p') {
out->message(out, "partition-list", reply->data.nodes);
} else if (options.command == 'l') {
out->message(out, "simple-node-list", reply->data.nodes);
}
// Success
exit_code = CRM_EX_OK;
done:
pcmk_disconnect_ipc(controld_api);
pcmk_quit_main_loop(mainloop, 10);
}
/* Connect to the controller, request the node list, and run a main loop
 * until controller_event_cb() processes the reply. Sets the globals
 * exit_code and error on failure.
 *
 * Fix: the IPC API object was leaked on the two error paths taken after
 * pcmk_new_ipc_api() succeeded; it is now freed on every path.
 */
static void
run_controller_mainloop(void)
{
    pcmk_ipc_api_t *controld_api = NULL;
    int rc;

    // Set disconnect exit code to handle unexpected disconnects
    exit_code = CRM_EX_DISCONNECT;

    // Create controller IPC object
    rc = pcmk_new_ipc_api(&controld_api, pcmk_ipc_controld);
    if (rc != pcmk_rc_ok) {
        g_set_error(&error, PCMK__RC_ERROR, rc,
                    "Could not connect to controller: %s",
                    pcmk_rc_str(rc));
        return;
    }
    pcmk_register_ipc_callback(controld_api, controller_event_cb, NULL);

    // Connect to controller
    rc = pcmk__connect_ipc(controld_api, pcmk_ipc_dispatch_main, 5);
    if (rc != pcmk_rc_ok) {
        exit_code = pcmk_rc2exitc(rc);
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Could not connect to %s: %s",
                    pcmk_ipc_name(controld_api, true), pcmk_rc_str(rc));
        pcmk_free_ipc_api(controld_api); // previously leaked here
        return;
    }

    rc = pcmk_controld_api_list_nodes(controld_api);
    if (rc != pcmk_rc_ok) {
        pcmk_disconnect_ipc(controld_api);
        exit_code = pcmk_rc2exitc(rc);
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Could not ping controller: %s", pcmk_rc_str(rc));
        pcmk_free_ipc_api(controld_api); // previously leaked here
        return;
    }

    // Run main loop to get controller reply via controller_event_cb()
    mainloop = g_main_loop_new(NULL, FALSE);
    g_main_loop_run(mainloop);
    g_main_loop_unref(mainloop);
    mainloop = NULL;
    pcmk_free_ipc_api(controld_api);
}
/* Query the controller for the local node's ID and print it.
 * Sets the global exit_code (and error on output failure).
 */
static void
print_node_id(void)
{
uint32_t nodeid = 0;
int rc = pcmk__query_node_info(out, &nodeid, NULL, NULL, NULL, NULL, NULL,
false, 0);
if (rc != pcmk_rc_ok) {
/* pcmk__query_node_info already sets an error message on the output object,
* so there's no need to call g_set_error here. That would just create a
* duplicate error message in the output.
*/
exit_code = pcmk_rc2exitc(rc);
return;
}
rc = out->message(out, "node-id", nodeid);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__RC_ERROR, rc, "Could not print node ID: %s",
pcmk_rc_str(rc));
}
exit_code = pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Print the name of the local node or of a node given by ID
 *
 * \param[in] nodeid  Node ID to look up, or 0 for the local node
 *
 * Sets the global exit_code (and error on output failure).
 *
 * Fix: the "node-name" formatters read the first vararg as uint32_t, but
 * 0UL (unsigned long) was being passed — a wider type on LP64 platforms,
 * which is undefined behavior with va_arg(). Pass an exact-width zero.
 */
static void
print_node_name(uint32_t nodeid)
{
    int rc = pcmk_rc_ok;
    char *node_name = NULL;

    if (nodeid == 0) {
        // Check environment first (i.e. when called by resource agent)
        const char *name = getenv("OCF_RESKEY_" CRM_META "_"
                                  PCMK__META_ON_NODE);

        if (name != NULL) {
            rc = out->message(out, "node-name", (uint32_t) 0U, name);
            goto done;
        }
    }

    // Otherwise ask the controller

    /* pcmk__query_node_name already sets an error message on the output object,
     * so there's no need to call g_set_error here. That would just create a
     * duplicate error message in the output.
     */
    rc = pcmk__query_node_name(out, nodeid, &node_name, 0);
    if (rc != pcmk_rc_ok) {
        exit_code = pcmk_rc2exitc(rc);
        return;
    }
    rc = out->message(out, "node-name", (uint32_t) 0U, node_name);

done:
    free(node_name); // free(NULL) is a no-op, so no guard needed

    if (rc != pcmk_rc_ok) {
        g_set_error(&error, PCMK__RC_ERROR, rc, "Could not print node name: %s",
                    pcmk_rc_str(rc));
    }
    exit_code = pcmk_rc2exitc(rc);
}
/* Query the controller for quorum status and print it.
 * Sets the global exit_code (and error on output failure).
 */
static void
print_quorum(void)
{
bool quorum; // written by pcmk__query_node_info() on success
int rc = pcmk__query_node_info(out, NULL, NULL, NULL, NULL, &quorum, NULL,
false, 0);
if (rc != pcmk_rc_ok) {
/* pcmk__query_node_info already sets an error message on the output object,
* so there's no need to call g_set_error here. That would just create a
* duplicate error message in the output.
*/
exit_code = pcmk_rc2exitc(rc);
return;
}
rc = out->message(out, "quorum", quorum);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__RC_ERROR, rc, "Could not print quorum status: %s",
pcmk_rc_str(rc));
}
exit_code = pcmk_rc2exitc(rc);
}
/*!
* \internal
* \brief Extend a transaction by removing a node from a CIB section
*
* \param[in,out] cib Active CIB connection
* \param[in] element CIB element containing node name and/or ID
* \param[in] section CIB section that \p element is in
* \param[in] node_name Name of node to purge (NULL to leave unspecified)
* \param[in] node_id Node ID of node to purge (0 to leave unspecified)
*
* \note At least one of node_name and node_id must be specified.
* \return Standard Pacemaker return code
*/
static int
remove_from_section(cib_t *cib, const char *element, const char *section,
                    const char *node_name, long node_id)
{
    xmlNode *xml = pcmk__xe_create(NULL, element);
    int rc = pcmk_rc_ok;

    crm_xml_add(xml, PCMK_XA_UNAME, node_name);

    // 0 means "match by name only"
    if (node_id > 0) {
        crm_xml_add_ll(xml, PCMK_XA_ID, node_id);
    }

    rc = cib->cmds->remove(cib, section, xml, cib_transaction);
    pcmk__xml_free(xml);

    // The CIB API uses legacy return codes (negative on failure)
    if (rc < 0) {
        return pcmk_legacy2rc(rc);
    }
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Purge a node from CIB
 *
 * \param[in] node_name Name of node to purge (or NULL to leave unspecified)
 * \param[in] node_id Node ID of node to purge (or 0 to leave unspecified)
 *
 * \note At least one of node_name and node_id must be specified.
 * \return Standard Pacemaker return code
 */
static int
purge_node_from_cib(const char *node_name, long node_id)
{
    int rc = pcmk_rc_ok;
    int commit_rc = pcmk_rc_ok;
    cib_t *cib = NULL;

    // Connect to CIB and start a transaction
    cib = cib_new();
    if (cib == NULL) {
        return ENOTCONN;
    }

    rc = cib__signon_attempts(cib, cib_command, 5);
    if (rc == pcmk_ok) {
        rc = cib->cmds->init_transaction(cib);
    }
    if (rc != pcmk_ok) {
        rc = pcmk_legacy2rc(rc);
        cib__clean_up_connection(&cib);
        return rc;
    }

    // Remove from configuration and status
    rc = remove_from_section(cib, PCMK_XE_NODE, PCMK_XE_NODES, node_name,
                             node_id);
    if (rc == pcmk_rc_ok) {
        rc = remove_from_section(cib, PCMK__XE_NODE_STATE, PCMK_XE_STATUS,
                                 node_name, node_id);
    }

    // Commit the transaction (discarding it if either removal failed)
    commit_rc = cib->cmds->end_transaction(cib, (rc == pcmk_rc_ok),
                                           cib_sync_call);
    cib__clean_up_connection(&cib);

    if (rc == pcmk_rc_ok) {
        if (commit_rc == pcmk_ok) {
            crm_debug("Purged node %s (%ld) from CIB",
                      pcmk__s(node_name, "by ID"), node_id);
        } else {
            /* Fix: a failed commit was previously reported as success;
             * propagate the commit error instead
             */
            rc = pcmk_legacy2rc(commit_rc);
        }
    }
    return rc;
}
/*!
* \internal
* \brief Purge a node from a single server's peer cache
*
* \param[in] server IPC server to send request to
* \param[in] node_name Name of node to purge (or NULL to leave unspecified)
* \param[in] node_id Node ID of node to purge (or 0 to leave unspecified)
*
* \note At least one of node_name and node_id must be specified.
* \return Standard Pacemaker return code
*/
static int
purge_node_from(enum pcmk_ipc_server server, const char *node_name,
long node_id)
{
pcmk_ipc_api_t *api = NULL;
int rc;
rc = pcmk_new_ipc_api(&api, server);
if (rc != pcmk_rc_ok) {
goto done;
}
rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5);
if (rc != pcmk_rc_ok) {
goto done;
}
rc = pcmk_ipc_purge_node(api, node_name, node_id);
done:
if (rc != pcmk_rc_ok) { // Debug message already logged on success
// NOTE(review): api may be NULL here if pcmk_new_ipc_api() failed;
// presumably pcmk_ipc_name() tolerates NULL — confirm
g_set_error(&error, PCMK__RC_ERROR, rc,
"Could not purge node %s from %s: %s",
pcmk__s(node_name, "by ID"), pcmk_ipc_name(api, true),
pcmk_rc_str(rc));
}
pcmk_free_ipc_api(api);
return rc;
}
/*!
 * \internal
 * \brief Purge a node from the fencer's peer cache
 *
 * \param[in] node_name  Name of node to purge (or NULL to leave unspecified)
 * \param[in] node_id    Node ID of node to purge (or 0 to leave unspecified)
 *
 * \note At least one of node_name and node_id must be specified.
 * \return Standard Pacemaker return code
 */
static int
purge_node_from_fencer(const char *node_name, long node_id)
{
    int rc = pcmk_rc_ok;
    crm_ipc_t *conn = NULL;
    xmlNode *cmd = NULL;

    conn = crm_ipc_new("stonith-ng", 0);
    if (conn == NULL) {
        rc = ENOTCONN;
        exit_code = pcmk_rc2exitc(rc);
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Could not connect to fencer to purge node %s",
                    pcmk__s(node_name, "by ID"));
        return rc;
    }

    rc = pcmk__connect_generic_ipc(conn);
    if (rc != pcmk_rc_ok) {
        exit_code = pcmk_rc2exitc(rc);
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Could not connect to fencer to purge node %s: %s",
                    pcmk__s(node_name, "by ID"), pcmk_rc_str(rc));
        crm_ipc_destroy(conn);
        return rc;
    }

    /* Note: source contained raw diff hunk lines here; this is the post-patch
     * form of the create_request() call (trailing NULL argument dropped)
     */
    cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, "stonith-ng",
                         crm_system_name);
    if (node_id > 0) {
        crm_xml_add_ll(cmd, PCMK_XA_ID, node_id);
    }
    crm_xml_add(cmd, PCMK_XA_UNAME, node_name);

    rc = crm_ipc_send(conn, cmd, 0, 0, NULL);
    if (rc >= 0) {
        rc = pcmk_rc_ok;
        crm_debug("Purged node %s (%ld) from fencer",
                  pcmk__s(node_name, "by ID"), node_id);
    } else {
        rc = pcmk_legacy2rc(rc);
        fprintf(stderr, "Could not purge node %s from fencer: %s\n",
                pcmk__s(node_name, "by ID"), pcmk_rc_str(rc));
    }
    pcmk__xml_free(cmd);
    crm_ipc_close(conn);
    crm_ipc_destroy(conn);
    return rc;
}
static void
remove_node(const char *target_uname)
{
int rc = pcmk_rc_ok;
long nodeid = 0;
const char *node_name = NULL;
char *endptr = NULL;
const enum pcmk_ipc_server servers[] = {
pcmk_ipc_controld,
pcmk_ipc_attrd,
};
// Check whether node was specified by name or numeric ID
errno = 0;
nodeid = strtol(target_uname, &endptr, 10);
if ((errno != 0) || (endptr == target_uname) || (*endptr != '\0')
|| (nodeid <= 0)) {
// It's not a positive integer, so assume it's a node name
nodeid = 0;
node_name = target_uname;
}
for (int i = 0; i < PCMK__NELEM(servers); ++i) {
rc = purge_node_from(servers[i], node_name, nodeid);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
return;
}
}
// The fencer hasn't been converted to pcmk_ipc_api_t yet
rc = purge_node_from_fencer(node_name, nodeid);
if (rc != pcmk_rc_ok) {
exit_code = pcmk_rc2exitc(rc);
return;
}
// Lastly, purge the node from the CIB itself
rc = purge_node_from_cib(node_name, nodeid);
exit_code = pcmk_rc2exitc(rc);
}
/*!
 * \internal
 * \brief Build the GOption argument-parsing context for crm_node
 *
 * \param[in,out] args   Common command-line arguments object
 * \param[out]    group  Where to put the output-format option group
 *
 * \return Newly created option context (caller frees via pcmk__free_arg_context)
 */
static GOptionContext *
build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
    GOptionEntry extra_prog_entries[] = {
        { "quiet", 'Q', 0, G_OPTION_ARG_NONE, &(args->quiet),
          "Be less descriptive in output.",
          NULL },
        { NULL }
    };
    GOptionContext *context = pcmk__build_arg_context(args,
                                                      "text (default), xml",
                                                      group, NULL);

    /* -q is added here rather than via the globally supported options because
     * some tools use that flag for something else
     */
    pcmk__add_main_args(context, extra_prog_entries);

    pcmk__add_arg_group(context, "commands", "Commands:",
                        "Show command help", command_entries);
    pcmk__add_arg_group(context, "additional", "Additional Options:",
                        "Show additional options", addl_entries);
    return context;
}
/*!
 * \internal
 * \brief crm_node entry point: parse arguments, set up output, dispatch the
 *        selected command, and exit with an appropriate code
 */
int
main(int argc, char **argv)
{
    int rc = pcmk_rc_ok;
    GOptionGroup *output_group = NULL;
    pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY);
    gchar **processed_args = pcmk__cmdline_preproc(argv, "NR");
    GOptionContext *context = build_arg_context(args, &output_group);

    pcmk__register_formats(output_group, formats);
    if (!g_option_context_parse_strv(context, &processed_args, &error)) {
        exit_code = CRM_EX_USAGE;
        goto done;
    }

    pcmk__cli_init_logging("crm_node", args->verbosity);

    // Create the requested output object before doing any real work
    rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
    if (rc != pcmk_rc_ok) {
        exit_code = pcmk_rc2exitc(rc);
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "Error creating output format %s: %s", args->output_ty,
                    pcmk_rc_str(rc));
        goto done;
    }

    if (args->version) {
        out->version(out, false);
        goto done;
    }

    // No command given: show usage and fail
    if (options.command == 0) {
        char *usage = g_option_context_get_help(context, TRUE, NULL);

        out->err(out, "%s", usage);
        g_free(usage);
        exit_code = CRM_EX_USAGE;
        goto done;
    }

    // Dangerous commands require an explicit --force
    if (options.dangerous_cmd && (options.force_flag == FALSE)) {
        exit_code = CRM_EX_USAGE;
        g_set_error(&error, PCMK__EXITC_ERROR, exit_code,
                    "The supplied command is considered dangerous."
                    " To prevent accidental destruction of the cluster,"
                    " the --force flag is required in order to proceed.");
        goto done;
    }

    pcmk__register_lib_messages(out);
    pcmk__register_messages(out, fmt_functions);

    // Dispatch on the single-character command code
    if (options.command == 'i') {
        print_node_id();

    } else if (options.command == 'n') {
        print_node_name(0);

    } else if (options.command == 'q') {
        print_quorum();

    } else if (options.command == 'N') {
        print_node_name(options.nodeid);

    } else if (options.command == 'R') {
        remove_node(options.target_uname);

    } else if ((options.command == 'l') || (options.command == 'p')) {
        run_controller_mainloop();
    }

done:
    g_strfreev(processed_args);
    pcmk__free_arg_context(context);

    pcmk__output_and_clear_error(&error, out);

    if (out != NULL) {
        out->finish(out, exit_code, true, NULL);
        pcmk__output_free(out);
    }
    pcmk__unregister_formats();
    return crm_exit(exit_code);
}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Jun 25, 5:02 AM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1952222
Default Alt Text
(360 KB)

Event Timeline