Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F2824816
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
155 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index b459bea1d9..0ab745a11c 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -1,381 +1,383 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <crm/cib.h>
#include <pacemaker-controld.h>
/* From join_dc... */
extern gboolean check_join_state(enum crmd_fsa_state cur_state, const char *source);
void
crmd_ha_msg_filter(xmlNode * msg)
{
if (AM_I_DC) {
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
if (pcmk__str_eq(sys_from, CRM_SYSTEM_DC, pcmk__str_casei)) {
const char *from = crm_element_value(msg, F_ORIG);
if (!pcmk__str_eq(from, controld_globals.our_nodename,
pcmk__str_casei)) {
int level = LOG_INFO;
const char *op = crm_element_value(msg, F_CRM_TASK);
/* make sure the election happens NOW */
if (controld_globals.fsa_state != S_ELECTION) {
ha_msg_input_t new_input;
level = LOG_WARNING;
new_input.msg = msg;
register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, &new_input,
__func__);
}
do_crm_log(level, "Another DC detected: %s (op=%s)", from, op);
goto done;
}
}
} else {
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
if (pcmk__str_eq(sys_to, CRM_SYSTEM_DC, pcmk__str_casei)) {
return;
}
}
/* crm_log_xml_trace(msg, "HA[inbound]"); */
route_message(C_HA_MESSAGE, msg);
done:
controld_trigger_fsa();
}
/*!
* \internal
* \brief Check whether a node is online
*
* \param[in] node Node to check
*
* \retval -1 if completely dead
* \retval 0 if partially alive
* \retval 1 if completely alive
*/
static int
node_alive(const crm_node_t *node)
{
if (pcmk_is_set(node->flags, crm_remote_node)) {
// Pacemaker Remote nodes can't be partially alive
return pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei) ? 1: -1;
} else if (crm_is_peer_active(node)) {
// Completely up cluster node: both cluster member and peer
return 1;
} else if (!pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !pcmk__str_eq(node->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
// Completely down cluster node: neither cluster member nor peer
return -1;
}
// Partially up cluster node: only cluster member or only peer
return 0;
}
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
uint32_t old = 0;
bool appeared = FALSE;
bool is_remote = pcmk_is_set(node->flags, crm_remote_node);
/* The controller waits to receive some information from the membership
* layer before declaring itself operational. If this is being called for a
* cluster node, indicate that we have it.
*/
if (!is_remote) {
controld_set_fsa_input_flags(R_PEER_DATA);
}
if (type == crm_status_processes
&& pcmk_is_set(node->processes, crm_get_cluster_proc())
&& !AM_I_DC
&& !is_remote) {
/*
* This is a hack until we can send to a nodeid and/or we fix node name lookups
* These messages are ignored in crmd_ha_msg_filter()
*/
xmlNode *query = create_request(CRM_OP_HELLO, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_debug("Sending hello to node %u so that it learns our node name", node->id);
send_cluster_message(node, crm_msg_crmd, query, FALSE);
free_xml(query);
}
if (node->uname == NULL) {
return;
}
switch (type) {
case crm_status_uname:
/* If we've never seen the node, then it also won't be in the status section */
crm_info("%s node %s is now %s",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state));
return;
case crm_status_nstate:
/* This callback should not be called unless the state actually
* changed, but here's a failsafe just in case.
*/
CRM_CHECK(!pcmk__str_eq(data, node->state, pcmk__str_casei),
return);
crm_info("%s node %s is now %s (was %s)",
(is_remote? "Remote" : "Cluster"),
node->uname, state_text(node->state), state_text(data));
if (pcmk__str_eq(CRM_NODE_MEMBER, node->state, pcmk__str_casei)) {
appeared = TRUE;
if (!is_remote) {
remove_stonith_cleanup(node->uname);
}
} else {
controld_remove_voter(node->uname);
}
crmd_alert_node_event(node);
break;
case crm_status_processes:
CRM_CHECK(data != NULL, return);
old = *(const uint32_t *)data;
appeared = pcmk_is_set(node->processes, crm_get_cluster_proc());
{
const char *dc_s = controld_globals.dc_name;
if ((dc_s == NULL) && AM_I_DC) {
dc_s = "true";
}
crm_info("Node %s is %s a peer " CRM_XS
" DC=%s old=%#07x new=%#07x",
node->uname, (appeared? "now" : "no longer"),
pcmk__s(dc_s, "<none>"), old, node->processes);
}
if (!pcmk_is_set((node->processes ^ old), crm_get_cluster_proc())) {
/* Peer status did not change. This should not be possible,
* since we don't track process flags other than peer status.
*/
crm_trace("Process flag %#7x did not change from %#7x to %#7x",
crm_get_cluster_proc(), old, node->processes);
return;
}
if (!appeared) {
controld_remove_voter(node->uname);
}
if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_trace("Ignoring peer status change because not connected to CIB");
return;
} else if (controld_globals.fsa_state == S_STOPPING) {
crm_trace("Ignoring peer status change because stopping");
return;
}
if (!appeared
&& pcmk__str_eq(node->uname, controld_globals.our_nodename,
pcmk__str_casei)) {
/* Did we get evicted? */
crm_notice("Our peer connection failed");
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ERROR, NULL);
} else if (pcmk__str_eq(node->uname, controld_globals.dc_name,
pcmk__str_casei)
&& !crm_is_peer_active(node)) {
/* Did the DC leave us? */
crm_notice("Our peer on the DC (%s) is dead",
controld_globals.dc_name);
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
/* @COMPAT DC < 1.1.13: If a DC shuts down normally, we don't
* want to fence it. Newer DCs will send their shutdown request
* to all peers, who will update the DC's expected state to
* down, thus avoiding fencing. We can safely erase the DC's
* transient attributes when it leaves in that case. However,
* the only way to avoid fencing older DCs is to leave the
* transient attributes intact until it rejoins.
*/
if (compare_version(controld_globals.dc_version, "3.0.9") > 0) {
controld_delete_node_state(node->uname,
controld_section_attrs,
cib_scope_local);
}
} else if (AM_I_DC
|| pcmk_is_set(controld_globals.flags, controld_dc_left)
|| (controld_globals.dc_name == NULL)) {
/* This only needs to be done once, so normally the DC should do
* it. However if there is no DC, every node must do it, since
* there is no other way to ensure some one node does it.
*/
if (appeared) {
te_trigger_stonith_history_sync(FALSE);
} else {
controld_delete_node_state(node->uname,
controld_section_attrs,
cib_scope_local);
}
}
break;
}
if (AM_I_DC) {
xmlNode *update = NULL;
int flags = node_update_peer;
int alive = node_alive(node);
pcmk__graph_action_t *down = match_down_event(node->uuid);
crm_trace("Alive=%d, appeared=%d, down=%d",
alive, appeared, (down? down->id : -1));
if (appeared && (alive > 0) && !is_remote) {
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
}
if (down) {
const char *task = crm_element_value(down->xml, XML_LRM_ATTR_TASK);
if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
/* tengine_stonith_callback() confirms fence actions */
crm_trace("Updating CIB %s fencer reported fencing of %s complete",
(pcmk_is_set(down->flags, pcmk__graph_action_confirmed)? "after" : "before"), node->uname);
} else if (!appeared && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
// Shutdown actions are immediately confirmed (i.e. no_wait)
if (!is_remote) {
flags |= node_update_join | node_update_expected;
crmd_peer_down(node, FALSE);
check_join_state(controld_globals.fsa_state, __func__);
}
if (alive >= 0) {
crm_info("%s of peer %s is in progress " CRM_XS " action=%d",
task, node->uname, down->id);
} else {
crm_notice("%s of peer %s is complete " CRM_XS " action=%d",
task, node->uname, down->id);
pcmk__update_graph(transition_graph, down);
trigger_graph();
}
} else {
crm_trace("Node %s is %s, was expected to %s (op %d)",
node->uname,
((alive > 0)? "alive" :
((alive < 0)? "dead" : "partially alive")),
task, down->id);
}
} else if (appeared == FALSE) {
if (transition_graph == NULL || transition_graph->id == -1) {
crm_info("Stonith/shutdown of node %s is unknown to the "
"current DC", node->uname);
} else {
crm_warn("Stonith/shutdown of node %s was not expected",
node->uname);
}
if (!is_remote) {
crm_update_peer_join(__func__, node, crm_join_none);
check_join_state(controld_globals.fsa_state, __func__);
}
abort_transition(INFINITY, pcmk__graph_restart, "Node failure",
NULL);
fail_incompletable_actions(transition_graph, node->uuid);
} else {
crm_trace("Node %s came up, was not expected to be down",
node->uname);
}
if (is_remote) {
/* A pacemaker_remote node won't have its cluster status updated
* in the CIB by membership-layer callbacks, so do it here.
*/
flags |= node_update_cluster;
/* Trigger resource placement on newly integrated nodes */
if (appeared) {
abort_transition(INFINITY, pcmk__graph_restart,
"Pacemaker Remote node integrated", NULL);
}
}
/* Update the CIB node state */
update = create_node_state_update(node, flags, NULL, __func__);
if (update == NULL) {
crm_debug("Node state update not yet possible for %s", node->uname);
} else {
fsa_cib_anon_update(XML_CIB_TAG_STATUS, update);
}
free_xml(update);
}
controld_trigger_fsa();
}
void
crmd_cib_connection_destroy(gpointer user_data)
{
CRM_LOG_ASSERT(user_data == fsa_cib_conn);
controld_trigger_fsa();
fsa_cib_conn->state = cib_disconnected;
if (!pcmk_is_set(controld_globals.fsa_input_register, R_CIB_CONNECTED)) {
crm_info("Connection to the CIB manager terminated");
return;
}
// @TODO This should trigger a reconnect, not a shutdown
crm_crit("Lost connection to the CIB manager, shutting down");
register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
controld_clear_fsa_input_flags(R_CIB_CONNECTED);
return;
}
gboolean
crm_fsa_trigger(gpointer user_data)
{
- crm_trace("Invoked (queue len: %d)", g_list_length(fsa_message_queue));
+ crm_trace("Invoked (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
s_crmd_fsa(C_FSA_INTERNAL);
- crm_trace("Exited (queue len: %d)", g_list_length(fsa_message_queue));
+ crm_trace("Exited (queue len: %d)",
+ g_list_length(controld_globals.fsa_message_queue));
return TRUE;
}
diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c
index f378be6d1d..5dd03ce955 100644
--- a/daemons/controld/controld_control.c
+++ b/daemons/controld/controld_control.c
@@ -1,870 +1,872 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/rules.h>
#include <crm/cluster/internal.h>
#include <crm/cluster/election_internal.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static qb_ipcs_service_t *ipcs = NULL;
static crm_trigger_t *config_read_trigger = NULL;
#if SUPPORT_COROSYNC
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif
void crm_shutdown(int nsig);
static gboolean crm_read_options(gpointer user_data);
/* A_HA_CONNECT */
void
do_ha_control(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
gboolean registered = FALSE;
static crm_cluster_t *cluster = NULL;
if (cluster == NULL) {
cluster = calloc(1, sizeof(crm_cluster_t));
}
if (action & A_HA_DISCONNECT) {
crm_cluster_disconnect(cluster);
crm_info("Disconnected from the cluster");
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
}
if (action & A_HA_CONNECT) {
crm_set_status_callback(&peer_update_callback);
crm_set_autoreap(FALSE);
if (is_corosync_cluster()) {
#if SUPPORT_COROSYNC
registered = crm_connect_corosync(cluster);
#endif
}
if (registered) {
controld_election_init(cluster->uname);
controld_globals.our_nodename = cluster->uname;
controld_globals.our_uuid = cluster->uuid;
if(cluster->uuid == NULL) {
crm_err("Could not obtain local uuid");
registered = FALSE;
}
}
if (!registered) {
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
return;
}
populate_cib_nodes(node_update_none, __func__);
controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
crm_info("Connected to the cluster");
}
if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
crm_err("Unexpected action %s in %s", fsa_action2string(action),
__func__);
}
}
/* A_SHUTDOWN */
void
do_shutdown(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
/* just in case */
controld_set_fsa_input_flags(R_SHUTDOWN);
controld_disconnect_fencer(FALSE);
}
/* A_SHUTDOWN_REQ */
void
do_shutdown_req(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
xmlNode *msg = NULL;
controld_set_fsa_input_flags(R_SHUTDOWN);
//controld_set_fsa_input_flags(R_STAYDOWN);
crm_info("Sending shutdown request to all peers (DC is %s)",
pcmk__s(controld_globals.dc_name, "not set"));
msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
free_xml(msg);
}
extern pcmk__output_t *logger_out;
void
crmd_fast_exit(crm_exit_t exit_code)
{
if (pcmk_is_set(controld_globals.fsa_input_register, R_STAYDOWN)) {
crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
exit_code, CRM_EX_FATAL);
exit_code = CRM_EX_FATAL;
} else if ((exit_code == CRM_EX_OK)
&& pcmk_is_set(controld_globals.fsa_input_register,
R_IN_RECOVERY)) {
crm_err("Could not recover from internal error");
exit_code = CRM_EX_ERROR;
}
if (logger_out != NULL) {
logger_out->finish(logger_out, exit_code, true, NULL);
pcmk__output_free(logger_out);
logger_out = NULL;
}
crm_exit(exit_code);
}
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
- GList *gIter = NULL;
GMainLoop *mloop = controld_globals.mainloop;
static bool in_progress = FALSE;
if (in_progress && (exit_code == CRM_EX_OK)) {
crm_debug("Exit is already in progress");
return exit_code;
} else if(in_progress) {
crm_notice("Error during shutdown process, exiting now with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
in_progress = TRUE;
crm_trace("Preparing to exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
/* Suppress secondary errors resulting from us disconnecting everything */
controld_set_fsa_input_flags(R_HA_DISCONNECTED);
/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */
if(ipcs) {
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs);
ipcs = NULL;
}
controld_close_attrd_ipc();
controld_shutdown_schedulerd_ipc();
controld_disconnect_fencer(TRUE);
if ((exit_code == CRM_EX_OK) && (controld_globals.mainloop == NULL)) {
crm_debug("No mainloop detected");
exit_code = CRM_EX_ERROR;
}
/* On an error, just get out.
*
* Otherwise, make the effort to have mainloop exit gracefully so
* that it (mostly) cleans up after itself and valgrind has less
* to report on - allowing real errors stand out
*/
if (exit_code != CRM_EX_OK) {
crm_notice("Forcing immediate exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
crm_write_blackbox(SIGTRAP, NULL);
crmd_fast_exit(exit_code);
}
/* Clean up as much memory as possible for valgrind */
- for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
- fsa_data_t *fsa_data = gIter->data;
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *fsa_data = (fsa_data_t *) iter->data;
crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(controld_globals.fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
delete_fsa_input(fsa_data);
}
controld_clear_fsa_input_flags(R_MEMBERSHIP);
- g_list_free(fsa_message_queue); fsa_message_queue = NULL;
+
+ g_list_free(controld_globals.fsa_message_queue);
+ controld_globals.fsa_message_queue = NULL;
controld_election_fini();
/* Tear down the CIB manager connection, but don't free it yet -- it could
* be used when we drain the mainloop later.
*/
controld_disconnect_cib_manager();
verify_stopped(controld_globals.fsa_state, LOG_WARNING);
controld_clear_fsa_input_flags(R_LRM_CONNECTED);
lrm_state_destroy_all();
mainloop_destroy_trigger(config_read_trigger);
config_read_trigger = NULL;
controld_destroy_fsa_trigger();
controld_destroy_transition_trigger();
pcmk__client_cleanup();
crm_peer_destroy();
controld_free_fsa_timers();
te_cleanup_stonith_history_sync(NULL, TRUE);
controld_free_sched_timer();
free(controld_globals.our_nodename);
controld_globals.our_nodename = NULL;
free(controld_globals.our_uuid);
controld_globals.our_uuid = NULL;
free(controld_globals.dc_name);
controld_globals.dc_name = NULL;
free(controld_globals.dc_version);
controld_globals.dc_version = NULL;
free(controld_globals.cluster_name);
controld_globals.cluster_name = NULL;
free(te_uuid); te_uuid = NULL;
free(failed_stop_offset); failed_stop_offset = NULL;
free(failed_start_offset); failed_start_offset = NULL;
free_max_generation();
mainloop_destroy_signal(SIGPIPE);
mainloop_destroy_signal(SIGUSR1);
mainloop_destroy_signal(SIGTERM);
mainloop_destroy_signal(SIGTRAP);
/* leave SIGCHLD engaged as we might still want to drain some service-actions */
if (mloop) {
GMainContext *ctx = g_main_loop_get_context(controld_globals.mainloop);
/* Don't re-enter this block */
controld_globals.mainloop = NULL;
/* no signals on final draining anymore */
mainloop_destroy_signal(SIGCHLD);
crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
{
int lpc = 0;
while((g_main_context_pending(ctx) && lpc < 10)) {
lpc++;
crm_trace("Iteration %d", lpc);
g_main_context_dispatch(ctx);
}
}
crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
g_main_loop_quit(mloop);
/* Won't do anything yet, since we're inside it now */
g_main_loop_unref(mloop);
} else {
mainloop_destroy_signal(SIGCHLD);
}
cib_delete(fsa_cib_conn);
fsa_cib_conn = NULL;
throttle_fini();
/* Graceful */
crm_trace("Done preparing for exit with status %d (%s)",
exit_code, crm_exit_str(exit_code));
return exit_code;
}
/* A_EXIT_0, A_EXIT_1 */
void
do_exit(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_exit_t exit_code = CRM_EX_OK;
int log_level = LOG_INFO;
const char *exit_type = "gracefully";
if (action & A_EXIT_1) {
log_level = LOG_ERR;
exit_type = "forcefully";
exit_code = CRM_EX_ERROR;
}
verify_stopped(cur_state, LOG_ERR);
do_crm_log(log_level, "Performing %s - %s exiting the controller",
fsa_action2string(action), exit_type);
crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
crmd_exit(exit_code);
}
static void sigpipe_ignore(int nsig) { return; }
/* A_STARTUP */
void
do_startup(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_debug("Registering Signal Handlers");
mainloop_add_signal(SIGTERM, crm_shutdown);
mainloop_add_signal(SIGPIPE, sigpipe_ignore);
config_read_trigger = mainloop_add_trigger(G_PRIORITY_HIGH,
crm_read_options, NULL);
controld_init_fsa_trigger();
controld_init_transition_trigger();
crm_debug("Creating CIB manager and executor objects");
fsa_cib_conn = cib_new();
lrm_state_init_local();
if (controld_init_fsa_timers() == FALSE) {
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
}
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("Accepting new IPC client connection");
if (pcmk__new_client(c, uid, gid) == NULL) {
return -EIO;
}
return 0;
}
// \return libqb error code (0 on success, -errno on error)
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
uint32_t id = 0;
uint32_t flags = 0;
pcmk__client_t *client = pcmk__find_client(c);
xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);
if (msg == NULL) {
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
return 0;
}
pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);
CRM_ASSERT(client->user != NULL);
pcmk__update_acl_user(msg, F_CRM_USER, client->user);
crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
if (controld_authorize_ipc_message(msg, client, NULL)) {
crm_trace("Processing IPC message from client %s",
pcmk__client_name(client));
route_message(C_IPC_MESSAGE, msg);
}
controld_trigger_fsa();
free_xml(msg);
return 0;
}
static int32_t
ipc_client_disconnected(qb_ipcs_connection_t *c)
{
pcmk__client_t *client = pcmk__find_client(c);
if (client) {
crm_trace("Disconnecting %sregistered client %s (%p/%p)",
(client->userdata? "" : "un"), pcmk__client_name(client),
c, client);
free(client->userdata);
pcmk__free_client(client);
controld_trigger_fsa();
}
return 0;
}
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
crm_trace("Connection %p", c);
ipc_client_disconnected(c);
}
/* A_STOP */
void
do_stop(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
crm_trace("Closing IPC server");
mainloop_del_ipc_server(ipcs); ipcs = NULL;
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
/* A_STARTED */
void
do_started(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
static struct qb_ipcs_service_handlers crmd_callbacks = {
.connection_accept = accept_controller_client,
.connection_created = NULL,
.msg_process = dispatch_controller_ipc,
.connection_closed = ipc_client_disconnected,
.connection_destroyed = ipc_connection_destroyed
};
if (cur_state != S_STARTING) {
crm_err("Start cancelled... %s", fsa_state2string(cur_state));
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_MEMBERSHIP)) {
crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_LRM_CONNECTED)) {
crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_CIB_CONNECTED)) {
crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register,
R_READ_CONFIG)) {
crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);
crmd_fsa_stall(TRUE);
return;
} else if (!pcmk_is_set(controld_globals.fsa_input_register, R_PEER_DATA)) {
crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
crmd_fsa_stall(TRUE);
return;
}
crm_debug("Init server comms");
ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
if (ipcs == NULL) {
crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
} else {
crm_notice("Pacemaker controller successfully started and accepting connections");
}
controld_trigger_fencer_connect();
controld_clear_fsa_input_flags(R_STARTING);
register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
/* A_RECOVER */
void
do_recover(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
controld_set_fsa_input_flags(R_IN_RECOVERY);
crm_warn("Fast-tracking shutdown in response to errors");
register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
static pcmk__cluster_option_t controller_options[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
"dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
N_("Includes a hash which identifies the exact changeset the code was "
"built from. Used for diagnostic purposes.")
},
{
"cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
N_("The messaging stack on which Pacemaker is currently running"),
N_("Used for informational and diagnostic purposes.")
},
{
"cluster-name", NULL, "string", NULL, NULL, NULL,
N_("An arbitrary name for the cluster"),
N_("This optional value is mostly for users' convenience as desired "
"in administration, but may also be used in Pacemaker "
"configuration rules via the #cluster-name node attribute, and "
"by higher-level tools and resource agents.")
},
{
XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
NULL, "20s", pcmk__valid_interval_spec,
N_("How long to wait for a response from other nodes during start-up"),
N_("The optimal value will depend on the speed and load of your network "
"and the type of switches used.")
},
{
XML_CONFIG_ATTR_RECHECK, NULL, "time",
N_("Zero disables polling, while positive values are an interval in seconds"
"(unless other units are specified, for example \"5min\")"),
"15min", pcmk__valid_interval_spec,
N_("Polling interval to recheck cluster state and evaluate rules "
"with date specifications"),
N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
"recheck cluster state for failure timeouts and most time-based "
"rules. However, it will also recheck the cluster after this "
"amount of inactivity, to evaluate rules with date specifications "
"and serve as a fail-safe for certain types of scheduler bugs.")
},
{
"load-threshold", NULL, "percentage", NULL,
"80%", pcmk__valid_percentage,
N_("Maximum amount of system load that should be used by cluster nodes"),
N_("The cluster will slow down its recovery process when the amount of "
"system resources used (currently CPU) approaches this limit"),
},
{
"node-action-limit", NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Maximum number of jobs that can be scheduled per node "
"(defaults to 2x cores)")
},
{ XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
N_("How a cluster node should react if notified of its own fencing"),
N_("A cluster node may receive notification of its own fencing if fencing "
"is misconfigured, or if fabric fencing is in use that doesn't cut "
"cluster communication. Allowed values are \"stop\" to attempt to "
"immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
"to immediately reboot the local node, falling back to stop on failure.")
},
{
XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
"2min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Declare an election failed if it is not decided within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
"20min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("Exit immediately if shutdown does not complete within this much "
"time. If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"join-integration-timeout", "crmd-integration-timeout", "time", NULL,
"3min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
"30min", pcmk__valid_interval_spec,
"*** Advanced Use Only ***",
N_("If you need to adjust this value, it probably indicates "
"the presence of a bug.")
},
{
"transition-delay", "crmd-transition-delay", "time", NULL,
"0s", pcmk__valid_interval_spec,
N_("*** Advanced Use Only *** Enabling this option will slow down "
"cluster recovery under all conditions"),
N_("Delay cluster recovery for this much time to allow for additional "
"events to occur. Useful if your configuration is sensitive to "
"the order in which ping updates arrive.")
},
{
"stonith-watchdog-timeout", NULL, "time", NULL,
"0", controld_verify_stonith_watchdog_timeout,
N_("How long before nodes can be assumed to be safely down when "
"watchdog-based self-fencing via SBD is in use"),
N_("If this is set to a positive value, lost nodes are assumed to "
"self-fence using watchdog-based SBD within this much time. This "
"does not require a fencing resource to be explicitly configured, "
"though a fence_watchdog resource can be configured, to limit use "
"to specific nodes. If this is set to 0 (the default), the cluster "
"will never assume watchdog-based self-fencing. If this is set to a "
"negative value, the cluster will use twice the local value of the "
"`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
"or otherwise treat this as 0. WARNING: When used, this timeout "
"must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
"watchdog-based SBD, and Pacemaker will refuse to start on any of "
"those nodes where this is not true for the local value or SBD is "
"not active. When this is set to a negative value, "
"`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
"that use SBD, otherwise data corruption or loss could occur.")
},
{
"stonith-max-attempts", NULL, "integer", NULL,
"10", pcmk__valid_positive_number,
N_("How many times fencing can fail before it will no longer be "
"immediately re-attempted on a target")
},
// Already documented in libpe_status (other values must be kept identical)
{
"no-quorum-policy", NULL, "select",
"stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
"What to do when the cluster does not have quorum", NULL
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
"Whether to lock resources to a cleanly shut down node",
"When true, resources active on a node when it is cleanly shut down "
"are kept \"locked\" to that node (not allowed to run elsewhere) "
"until they start again on that node after it rejoins (or for at "
"most shutdown-lock-limit, if set). Stonith resources and "
"Pacemaker Remote connections are never locked. Clone and bundle "
"instances and the promoted role of promotable clones are currently"
" never locked, though support could be added in a future release."
},
};
void
crmd_metadata(void)
{
const char *desc_short = "Pacemaker controller options";
const char *desc_long = "Cluster options used by Pacemaker's controller";
gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
desc_long, controller_options,
PCMK__NELEM(controller_options));
printf("%s", s);
g_free(s);
}
static const char *
controller_option(GHashTable *options, const char *name)
{
return pcmk__cluster_option(options, controller_options,
PCMK__NELEM(controller_options), name);
}
static void
config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
{
const char *value = NULL;
GHashTable *config_hash = NULL;
crm_time_t *now = crm_time_new(NULL);
xmlNode *crmconfig = NULL;
xmlNode *alerts = NULL;
if (rc != pcmk_ok) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
crm_err("The cluster is mis-configured - shutting down and staying down");
controld_set_fsa_input_flags(R_STAYDOWN);
}
goto bail;
}
crmconfig = output;
if ((crmconfig) &&
(crm_element_name(crmconfig)) &&
(strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
}
if (!crmconfig) {
fsa_data_t *msg_data = NULL;
crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
goto bail;
}
crm_debug("Call %d : Parsing CIB options", call_id);
config_hash = pcmk__strkey_table(free, free);
pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
pcmk__validate_cluster_options(config_hash, controller_options,
PCMK__NELEM(controller_options));
value = controller_option(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
election_timer->period_ms = crm_parse_interval_spec(value);
value = controller_option(config_hash, "node-action-limit");
throttle_update_job_max(value);
value = controller_option(config_hash, "load-threshold");
if(value) {
throttle_set_load_target(strtof(value, NULL) / 100.0);
}
value = controller_option(config_hash, "no-quorum-policy");
if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
controld_set_global_flags(controld_no_quorum_suicide);
}
set_fence_reaction(controller_option(config_hash,
XML_CONFIG_ATTR_FENCE_REACTION));
value = controller_option(config_hash, "stonith-max-attempts");
update_stonith_max_attempts(value);
value = controller_option(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
shutdown_escalation_timer->period_ms);
value = controller_option(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
controld_set_election_period(value);
value = controller_option(config_hash, XML_CONFIG_ATTR_RECHECK);
recheck_interval_ms = crm_parse_interval_spec(value);
crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
value = controller_option(config_hash, "transition-delay");
transition_timer->period_ms = crm_parse_interval_spec(value);
value = controller_option(config_hash, "join-integration-timeout");
integration_timer->period_ms = crm_parse_interval_spec(value);
value = controller_option(config_hash, "join-finalization-timeout");
finalization_timer->period_ms = crm_parse_interval_spec(value);
value = controller_option(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
if (crm_is_true(value)) {
controld_set_global_flags(controld_shutdown_lock_enabled);
} else {
controld_clear_global_flags(controld_shutdown_lock_enabled);
}
value = g_hash_table_lookup(config_hash, "cluster-name");
pcmk__str_update(&(controld_globals.cluster_name), value);
alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
crmd_unpack_alerts(alerts);
controld_set_fsa_input_flags(R_READ_CONFIG);
controld_trigger_fsa();
g_hash_table_destroy(config_hash);
bail:
crm_time_free(now);
}
/*!
* \internal
* \brief Trigger read and processing of the configuration
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_config_as(const char *fn, int line)
{
if (config_read_trigger != NULL) {
crm_trace("%s:%d - Triggered config processing", fn, line);
mainloop_set_trigger(config_read_trigger);
}
}
gboolean
crm_read_options(gpointer user_data)
{
int call_id =
fsa_cib_conn->cmds->query(fsa_cib_conn,
"//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
NULL, cib_xpath | cib_scope_local);
fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
crm_trace("Querying the CIB... call %d", call_id);
return TRUE;
}
/* A_READCONFIG */
void
do_read_config(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
throttle_init();
controld_trigger_config();
}
void
crm_shutdown(int nsig)
{
if ((controld_globals.mainloop == NULL)
|| !g_main_loop_is_running(controld_globals.mainloop)) {
crmd_exit(CRM_EX_OK);
return;
}
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_err("Escalating shutdown");
register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
return;
}
controld_set_fsa_input_flags(R_SHUTDOWN);
register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
if (shutdown_escalation_timer->period_ms == 0) {
const char *value = controller_option(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
}
crm_notice("Initiating controller shutdown sequence " CRM_XS
" limit=%ums", shutdown_escalation_timer->period_ms);
controld_start_timer(shutdown_escalation_timer);
}
diff --git a/daemons/controld/controld_fsa.c b/daemons/controld/controld_fsa.c
index 6c19aeaca1..cf58401444 100644
--- a/daemons/controld/controld_fsa.c
+++ b/daemons/controld/controld_fsa.c
@@ -1,739 +1,742 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <stdio.h>
#include <stdint.h> // uint64_t
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/election_internal.h>
#include <crm/cluster.h>
#include <pacemaker-controld.h>
cib_t *fsa_cib_conn = NULL;
//! Triggers an FSA invocation
static crm_trigger_t *fsa_trigger = NULL;
#define DOT_PREFIX "actions:trace: "
#define do_dot_log(fmt, args...) crm_trace( fmt, ##args)
static void do_state_transition(enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state,
fsa_data_t *msg_data);
void s_crmd_fsa_actions(fsa_data_t * fsa_data);
void log_fsa_input(fsa_data_t * stored_msg);
void init_dotfile(void);
void
init_dotfile(void)
{
do_dot_log(DOT_PREFIX "digraph \"g\" {");
do_dot_log(DOT_PREFIX " size = \"30,30\"");
do_dot_log(DOT_PREFIX " graph [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " bb = \"0,0,398.922306,478.927856\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " node [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " shape = \"ellipse\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " edge [");
do_dot_log(DOT_PREFIX " fontsize = \"12\"");
do_dot_log(DOT_PREFIX " fontname = \"Times-Roman\"");
do_dot_log(DOT_PREFIX " fontcolor = \"black\"");
do_dot_log(DOT_PREFIX " color = \"black\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX "// special nodes");
do_dot_log(DOT_PREFIX " \"S_PENDING\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"blue\"");
do_dot_log(DOT_PREFIX " fontcolor = \"blue\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX " \"S_TERMINATE\" ");
do_dot_log(DOT_PREFIX " [");
do_dot_log(DOT_PREFIX " color = \"red\"");
do_dot_log(DOT_PREFIX " fontcolor = \"red\"");
do_dot_log(DOT_PREFIX " ]");
do_dot_log(DOT_PREFIX "// DC only nodes");
do_dot_log(DOT_PREFIX " \"S_INTEGRATION\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_POLICY_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_TRANSITION_ENGINE\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_RELEASE_DC\" [ fontcolor = \"green\" ]");
do_dot_log(DOT_PREFIX " \"S_IDLE\" [ fontcolor = \"green\" ]");
}
static void
do_fsa_action(fsa_data_t * fsa_data, long long an_action,
void (*function) (long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t * msg_data))
{
controld_clear_fsa_action_flags(an_action);
crm_trace(DOT_PREFIX "\t// %s", fsa_action2string(an_action));
function(an_action, fsa_data->fsa_cause, controld_globals.fsa_state,
fsa_data->fsa_input, fsa_data);
}
static const uint64_t startup_actions =
A_STARTUP | A_CIB_START | A_LRM_CONNECT | A_HA_CONNECT | A_READCONFIG |
A_STARTED | A_CL_JOIN_QUERY;
// A_LOG, A_WARN, A_ERROR
void
do_log(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data)
{
unsigned log_type = LOG_TRACE;
if (action & A_LOG) {
log_type = LOG_INFO;
} else if (action & A_WARN) {
log_type = LOG_WARNING;
} else if (action & A_ERROR) {
log_type = LOG_ERR;
}
do_crm_log(log_type, "Input %s received in state %s from %s",
fsa_input2string(msg_data->fsa_input),
fsa_state2string(cur_state), msg_data->origin);
if (msg_data->data_type == fsa_dt_ha_msg) {
ha_msg_input_t *input = fsa_typed_data(msg_data->data_type);
crm_log_xml_debug(input->msg, __func__);
} else if (msg_data->data_type == fsa_dt_xml) {
xmlNode *input = fsa_typed_data(msg_data->data_type);
crm_log_xml_debug(input, __func__);
} else if (msg_data->data_type == fsa_dt_lrm) {
lrmd_event_data_t *input = fsa_typed_data(msg_data->data_type);
do_crm_log(log_type,
"Resource %s: Call ID %d returned %d (%d)."
" New status if rc=0: %s",
input->rsc_id, input->call_id, input->rc,
input->op_status, (char *)input->user_data);
}
}
/*!
* \internal
* \brief Initialize the FSA trigger
*/
void
controld_init_fsa_trigger(void)
{
fsa_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
}
/*!
* \internal
* \brief Destroy the FSA trigger
*/
void
controld_destroy_fsa_trigger(void)
{
// This basically will not work, since mainloop has a reference to it
mainloop_destroy_trigger(fsa_trigger);
fsa_trigger = NULL;
}
/*!
* \internal
* \brief Trigger an FSA invocation
*
* \param[in] fn Calling function name
* \param[in] line Line number where call occurred
*/
void
controld_trigger_fsa_as(const char *fn, int line)
{
if (fsa_trigger != NULL) {
crm_trace("%s:%d - Triggered FSA invocation", fn, line);
mainloop_set_trigger(fsa_trigger);
}
}
enum crmd_fsa_state
s_crmd_fsa(enum crmd_fsa_cause cause)
{
controld_globals_t *globals = &controld_globals;
fsa_data_t *fsa_data = NULL;
uint64_t register_copy = controld_globals.fsa_input_register;
uint64_t new_actions = A_NOTHING;
enum crmd_fsa_state last_state;
crm_trace("FSA invoked with Cause: %s\tState: %s",
fsa_cause2string(cause),
fsa_state2string(globals->fsa_state));
fsa_dump_actions(controld_globals.fsa_actions, "Initial");
controld_clear_global_flags(controld_fsa_is_stalled);
- if ((fsa_message_queue == NULL)
+ if ((controld_globals.fsa_message_queue == NULL)
&& (controld_globals.fsa_actions != A_NOTHING)) {
/* fake the first message so we can get into the loop */
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->fsa_input = I_NULL;
fsa_data->fsa_cause = C_FSA_INTERNAL;
fsa_data->origin = __func__;
fsa_data->data_type = fsa_dt_none;
- fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
fsa_data = NULL;
}
- while ((fsa_message_queue != NULL)
+ while ((controld_globals.fsa_message_queue != NULL)
&& !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
- crm_trace("Checking messages (%d remaining)", g_list_length(fsa_message_queue));
+ crm_trace("Checking messages (%d remaining)",
+ g_list_length(controld_globals.fsa_message_queue));
fsa_data = get_message();
if(fsa_data == NULL) {
continue;
}
log_fsa_input(fsa_data);
/* add any actions back to the queue */
controld_set_fsa_action_flags(fsa_data->actions);
fsa_dump_actions(fsa_data->actions, "Restored actions");
/* get the next batch of actions */
new_actions = controld_fsa_get_action(fsa_data->fsa_input,
globals->fsa_state);
controld_set_fsa_action_flags(new_actions);
fsa_dump_actions(new_actions, "New actions");
if (fsa_data->fsa_input != I_NULL && fsa_data->fsa_input != I_ROUTER) {
crm_debug("Processing %s: [ state=%s cause=%s origin=%s ]",
fsa_input2string(fsa_data->fsa_input),
fsa_state2string(globals->fsa_state),
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
}
/* logging : *before* the state is changed */
if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
do_fsa_action(fsa_data, A_ERROR, do_log);
}
if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
do_fsa_action(fsa_data, A_WARN, do_log);
}
if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
do_fsa_action(fsa_data, A_LOG, do_log);
}
/* update state variables */
last_state = globals->fsa_state;
globals->fsa_state = controld_fsa_get_next_state(fsa_data->fsa_input,
globals->fsa_state);
/*
* Remove certain actions during shutdown
*/
if ((globals->fsa_state == S_STOPPING)
|| pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
controld_clear_fsa_action_flags(startup_actions);
}
/*
* Hook for change of state.
* Allows actions to be added or removed when entering a state
*/
if (last_state != globals->fsa_state) {
do_state_transition(last_state, globals->fsa_state, fsa_data);
} else {
do_dot_log(DOT_PREFIX "\t// FSA input: State=%s \tCause=%s"
" \tInput=%s \tOrigin=%s() \tid=%d",
fsa_state2string(globals->fsa_state),
fsa_cause2string(fsa_data->fsa_cause),
fsa_input2string(fsa_data->fsa_input), fsa_data->origin, fsa_data->id);
}
/* start doing things... */
s_crmd_fsa_actions(fsa_data);
delete_fsa_input(fsa_data);
fsa_data = NULL;
}
- if ((fsa_message_queue != NULL)
+ if ((controld_globals.fsa_message_queue != NULL)
|| (controld_globals.fsa_actions != A_NOTHING)
|| pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
+
crm_debug("Exiting the FSA: queue=%d, fsa_actions=%#llx, stalled=%s",
- g_list_length(fsa_message_queue),
+ g_list_length(controld_globals.fsa_message_queue),
(unsigned long long) controld_globals.fsa_actions,
pcmk__btoa(pcmk_is_set(controld_globals.flags,
controld_fsa_is_stalled)));
} else {
crm_trace("Exiting the FSA");
}
/* cleanup inputs? */
if (register_copy != controld_globals.fsa_input_register) {
uint64_t same = register_copy & controld_globals.fsa_input_register;
fsa_dump_inputs(LOG_DEBUG, "Added",
controld_globals.fsa_input_register ^ same);
fsa_dump_inputs(LOG_DEBUG, "Removed", register_copy ^ same);
}
fsa_dump_actions(controld_globals.fsa_actions, "Remaining");
fsa_dump_queue(LOG_DEBUG);
return globals->fsa_state;
}
void
s_crmd_fsa_actions(fsa_data_t * fsa_data)
{
/*
* Process actions in order of priority but do only one
* action at a time to avoid complicating the ordering.
*/
CRM_CHECK(fsa_data != NULL, return);
while ((controld_globals.fsa_actions != A_NOTHING)
&& !pcmk_is_set(controld_globals.flags, controld_fsa_is_stalled)) {
/* regular action processing in order of action priority
*
* Make sure all actions that connect to required systems
* are performed first
*/
if (pcmk_is_set(controld_globals.fsa_actions, A_ERROR)) {
do_fsa_action(fsa_data, A_ERROR, do_log);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_WARN)) {
do_fsa_action(fsa_data, A_WARN, do_log);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_LOG)) {
do_fsa_action(fsa_data, A_LOG, do_log);
/* get out of here NOW! before anything worse happens */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_1)) {
do_fsa_action(fsa_data, A_EXIT_1, do_exit);
/* sub-system restart */
} else if (pcmk_all_flags_set(controld_globals.fsa_actions,
O_LRM_RECONNECT)) {
do_fsa_action(fsa_data, O_LRM_RECONNECT, do_lrm_control);
} else if (pcmk_all_flags_set(controld_globals.fsa_actions,
O_CIB_RESTART)) {
do_fsa_action(fsa_data, O_CIB_RESTART, do_cib_control);
} else if (pcmk_all_flags_set(controld_globals.fsa_actions,
O_PE_RESTART)) {
do_fsa_action(fsa_data, O_PE_RESTART, do_pe_control);
} else if (pcmk_all_flags_set(controld_globals.fsa_actions,
O_TE_RESTART)) {
do_fsa_action(fsa_data, O_TE_RESTART, do_te_control);
/* essential start tasks */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTUP)) {
do_fsa_action(fsa_data, A_STARTUP, do_startup);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_START)) {
do_fsa_action(fsa_data, A_CIB_START, do_cib_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_CONNECT)) {
do_fsa_action(fsa_data, A_HA_CONNECT, do_ha_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_READCONFIG)) {
do_fsa_action(fsa_data, A_READCONFIG, do_read_config);
/* sub-system start/connect */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_CONNECT)) {
do_fsa_action(fsa_data, A_LRM_CONNECT, do_lrm_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_START)) {
do_fsa_action(fsa_data, A_TE_START, do_te_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_START)) {
do_fsa_action(fsa_data, A_PE_START, do_pe_control);
/* Timers */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TIMER_STOP)) {
do_fsa_action(fsa_data, A_DC_TIMER_STOP, do_timer_control);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_INTEGRATE_TIMER_STOP)) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_STOP, do_timer_control);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_INTEGRATE_TIMER_START)) {
do_fsa_action(fsa_data, A_INTEGRATE_TIMER_START, do_timer_control);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_FINALIZE_TIMER_STOP)) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_STOP, do_timer_control);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_FINALIZE_TIMER_START)) {
do_fsa_action(fsa_data, A_FINALIZE_TIMER_START, do_timer_control);
/*
* Highest priority actions
*/
} else if (pcmk_is_set(controld_globals.fsa_actions, A_MSG_ROUTE)) {
do_fsa_action(fsa_data, A_MSG_ROUTE, do_msg_route);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_RECOVER)) {
do_fsa_action(fsa_data, A_RECOVER, do_recover);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_CL_JOIN_RESULT)) {
do_fsa_action(fsa_data, A_CL_JOIN_RESULT,
do_cl_join_finalize_respond);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_CL_JOIN_REQUEST)) {
do_fsa_action(fsa_data, A_CL_JOIN_REQUEST,
do_cl_join_offer_respond);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN_REQ)) {
do_fsa_action(fsa_data, A_SHUTDOWN_REQ, do_shutdown_req);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_ELECTION_VOTE)) {
do_fsa_action(fsa_data, A_ELECTION_VOTE, do_election_vote);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_ELECTION_COUNT)) {
do_fsa_action(fsa_data, A_ELECTION_COUNT, do_election_count_vote);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_EVENT)) {
do_fsa_action(fsa_data, A_LRM_EVENT, do_lrm_event);
/*
* High priority actions
*/
} else if (pcmk_is_set(controld_globals.fsa_actions, A_STARTED)) {
do_fsa_action(fsa_data, A_STARTED, do_started);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_CL_JOIN_QUERY)) {
do_fsa_action(fsa_data, A_CL_JOIN_QUERY, do_cl_join_query);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_TIMER_START)) {
do_fsa_action(fsa_data, A_DC_TIMER_START, do_timer_control);
/*
* Medium priority actions
* - Membership
*/
} else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_TAKEOVER)) {
do_fsa_action(fsa_data, A_DC_TAKEOVER, do_dc_takeover);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASE)) {
do_fsa_action(fsa_data, A_DC_RELEASE, do_dc_release);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_JOIN_FINAL)) {
do_fsa_action(fsa_data, A_DC_JOIN_FINAL, do_dc_join_final);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_ELECTION_CHECK)) {
do_fsa_action(fsa_data, A_ELECTION_CHECK, do_election_check);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_ELECTION_START)) {
do_fsa_action(fsa_data, A_ELECTION_START, do_election_vote);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_JOIN_OFFER_ALL)) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ALL, do_dc_join_offer_all);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_JOIN_OFFER_ONE)) {
do_fsa_action(fsa_data, A_DC_JOIN_OFFER_ONE, do_dc_join_offer_one);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_JOIN_PROCESS_REQ)) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_REQ,
do_dc_join_filter_offer);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_JOIN_PROCESS_ACK)) {
do_fsa_action(fsa_data, A_DC_JOIN_PROCESS_ACK, do_dc_join_ack);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_DC_JOIN_FINALIZE)) {
do_fsa_action(fsa_data, A_DC_JOIN_FINALIZE, do_dc_join_finalize);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_CL_JOIN_ANNOUNCE)) {
do_fsa_action(fsa_data, A_CL_JOIN_ANNOUNCE, do_cl_join_announce);
/*
* Low(er) priority actions
* Make sure the CIB is always updated before invoking the
* scheduler, and the scheduler before the transition engine.
*/
} else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_HALT)) {
do_fsa_action(fsa_data, A_TE_HALT, do_te_invoke);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_CANCEL)) {
do_fsa_action(fsa_data, A_TE_CANCEL, do_te_invoke);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_LRM_INVOKE)) {
do_fsa_action(fsa_data, A_LRM_INVOKE, do_lrm_invoke);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_INVOKE)) {
do_fsa_action(fsa_data, A_PE_INVOKE, do_pe_invoke);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_INVOKE)) {
do_fsa_action(fsa_data, A_TE_INVOKE, do_te_invoke);
/* Shutdown actions */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_DC_RELEASED)) {
do_fsa_action(fsa_data, A_DC_RELEASED, do_dc_release);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_PE_STOP)) {
do_fsa_action(fsa_data, A_PE_STOP, do_pe_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_TE_STOP)) {
do_fsa_action(fsa_data, A_TE_STOP, do_te_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_SHUTDOWN)) {
do_fsa_action(fsa_data, A_SHUTDOWN, do_shutdown);
} else if (pcmk_is_set(controld_globals.fsa_actions,
A_LRM_DISCONNECT)) {
do_fsa_action(fsa_data, A_LRM_DISCONNECT, do_lrm_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_HA_DISCONNECT)) {
do_fsa_action(fsa_data, A_HA_DISCONNECT, do_ha_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_CIB_STOP)) {
do_fsa_action(fsa_data, A_CIB_STOP, do_cib_control);
} else if (pcmk_is_set(controld_globals.fsa_actions, A_STOP)) {
do_fsa_action(fsa_data, A_STOP, do_stop);
/* exit gracefully */
} else if (pcmk_is_set(controld_globals.fsa_actions, A_EXIT_0)) {
do_fsa_action(fsa_data, A_EXIT_0, do_exit);
/* Error checking and reporting */
} else {
crm_err("Action %s not supported "CRM_XS" %#llx",
fsa_action2string(controld_globals.fsa_actions),
(unsigned long long) controld_globals.fsa_actions);
register_fsa_error_adv(C_FSA_INTERNAL, I_ERROR, fsa_data, NULL,
__func__);
}
}
}
void
log_fsa_input(fsa_data_t * stored_msg)
{
CRM_ASSERT(stored_msg);
crm_trace("Processing queued input %d", stored_msg->id);
if (stored_msg->fsa_cause == C_LRM_OP_CALLBACK) {
crm_trace("FSA processing LRM callback from %s", stored_msg->origin);
} else if (stored_msg->data == NULL) {
crm_trace("FSA processing input from %s", stored_msg->origin);
} else {
ha_msg_input_t *ha_input = fsa_typed_data_adv(stored_msg, fsa_dt_ha_msg,
__func__);
crm_trace("FSA processing XML message from %s", stored_msg->origin);
crm_log_xml_trace(ha_input->xml, "FSA message data");
}
}
static void
check_join_counts(fsa_data_t *msg_data)
{
int count;
guint npeers;
count = crmd_join_phase_count(crm_join_finalized);
if (count > 0) {
crm_err("%d cluster node%s failed to confirm join",
count, pcmk__plural_s(count));
crmd_join_phase_log(LOG_NOTICE);
return;
}
npeers = crm_active_peers();
count = crmd_join_phase_count(crm_join_confirmed);
if (count == npeers) {
if (npeers == 1) {
crm_debug("Sole active cluster node is fully joined");
} else {
crm_debug("All %d active cluster nodes are fully joined", count);
}
} else if (count > npeers) {
crm_err("New election needed because more nodes confirmed join "
"than are in membership (%d > %u)", count, npeers);
register_fsa_input(C_FSA_INTERNAL, I_ELECTION, NULL);
} else if (saved_ccm_membership_id != crm_peer_seq) {
crm_info("New join needed because membership changed (%llu -> %llu)",
saved_ccm_membership_id, crm_peer_seq);
register_fsa_input_before(C_FSA_INTERNAL, I_NODE_JOIN, NULL);
} else {
crm_warn("Only %d of %u active cluster nodes fully joined "
"(%d did not respond to offer)",
count, npeers, crmd_join_phase_count(crm_join_welcomed));
}
}
static void
do_state_transition(enum crmd_fsa_state cur_state,
enum crmd_fsa_state next_state, fsa_data_t *msg_data)
{
int level = LOG_INFO;
int count = 0;
gboolean clear_recovery_bit = TRUE;
#if 0
uint64_t original_fsa_actions = controld_globals.fsa_actions;
#endif
enum crmd_fsa_cause cause = msg_data->fsa_cause;
enum crmd_fsa_input current_input = msg_data->fsa_input;
const char *state_from = fsa_state2string(cur_state);
const char *state_to = fsa_state2string(next_state);
const char *input = fsa_input2string(current_input);
CRM_LOG_ASSERT(cur_state != next_state);
do_dot_log(DOT_PREFIX "\t%s -> %s [ label=%s cause=%s origin=%s ]",
state_from, state_to, input, fsa_cause2string(cause), msg_data->origin);
if (cur_state == S_IDLE || next_state == S_IDLE) {
level = LOG_NOTICE;
} else if (cur_state == S_NOT_DC || next_state == S_NOT_DC) {
level = LOG_NOTICE;
} else if (cur_state == S_ELECTION) {
level = LOG_NOTICE;
} else if (cur_state == S_STARTING) {
level = LOG_NOTICE;
} else if (next_state == S_RECOVERY) {
level = LOG_WARNING;
}
do_crm_log(level, "State transition %s -> %s "
CRM_XS " input=%s cause=%s origin=%s",
state_from, state_to, input, fsa_cause2string(cause),
msg_data->origin);
if (next_state != S_ELECTION && cur_state != S_RELEASE_DC) {
controld_stop_current_election_timeout();
}
#if 0
if ((controld_globals.fsa_input_register & R_SHUTDOWN)) {
controld_set_fsa_action_flags(A_DC_TIMER_STOP);
}
#endif
if (next_state == S_INTEGRATION) {
controld_set_fsa_action_flags(A_INTEGRATE_TIMER_START);
} else {
controld_set_fsa_action_flags(A_INTEGRATE_TIMER_STOP);
}
if (next_state == S_FINALIZE_JOIN) {
controld_set_fsa_action_flags(A_FINALIZE_TIMER_START);
} else {
controld_set_fsa_action_flags(A_FINALIZE_TIMER_STOP);
}
if (next_state != S_PENDING) {
controld_set_fsa_action_flags(A_DC_TIMER_STOP);
}
if (next_state != S_IDLE) {
controld_stop_timer(recheck_timer);
}
if (cur_state == S_FINALIZE_JOIN && next_state == S_POLICY_ENGINE) {
populate_cib_nodes(node_update_quick|node_update_all, __func__);
}
switch (next_state) {
case S_PENDING:
fsa_cib_conn->cmds->set_secondary(fsa_cib_conn, cib_scope_local);
update_dc(NULL);
break;
case S_ELECTION:
update_dc(NULL);
break;
case S_NOT_DC:
election_timer->counter = 0;
purge_stonith_cleanup();
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we have a new DC");
controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
}
CRM_LOG_ASSERT(controld_globals.dc_name != NULL);
if (controld_globals.dc_name == NULL) {
crm_err("Reached S_NOT_DC without a DC" " being recorded");
}
break;
case S_RECOVERY:
clear_recovery_bit = FALSE;
break;
case S_FINALIZE_JOIN:
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_warn("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
count = crmd_join_phase_count(crm_join_welcomed);
if (count > 0) {
crm_warn("%d cluster node%s failed to respond to join offer",
count, pcmk__plural_s(count));
crmd_join_phase_log(LOG_NOTICE);
} else {
crm_debug("All cluster nodes (%d) responded to join offer",
crmd_join_phase_count(crm_join_integrated));
}
break;
case S_POLICY_ENGINE:
election_timer->counter = 0;
CRM_LOG_ASSERT(AM_I_DC);
if (cause == C_TIMER_POPPED) {
crm_info("Progressed to state %s after %s",
fsa_state2string(next_state), fsa_cause2string(cause));
}
check_join_counts(msg_data);
break;
case S_STOPPING:
case S_TERMINATE:
/* possibly redundant */
controld_set_fsa_input_flags(R_SHUTDOWN);
break;
case S_IDLE:
CRM_LOG_ASSERT(AM_I_DC);
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("(Re)Issuing shutdown request now" " that we are the DC");
controld_set_fsa_action_flags(A_SHUTDOWN_REQ);
}
controld_start_recheck_timer();
break;
default:
break;
}
if (clear_recovery_bit && next_state != S_PENDING) {
controld_clear_fsa_action_flags(A_RECOVER);
} else if (clear_recovery_bit == FALSE) {
controld_set_fsa_action_flags(A_RECOVER);
}
#if 0
if (original_fsa_actions != controld_globals.fsa_actions) {
fsa_dump_actions(original_fsa_actions ^ controld_globals.fsa_actions,
"New actions");
}
#endif
}
diff --git a/daemons/controld/controld_fsa.h b/daemons/controld/controld_fsa.h
index ea3b5fff3e..62994aa4fc 100644
--- a/daemons/controld/controld_fsa.h
+++ b/daemons/controld/controld_fsa.h
@@ -1,708 +1,707 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD_FSA__H
# define CRMD_FSA__H
# include <crm/crm.h>
# include <crm/cib.h>
# include <crm/common/xml.h>
# include <crm/common/mainloop.h>
# include <crm/cluster.h>
# include <crm/cluster/election_internal.h>
# include <crm/common/ipc_internal.h>
/*! States the controller can be in */
enum crmd_fsa_state {
S_IDLE = 0, /* Nothing happening */
S_ELECTION, /* Take part in the election algorithm as
* described below
*/
S_INTEGRATION, /* integrate that status of new nodes (which is
* all of them if we have just been elected DC)
* to form a complete and up-to-date picture of
* the CIB
*/
S_FINALIZE_JOIN, /* integrate that status of new nodes (which is
* all of them if we have just been elected DC)
* to form a complete and up-to-date picture of
* the CIB
*/
S_NOT_DC, /* we are in non-DC mode */
S_POLICY_ENGINE, /* Determine next stable state of the cluster */
S_RECOVERY, /* Something bad happened, check everything is ok
* before continuing and attempt to recover if
* required
*/
S_RELEASE_DC, /* we were the DC, but now we arent anymore,
* possibly by our own request, and we should
* release all unnecessary sub-systems, finish
* any pending actions, do general cleanup and
* unset anything that makes us think we are
* special :)
*/
S_STARTING, /* we are just starting out */
S_PENDING, /* we are not a full/active member yet */
S_STOPPING, /* We are in the final stages of shutting down */
S_TERMINATE, /* We are going to shutdown, this is the equiv of
* "Sending TERM signal to all processes" in Linux
* and in worst case scenarios could be considered
* a self STONITH
*/
S_TRANSITION_ENGINE, /* Attempt to make the calculated next stable
* state of the cluster a reality
*/
S_HALT, /* Freeze - don't do anything
* Something bad happened that needs the admin to fix
* Wait for I_ELECTION
*/
/* ----------- Last input found in table is above ---------- */
S_ILLEGAL /* This is an illegal FSA state */
/* (must be last) */
};
# define MAXSTATE S_ILLEGAL
/*
Once we start and do some basic sanity checks, we go into the
S_NOT_DC state and await instructions from the DC or input from
the cluster layer which indicates the election algorithm needs to run.
If the election algorithm is triggered, we enter the S_ELECTION state
from where we can either go back to the S_NOT_DC state or progress
to the S_INTEGRATION state (or S_RELEASE_DC if we used to be the DC
but aren't anymore). See the libcrmcluster API documentation for more
information about the election algorithm.
Once the election is complete, if we are the DC, we enter the
S_INTEGRATION state which is a DC-in-waiting style state. We are
the DC, but we shouldn't do anything yet because we may not have an
up-to-date picture of the cluster. There may of course be times
when this fails, so we should go back to the S_RECOVERY stage and
check everything is ok. We may also end up here if a new node came
online, since each node is authoritative about itself, and we would want
to incorporate its information into the CIB.
Once we have the latest CIB, we then enter the S_POLICY_ENGINE state
where invoke the scheduler. It is possible that between
invoking the scheduler and receiving an answer, that we receive
more input. In this case, we would discard the orginal result and
invoke it again.
Once we are satisfied with the output from the scheduler, we
enter S_TRANSITION_ENGINE and feed the scheduler's output to the
Transition Engine who attempts to make the scheduler's
calculation a reality. If the transition completes successfully,
we enter S_IDLE, otherwise we go back to S_POLICY_ENGINE with the
current unstable state and try again.
Of course, we may be asked to shutdown at any time, however we must
progress to S_NOT_DC before doing so. Once we have handed over DC
duties to another node, we can then shut down like everyone else,
that is, by asking the DC for permission and waiting for it to take all
our resources away.
The case where we are the DC and the only node in the cluster is a
special case and handled as an escalation which takes us to
S_SHUTDOWN. Similarly, if any other point in the shutdown
fails or stalls, this is escalated and we end up in S_TERMINATE.
At any point, the controller can relay messages for its subsystems,
but outbound messages (from subsystems) should probably be blocked
until S_INTEGRATION (for the DC) or the join protocol has
completed (for non-DC controllers).
*/
/*======================================
*
* Inputs/Events/Stimuli to be given to the finite state machine
*
* Some of these a true events, and others are synthesised based on
* the "register" (see below) and the contents or source of messages.
*
* The machine keeps processing until receiving I_NULL
*
*======================================*/
enum crmd_fsa_input {
/* 0 */
I_NULL, /* Nothing happened */
/* 1 */
I_CIB_OP, /* An update to the CIB occurred */
I_CIB_UPDATE, /* An update to the CIB occurred */
I_DC_TIMEOUT, /* We have lost communication with the DC */
I_ELECTION, /* Someone started an election */
I_PE_CALC, /* The scheduler needs to be invoked */
I_RELEASE_DC, /* The election completed and we were not
* elected, but we were the DC beforehand
*/
I_ELECTION_DC, /* The election completed and we were (re-)elected
* DC
*/
I_ERROR, /* Something bad happened (more serious than
* I_FAIL) and may not have been due to the action
* being performed. For example, we may have lost
* our connection to the CIB.
*/
/* 9 */
I_FAIL, /* The action failed to complete successfully */
I_INTEGRATED,
I_FINALIZED,
I_NODE_JOIN, /* A node has entered the cluster */
I_NOT_DC, /* We are not and were not the DC before or after
* the current operation or state
*/
I_RECOVERED, /* The recovery process completed successfully */
I_RELEASE_FAIL, /* We could not give up DC status for some reason
*/
I_RELEASE_SUCCESS, /* We are no longer the DC */
I_RESTART, /* The current set of actions needs to be
* restarted
*/
I_TE_SUCCESS, /* Some non-resource, non-cluster-layer action
* is required of us, e.g. ping
*/
/* 20 */
I_ROUTER, /* Do our job as router and forward this to the
* right place
*/
I_SHUTDOWN, /* We are asking to shutdown */
I_STOP, /* We have been told to shutdown */
I_TERMINATE, /* Actually exit */
I_STARTUP,
I_PE_SUCCESS, /* The action completed successfully */
I_JOIN_OFFER, /* The DC is offering membership */
I_JOIN_REQUEST, /* The client is requesting membership */
I_JOIN_RESULT, /* If not the DC: The result of a join request
* Else: A client is responding with its local state info
*/
I_WAIT_FOR_EVENT, /* we may be waiting for an async task to "happen"
* and until it does, we can't do anything else
*/
I_DC_HEARTBEAT, /* The DC is telling us that it is alive and well */
I_LRM_EVENT,
/* 30 */
I_PENDING,
I_HALT,
/* ------------ Last input found in table is above ----------- */
I_ILLEGAL /* This is an illegal value for an FSA input */
/* (must be last) */
};
# define MAXINPUT I_ILLEGAL
# define I_MESSAGE I_ROUTER
/*======================================
*
* actions
*
* Some of the actions below will always occur together for now, but this may
* not always be the case, so they are split up so that they can easily be
* called independently in the future, if necessary.
*
* For example, separating A_LRM_CONNECT from A_STARTUP might be useful
* if we ever try to recover from a faulty or disconnected executor.
*
*======================================*/
/* Don't do anything */
# define A_NOTHING 0x0000000000000000ULL
/* -- Startup actions -- */
/* Hook to perform any actions (other than connecting to other daemons)
* that might be needed as part of the startup.
*/
# define A_STARTUP 0x0000000000000001ULL
/* Hook to perform any actions that might be needed as part
* after startup is successful.
*/
# define A_STARTED 0x0000000000000002ULL
/* Connect to cluster layer */
# define A_HA_CONNECT 0x0000000000000004ULL
# define A_HA_DISCONNECT 0x0000000000000008ULL
# define A_INTEGRATE_TIMER_START 0x0000000000000010ULL
# define A_INTEGRATE_TIMER_STOP 0x0000000000000020ULL
# define A_FINALIZE_TIMER_START 0x0000000000000040ULL
# define A_FINALIZE_TIMER_STOP 0x0000000000000080ULL
/* -- Election actions -- */
# define A_DC_TIMER_START 0x0000000000000100ULL
# define A_DC_TIMER_STOP 0x0000000000000200ULL
# define A_ELECTION_COUNT 0x0000000000000400ULL
# define A_ELECTION_VOTE 0x0000000000000800ULL
# define A_ELECTION_START 0x0000000000001000ULL
/* -- Message processing -- */
/* Process the queue of requests */
# define A_MSG_PROCESS 0x0000000000002000ULL
/* Send the message to the correct recipient */
# define A_MSG_ROUTE 0x0000000000004000ULL
/* Send a welcome message to new node(s) */
# define A_DC_JOIN_OFFER_ONE 0x0000000000008000ULL
/* -- Server Join protocol actions -- */
/* Send a welcome message to all nodes */
# define A_DC_JOIN_OFFER_ALL 0x0000000000010000ULL
/* Process the remote node's ack of our join message */
# define A_DC_JOIN_PROCESS_REQ 0x0000000000020000ULL
/* Send out the results of the Join phase */
# define A_DC_JOIN_FINALIZE 0x0000000000040000ULL
/* Send out the results of the Join phase */
# define A_DC_JOIN_PROCESS_ACK 0x0000000000080000ULL
/* -- Client Join protocol actions -- */
# define A_CL_JOIN_QUERY 0x0000000000100000ULL
# define A_CL_JOIN_ANNOUNCE 0x0000000000200000ULL
/* Request membership to the DC list */
# define A_CL_JOIN_REQUEST 0x0000000000400000ULL
/* Did the DC accept or reject the request */
# define A_CL_JOIN_RESULT 0x0000000000800000ULL
/* -- Recovery, DC start/stop -- */
/* Something bad happened, try to recover */
# define A_RECOVER 0x0000000001000000ULL
/* Hook to perform any actions (apart from starting, the TE, scheduler,
* and gathering the latest CIB) that might be necessary before
* giving up the responsibilities of being the DC.
*/
# define A_DC_RELEASE 0x0000000002000000ULL
/* */
# define A_DC_RELEASED 0x0000000004000000ULL
/* Hook to perform any actions (apart from starting, the TE, scheduler,
* and gathering the latest CIB) that might be necessary before
* taking over the responsibilities of being the DC.
*/
# define A_DC_TAKEOVER 0x0000000008000000ULL
/* -- Shutdown actions -- */
# define A_SHUTDOWN 0x0000000010000000ULL
# define A_STOP 0x0000000020000000ULL
# define A_EXIT_0 0x0000000040000000ULL
# define A_EXIT_1 0x0000000080000000ULL
# define A_SHUTDOWN_REQ 0x0000000100000000ULL
# define A_ELECTION_CHECK 0x0000000200000000ULL
# define A_DC_JOIN_FINAL 0x0000000400000000ULL
/* -- CIB actions -- */
# define A_CIB_START 0x0000020000000000ULL
# define A_CIB_STOP 0x0000040000000000ULL
/* -- Transition Engine actions -- */
/* Attempt to reach the newly calculated cluster state. This is
* only called once per transition (except if it is asked to
* stop the transition or start a new one).
* Once given a cluster state to reach, the TE will determine
* tasks that can be performed in parallel, execute them, wait
* for replies and then determine the next set until the new
* state is reached or no further tasks can be taken.
*/
# define A_TE_INVOKE 0x0000100000000000ULL
# define A_TE_START 0x0000200000000000ULL
# define A_TE_STOP 0x0000400000000000ULL
# define A_TE_CANCEL 0x0000800000000000ULL
# define A_TE_HALT 0x0001000000000000ULL
/* -- Scheduler actions -- */
/* Calculate the next state for the cluster. This is only
* invoked once per needed calculation.
*/
# define A_PE_INVOKE 0x0002000000000000ULL
# define A_PE_START 0x0004000000000000ULL
# define A_PE_STOP 0x0008000000000000ULL
/* -- Misc actions -- */
/* Add a system generate "block" so that resources arent moved
* to or are activly moved away from the affected node. This
* way we can return quickly even if busy with other things.
*/
# define A_NODE_BLOCK 0x0010000000000000ULL
/* Update our information in the local CIB */
# define A_UPDATE_NODESTATUS 0x0020000000000000ULL
# define A_READCONFIG 0x0080000000000000ULL
/* -- LRM Actions -- */
/* Connect to pacemaker-execd */
# define A_LRM_CONNECT 0x0100000000000000ULL
/* Disconnect from pacemaker-execd */
# define A_LRM_DISCONNECT 0x0200000000000000ULL
# define A_LRM_INVOKE 0x0400000000000000ULL
# define A_LRM_EVENT 0x0800000000000000ULL
/* -- Logging actions -- */
# define A_LOG 0x1000000000000000ULL
# define A_ERROR 0x2000000000000000ULL
# define A_WARN 0x4000000000000000ULL
# define O_EXIT (A_SHUTDOWN|A_STOP|A_LRM_DISCONNECT|A_HA_DISCONNECT|A_EXIT_0|A_CIB_STOP)
# define O_RELEASE (A_DC_TIMER_STOP|A_DC_RELEASE|A_PE_STOP|A_TE_STOP|A_DC_RELEASED)
# define O_PE_RESTART (A_PE_START|A_PE_STOP)
# define O_TE_RESTART (A_TE_START|A_TE_STOP)
# define O_CIB_RESTART (A_CIB_START|A_CIB_STOP)
# define O_LRM_RECONNECT (A_LRM_CONNECT|A_LRM_DISCONNECT)
# define O_DC_TIMER_RESTART (A_DC_TIMER_STOP|A_DC_TIMER_START)
/*======================================
*
* "register" contents
*
* Things we may want to remember regardless of which state we are in.
*
* These also count as inputs for synthesizing I_*
*
*======================================*/
# define R_THE_DC 0x00000001ULL
/* Are we the DC? */
# define R_STARTING 0x00000002ULL
/* Are we starting up? */
# define R_SHUTDOWN 0x00000004ULL
/* Are we trying to shut down? */
# define R_STAYDOWN 0x00000008ULL
/* Should we restart? */
# define R_JOIN_OK 0x00000010ULL /* Have we completed the join process */
# define R_READ_CONFIG 0x00000040ULL
# define R_INVOKE_PE 0x00000080ULL // Should the scheduler be invoked?
# define R_CIB_CONNECTED 0x00000100ULL
/* Is the CIB connected? */
# define R_PE_CONNECTED 0x00000200ULL // Is the scheduler connected?
# define R_TE_CONNECTED 0x00000400ULL
/* Is the Transition Engine connected? */
# define R_LRM_CONNECTED 0x00000800ULL // Is pacemaker-execd connected?
# define R_CIB_REQUIRED 0x00001000ULL
/* Is the CIB required? */
# define R_PE_REQUIRED 0x00002000ULL // Is the scheduler required?
# define R_TE_REQUIRED 0x00004000ULL
/* Is the Transition Engine required? */
# define R_ST_REQUIRED 0x00008000ULL
/* Is the Stonith daemon required? */
# define R_CIB_DONE 0x00010000ULL
/* Have we calculated the CIB? */
# define R_HAVE_CIB 0x00020000ULL /* Do we have an up-to-date CIB */
# define R_CIB_ASKED 0x00040000ULL /* Have we asked for an up-to-date CIB */
# define R_MEMBERSHIP 0x00100000ULL /* Have we got cluster layer data yet */
# define R_PEER_DATA 0x00200000ULL /* Have we got T_CL_STATUS data yet */
# define R_HA_DISCONNECTED 0x00400000ULL /* did we sign out of our own accord */
# define R_REQ_PEND 0x01000000ULL
/* Are there Requests waiting for
processing? */
# define R_PE_PEND 0x02000000ULL // Are we awaiting reply from scheduler?
# define R_TE_PEND 0x04000000ULL
/* Has the TE been invoked and we're
awaiting completion? */
# define R_RESP_PEND 0x08000000ULL
/* Do we have clients waiting on a
response? if so perhaps we shouldn't
stop yet */
# define R_IN_TRANSITION 0x10000000ULL
/* */
# define R_SENT_RSC_STOP 0x20000000ULL /* Have we sent a stop action to all
* resources in preparation for
* shutting down */
# define R_IN_RECOVERY 0x80000000ULL
#define CRM_DIRECT_NACK_RC (99) // Deprecated (see PCMK_EXEC_INVALID)
enum crmd_fsa_cause {
C_UNKNOWN = 0,
C_STARTUP,
C_IPC_MESSAGE,
C_HA_MESSAGE,
C_CRMD_STATUS_CALLBACK,
C_LRM_OP_CALLBACK,
C_TIMER_POPPED,
C_SHUTDOWN,
C_FSA_INTERNAL,
};
enum fsa_data_type {
fsa_dt_none,
fsa_dt_ha_msg,
fsa_dt_xml,
fsa_dt_lrm,
};
typedef struct fsa_data_s fsa_data_t;
struct fsa_data_s {
int id;
enum crmd_fsa_input fsa_input;
enum crmd_fsa_cause fsa_cause;
uint64_t actions;
const char *origin;
void *data;
enum fsa_data_type data_type;
};
#define controld_set_fsa_input_flags(flags_to_set) do { \
controld_globals.fsa_input_register \
= pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"FSA input", "controller", \
controld_globals.fsa_input_register, \
(flags_to_set), #flags_to_set); \
} while (0)
#define controld_clear_fsa_input_flags(flags_to_clear) do { \
controld_globals.fsa_input_register \
= pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"FSA input", "controller", \
controld_globals.fsa_input_register, \
(flags_to_clear), \
#flags_to_clear); \
} while (0)
#define controld_set_fsa_action_flags(flags_to_set) do { \
controld_globals.fsa_actions \
= pcmk__set_flags_as(__func__, __LINE__, LOG_DEBUG, \
"FSA action", "controller", \
controld_globals.fsa_actions, \
(flags_to_set), #flags_to_set); \
} while (0)
#define controld_clear_fsa_action_flags(flags_to_clear) do { \
controld_globals.fsa_actions \
= pcmk__clear_flags_as(__func__, __LINE__, LOG_DEBUG, \
"FSA action", "controller", \
controld_globals.fsa_actions, \
(flags_to_clear), #flags_to_clear); \
} while (0)
extern cib_t *fsa_cib_conn;
extern char *fsa_pe_ref; // Last invocation of the scheduler
-extern GList *fsa_message_queue;
extern unsigned long long saved_ccm_membership_id;
// These should be moved elsewhere
void do_update_cib_nodes(gboolean overwrite, const char *caller);
int crmd_cib_smart_opt(void);
xmlNode *controld_query_executor_state(void);
const char *fsa_input2string(enum crmd_fsa_input input);
const char *fsa_state2string(enum crmd_fsa_state state);
const char *fsa_cause2string(enum crmd_fsa_cause cause);
const char *fsa_action2string(long long action);
enum crmd_fsa_state s_crmd_fsa(enum crmd_fsa_cause cause);
enum crmd_fsa_state controld_fsa_get_next_state(enum crmd_fsa_input input,
enum crmd_fsa_state state);
uint64_t controld_fsa_get_action(enum crmd_fsa_input input,
enum crmd_fsa_state state);
void controld_init_fsa_trigger(void);
void controld_destroy_fsa_trigger(void);
void free_max_generation(void);
# define AM_I_DC pcmk_is_set(controld_globals.fsa_input_register, R_THE_DC)
# define controld_trigger_fsa() controld_trigger_fsa_as(__func__, __LINE__)
void controld_trigger_fsa_as(const char *fn, int line);
/* A_READCONFIG */
void do_read_config(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_PE_INVOKE */
void do_pe_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_LOG */
void do_log(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STARTUP */
void do_startup(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_CIB_START, STOP, RESTART */
void do_cib_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_HA_CONNECT */
void do_ha_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_LRM_CONNECT */
void do_lrm_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_PE_START, STOP, RESTART */
void do_pe_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_TE_START, STOP, RESTART */
void do_te_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STARTED */
void do_started(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_MSG_ROUTE */
void do_msg_route(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_RECOVER */
void do_recover(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_ELECTION_VOTE */
void do_election_vote(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_ELECTION_COUNT */
void do_election_count_vote(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data);
/* A_ELECTION_CHECK */
void do_election_check(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_TIMER_STOP */
void do_timer_control(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_TAKEOVER */
void do_dc_takeover(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_RELEASE */
void do_dc_release(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_OFFER_ALL */
void do_dc_join_offer_all(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_OFFER_ONE */
void do_dc_join_offer_one(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_ACK */
void do_dc_join_ack(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_REQ */
void do_dc_join_filter_offer(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input,
fsa_data_t *msg_data);
/* A_DC_JOIN_FINALIZE */
void do_dc_join_finalize(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_CL_JOIN_QUERY */
/* is there a DC out there? */
void do_cl_join_query(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_CL_JOIN_ANNOUNCE */
void do_cl_join_announce(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
/* A_CL_JOIN_REQUEST */
void do_cl_join_offer_respond(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data);
/* A_CL_JOIN_RESULT */
void do_cl_join_finalize_respond(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input,
fsa_data_t *msg_data);
/* A_LRM_INVOKE */
void do_lrm_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_LRM_EVENT */
void do_lrm_event(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_TE_INVOKE, A_TE_CANCEL */
void do_te_invoke(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_SHUTDOWN_REQ */
void do_shutdown_req(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_SHUTDOWN */
void do_shutdown(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_STOP */
void do_stop(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_EXIT_0, A_EXIT_1 */
void do_exit(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input cur_input, fsa_data_t *msg_data);
/* A_DC_JOIN_FINAL */
void do_dc_join_final(long long action, enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t *msg_data);
#endif
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 2371a1239e..2c672b08ca 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1298 +1,1301 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
-GList *fsa_message_queue = NULL;
extern void crm_shutdown(int nsig);
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* debug only, can wrap all it likes */
static int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (controld_globals.fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
controld_globals.fsa_actions, TRUE, __func__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(controld_globals.fsa_actions, "Drop");
controld_globals.fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
void
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, uint64_t with_actions,
gboolean prepend, const char *raised_from)
{
- unsigned old_len = g_list_length(fsa_message_queue);
+ unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "<unknown>";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return;
}
if (input == I_WAIT_FOR_EVENT) {
controld_set_global_flags(controld_fsa_is_stalled);
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
controld_set_fsa_action_flags(with_actions);
fsa_dump_actions(with_actions, "Restored");
return;
}
/* Store everything in the new event and reset
* controld_globals.fsa_actions
*/
with_actions |= controld_globals.fsa_actions;
controld_globals.fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
raised_from, (prepend? "prepended" : "appended"), last_data_id,
fsa_input2string(input), fsa_cause2string(cause),
(data? "with" : "without"));
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input",
(unsigned long long) with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
crm_trace("Copying %s data from %s as cluster message data",
fsa_cause2string(cause), raised_from);
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_crit("Copying %s data (from %s) is not yet implemented",
fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
}
/* make sure to free it properly later */
if (prepend) {
- fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data);
+ controld_globals.fsa_message_queue
+ = g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
} else {
- fsa_message_queue = g_list_append(fsa_message_queue, fsa_data);
+ controld_globals.fsa_message_queue
+ = g_list_append(controld_globals.fsa_message_queue, fsa_data);
}
crm_trace("FSA message queue length is %d",
- g_list_length(fsa_message_queue));
+ g_list_length(controld_globals.fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
- if (old_len == g_list_length(fsa_message_queue)) {
+ if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (input != I_WAIT_FOR_EVENT) {
controld_trigger_fsa();
}
}
void
fsa_dump_queue(int log_level)
{
int offset = 0;
- GList *lpc = NULL;
- for (lpc = fsa_message_queue; lpc != NULL; lpc = lpc->next) {
- fsa_data_t *data = (fsa_data_t *) lpc->data;
+ for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
+ iter = iter->next) {
+ fsa_data_t *data = (fsa_data_t *) iter->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
CRM_ASSERT(copy != NULL);
copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
- fsa_data_t *message = g_list_nth_data(fsa_message_queue, 0);
+ fsa_data_t *message
+ = (fsa_data_t *) controld_globals.fsa_message_queue->data;
- fsa_message_queue = g_list_remove(fsa_message_queue, message);
+ controld_globals.fsa_message_queue
+ = g_list_remove(controld_globals.fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
/* Defering local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
int dest = 1;
bool is_for_dc = false;
bool is_for_dcib = false;
bool is_for_te = false;
bool is_for_crm = false;
bool is_for_cib = false;
bool is_local = false;
const char *host_to = crm_element_value(msg, F_CRM_HOST_TO);
const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
const char *type = crm_element_value(msg, F_TYPE);
const char *task = crm_element_value(msg, F_CRM_TASK);
const char *ref = crm_element_value(msg, XML_ATTR_REFERENCE);
if (ref == NULL) {
ref = "without reference ID";
}
if (msg == NULL) {
crm_warn("Cannot route empty message");
return TRUE;
} else if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
crm_trace("No routing needed for hello message %s", ref);
return TRUE;
} else if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
crm_warn("Received invalid message %s: type '%s' not '" T_CRM "'",
ref, pcmk__s(type, ""));
crm_log_xml_warn(msg, "[bad message type]");
return TRUE;
} else if (sys_to == NULL) {
crm_warn("Received invalid message %s: no subsystem", ref);
crm_log_xml_warn(msg, "[no subsystem]");
return TRUE;
}
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
is_local = false;
if (pcmk__str_empty(host_to)) {
if (is_for_dc || is_for_te) {
is_local = false;
} else if (is_for_crm) {
if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
PCMK__CONTROLD_CMD_NODES, NULL)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client may not know the local node name. Always handle these
* requests locally.
*/
is_local = true;
} else {
is_local = !originated_locally;
}
} else {
is_local = true;
}
} else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
pcmk__str_casei)) {
is_local = true;
} else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
// Local delete of an offline node's resource history
is_local = true;
}
}
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC && is_for_te) {
crm_trace("Route message %s locally as transition request", ref);
send_msg_via_ipc(msg, sys_to);
} else if (AM_I_DC) {
crm_trace("Route message %s locally as DC request", ref);
return FALSE; // More to be done by caller
} else if (originated_locally && !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
CRM_SYSTEM_TENGINE, NULL)) {
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
}
#endif
crm_trace("Relay message %s to DC", ref);
send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE);
} else {
/* Neither the TE nor the scheduler should be sending messages
* to DCs on other nodes. By definition, if we are no longer the DC,
* then the scheduler's or TE's data should be discarded.
*/
crm_trace("Discard message %s because we are not DC", ref);
}
} else if (is_local && (is_for_crm || is_for_cib)) {
crm_trace("Route message %s locally as controller request", ref);
return FALSE; // More to be done by caller
} else if (is_local) {
crm_trace("Relay message %s locally to %s",
ref, (sys_to? sys_to : "unknown client"));
crm_log_xml_trace(msg, "[IPC relay]");
send_msg_via_ipc(msg, sys_to);
} else {
crm_node_t *node_to = NULL;
#if SUPPORT_COROSYNC
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
if (dest == crm_msg_none || dest > crm_msg_stonith_ng) {
dest = crm_msg_crmd;
}
}
#endif
if (host_to) {
node_to = pcmk__search_cluster_node_cache(0, host_to);
if (node_to == NULL) {
crm_warn("Cannot route message %s: Unknown node %s",
ref, host_to);
return TRUE;
}
crm_trace("Relay message %s to %s",
ref, (node_to->uname? node_to->uname : "peer"));
} else {
crm_trace("Broadcast message %s to all peers", ref);
}
send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE);
}
return TRUE; // No further processing of message is needed
}
// Return true if field contains a positive integer
static bool
authorize_version(xmlNode *message_data, const char *field,
const char *client_name, const char *ref, const char *uuid)
{
const char *version = crm_element_value(message_data, field);
long long version_num;
if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
|| (version_num < 0LL)) {
crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
CRM_XS " ref=%s uuid=%s",
client_name, ((version == NULL)? "" : version),
field, (ref? ref : "none"), uuid);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether a client IPC message is acceptable
*
* If a given client IPC message is a hello, "authorize" it by ensuring it has
* valid information such as a protocol version, and return false indicating
* that nothing further needs to be done with the message. If the message is not
* a hello, just return true to indicate it needs further processing.
*
* \param[in] client_msg XML of IPC message
* \param[in,out] curr_client If IPC is not proxied, client that sent message
* \param[in] proxy_session If IPC is proxied, the session ID
*
* \return true if message needs further processing, false if it doesn't
*/
bool
controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
const char *proxy_session)
{
xmlNode *message_data = NULL;
const char *client_name = NULL;
const char *op = crm_element_value(client_msg, F_CRM_TASK);
const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE);
const char *uuid = (curr_client? curr_client->id : proxy_session);
if (uuid == NULL) {
crm_warn("IPC message from client rejected: No client identifier "
CRM_XS " ref=%s", (ref? ref : "none"));
goto rejected;
}
if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
// Only hello messages need to be authorized
return true;
}
message_data = get_message_xml(client_msg, F_CRM_DATA);
client_name = crm_element_value(message_data, "client_name");
if (pcmk__str_empty(client_name)) {
crm_warn("IPC hello from client rejected: No client name",
CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
goto rejected;
}
if (!authorize_version(message_data, "major_version", client_name, ref,
uuid)) {
goto rejected;
}
if (!authorize_version(message_data, "minor_version", client_name, ref,
uuid)) {
goto rejected;
}
crm_trace("Validated IPC hello from client %s", client_name);
if (curr_client) {
curr_client->userdata = strdup(client_name);
}
controld_trigger_fsa();
return false;
rejected:
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
return false;
}
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, F_CRM_MSG_TYPE);
if (pcmk__str_eq(type, XML_ATTR_REQUEST, pcmk__str_none)) {
return handle_request(msg, cause);
} else if (pcmk__str_eq(type, XML_ATTR_RESPONSE, pcmk__str_none)) {
handle_response(msg);
return I_NULL;
}
crm_err("Unknown message type: %s", type);
return I_NULL;
}
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
if (xml_op) {
xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
if (xml_rsc) {
rsc = ID(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_OP);
crm_element_value_ms(xml_attrs,
CRM_META "_" XML_RSC_ATTR_CLEAR_INTERVAL,
&interval_ms);
}
}
uname = crm_element_value(xml_op, XML_LRM_ATTR_TARGET);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, XML_LRM_ATTR_ROUTER_NODE)) {
is_remote_node = TRUE;
}
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
const char *mode = NULL;
xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
CRM_CHECK(msg_data != NULL, return I_NULL);
/* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
* relay the operation to the affected node, which will unregister the
* resource from the local executor, clear the resource's history from the
* CIB, and do some bookkeeping in the controller.
*
* However, if the affected node is offline, the client will specify
* mode="cib" which means the controller receiving the operation should
* clear the resource's history from the CIB and nothing else. This is used
* to clear shutdown locks.
*/
mode = crm_element_value(msg_data, PCMK__XA_MODE);
if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
// Relay to affected node
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else {
// Delete CIB history locally (compare with do_lrm_delete())
const char *from_sys = NULL;
const char *user_name = NULL;
const char *rsc_id = NULL;
const char *node = NULL;
xmlNode *rsc_xml = NULL;
int rc = pcmk_rc_ok;
rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
CRM_CHECK(rsc_xml != NULL, return I_NULL);
rsc_id = ID(rsc_xml);
from_sys = crm_element_value(stored_msg, F_CRM_SYS_FROM);
node = crm_element_value(msg_data, XML_LRM_ATTR_TARGET);
user_name = pcmk__update_acl_user(stored_msg, F_CRM_USER, NULL);
crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
"(clearing CIB resource history only)", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""));
rc = controld_delete_resource_history(rsc_id, node, user_name,
cib_dryrun|cib_sync_call);
if (rc == pcmk_rc_ok) {
rc = controld_delete_resource_history(rsc_id, node, user_name,
crmd_cib_smart_opt());
}
//Notify client and tengine.(Only notify tengine if mode = "cib" and CRM_OP_LRM_DELETE.)
if (from_sys) {
lrmd_event_data_t *op = NULL;
const char *from_host = crm_element_value(stored_msg,
F_CRM_HOST_FROM);
const char *transition;
if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
transition = crm_element_value(msg_data,
XML_ATTR_TRANSITION_KEY);
} else {
transition = crm_element_value(stored_msg,
XML_ATTR_TRANSITION_KEY);
}
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "local node"), rsc_id,
((rc == pcmk_rc_ok)? "" : " not"));
op = lrmd_new_event(rsc_id, CRMD_ACTION_DELETE, 0);
op->type = lrmd_event_exec_complete;
op->user_data = strdup(transition? transition : FAKE_TE_ID);
op->params = pcmk__strkey_table(free, free);
g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION),
strdup(CRM_FEATURE_SET));
controld_rc2event(op, rc);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
return I_NULL;
}
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(const xmlNode *msg)
{
const char *remote_uname = ID(msg);
crm_node_t *remote_peer;
bool remote_is_up = false;
int rc = pcmk_rc_ok;
rc = pcmk__xe_get_bool_attr(msg, XML_NODE_IN_CLUSTER, &remote_is_up);
CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
remote_peer = crm_remote_peer_get(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
pcmk__update_peer_state(__func__, remote_peer,
remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
0);
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(const xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
// Build reply
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
// Add controller state
value = fsa_state2string(controld_globals.fsa_state);
crm_xml_add(ping, XML_PING_ATTR_CRMDSTATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
// Send reply
reply = create_reply(msg, ping);
free_xml(ping);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a PCMK__CONTROLD_CMD_NODES message
*
* \param[in] request Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_list(const xmlNode *request)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Create message data for reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
crm_xml_add_ll(xml, XML_ATTR_ID, (long long) node->id); // uint32_t
crm_xml_add(xml, XML_ATTR_UNAME, node->uname);
crm_xml_add(xml, XML_NODE_IN_CLUSTER, node->state);
}
// Create and send reply
reply = create_reply(request, reply_data);
free_xml(reply_data);
if (reply) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(const xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Build reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(reply_data, XML_PING_ATTR_SYSFROM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
pcmk__xe_set_bool_attr(reply_data, XML_ATTR_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, XML_ATTR_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, XML_ATTR_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = controld_globals.our_nodename;
}
node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY);
if (node) {
crm_xml_add_int(reply_data, XML_ATTR_ID, node->id);
crm_xml_add(reply_data, XML_ATTR_UUID, node->uuid);
crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname);
crm_xml_add(reply_data, XML_NODE_IS_PEER, node->state);
pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE,
pcmk_is_set(node->flags, crm_remote_node));
}
// Send reply
reply = create_reply(msg, reply_data);
free_xml(reply_data);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, XML_ATTR_CRM_VERSION);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
controld_set_fsa_input_flags(R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// The expected case -- we initiated own shutdown sequence
crm_info("Shutting down controller");
return I_STOP;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
// Must be logic error -- DC confirming its own unrequested shutdown
crm_err("Shutting down controller immediately due to "
"unexpected shutdown confirmation");
return I_TERMINATE;
}
if (controld_globals.fsa_state != S_STOPPING) {
// Shouldn't happen -- non-DC confirming unrequested shutdown
crm_err("Starting new DC election because %s is "
"confirming shutdown we did not request",
(host_from? host_from : "another node"));
return I_ELECTION;
}
// Shouldn't happen, but we are already stopping anyway
crm_debug("Ignoring unexpected shutdown confirmation from %s",
(host_from? host_from : "another node"));
return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (host_from == NULL) {
crm_warn("Ignoring shutdown request without origin specified");
return I_NULL;
}
if ((controld_globals.dc_name == NULL)
|| (strcasecmp(host_from, controld_globals.dc_name) == 0)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting down controller after confirmation from %s",
host_from);
} else {
crm_err("Shutting down controller after unexpected "
"shutdown request from %s", host_from);
controld_set_fsa_input_flags(R_STAYDOWN);
}
return I_STOP;
}
crm_warn("Ignoring shutdown request from %s because DC is %s",
host_from, controld_globals.dc_name);
return I_NULL;
}
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
if (op == NULL) {
crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]");
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_node_t *node = pcmk__search_cluster_node_cache(0, from);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_self_ack(stored_msg);
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
// Another controller wants to shut down its node
return handle_shutdown_request(stored_msg);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && transition_graph != NULL) {
if (!transition_graph->complete) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
/* Sometimes we _must_ go into S_ELECTION */
if (controld_globals.fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
#if 0
} else if (AM_I_DC) {
/* This is the old way of doing things but what is gained? */
return I_ELECTION;
#endif
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s", crm_element_value(stored_msg, F_CRM_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
return handle_lrm_delete(stored_msg);
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|| (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
crm_xml_add(stored_msg, F_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_LOCAL_SHUTDOWN) == 0) {
crm_shutdown(SIGTERM);
/*return I_SHUTDOWN; */
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, XML_ATTR_ID, &id);
name = crm_element_value(stored_msg, XML_ATTR_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
reap_crm_member(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
remote_ra_process_maintenance_nodes(xml);
} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
return handle_node_list(stored_msg);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (!AM_I_DC) {
if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_ack(stored_msg);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
static void
handle_response(xmlNode *stored_msg)
{
const char *op = crm_element_value(stored_msg, F_CRM_TASK);
if (op == NULL) {
crm_log_xml_err(stored_msg, "Bad message");
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether scheduler answer been superseded by subsequent request
const char *msg_ref = crm_element_value(stored_msg, XML_ATTR_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (pcmk__str_eq(msg_ref, fsa_pe_ref, pcmk__str_casei)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
} else {
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* handle here to avoid potential version issues
* where the shutdown message/procedure may have
* been changed in later versions.
*
* This way the DC is always in control of the shutdown
*/
char *now_s = NULL;
const char *host_from = crm_element_value(stored_msg, F_CRM_HOST_FROM);
if (host_from == NULL) {
/* we're shutting down and the DC */
host_from = controld_globals.our_nodename;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from,
fsa_state2string(controld_globals.fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = pcmk__ttoa(time(NULL));
update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* will be picked up by the TE as long as its running */
return I_NULL;
}
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
pcmk__client_t *client_channel = NULL;
CRM_CHECK(sys != NULL, return);
client_channel = pcmk__find_client_by_id(sys);
if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) {
crm_xml_add(msg, F_CRM_HOST_FROM, controld_globals.our_nodename);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
xmlNode *data = get_message_xml(msg, F_CRM_DATA);
process_te_message(msg, data);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
fsa_input.msg = msg;
fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __func__;
fsa_data.data_type = fsa_dt_ha_msg;
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
I_MESSAGE, &fsa_data);
} else if (crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_info("Received invalid request: unknown subsystem '%s'", sys);
}
}
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
/*!
* \internal
* \brief Notify the DC of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up TRUE if node is up, FALSE if down
*/
void
send_remote_state_message(const char *node_name, gboolean node_up)
{
/* If we don't have a DC, or the message fails, we have a failsafe:
* the DC will eventually pick up the change via the CIB node state.
* The message allows it to happen sooner if possible.
*/
if (controld_globals.dc_name != NULL) {
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL,
controld_globals.dc_name, CRM_SYSTEM_DC,
CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying DC %s of Pacemaker Remote node %s %s",
controld_globals.dc_name, node_name,
node_up? "coming up" : "going down");
crm_xml_add(msg, XML_ATTR_ID, node_name);
pcmk__xe_set_bool_attr(msg, XML_NODE_IN_CLUSTER, node_up);
send_cluster_message(crm_get_peer(0, controld_globals.dc_name),
crm_msg_crmd, msg, TRUE);
free_xml(msg);
} else {
crm_debug("No DC to notify of Pacemaker Remote node %s %s",
node_name, (node_up? "coming up" : "going down"));
}
}
diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h
index 9111282d03..f1f3e5f068 100644
--- a/daemons/controld/pacemaker-controld.h
+++ b/daemons/controld/pacemaker-controld.h
@@ -1,123 +1,126 @@
/*
* Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef CRMD__H
# define CRMD__H
#include <controld_alerts.h>
#include <controld_callbacks.h>
#include <controld_fencing.h>
#include <controld_fsa.h>
#include <controld_timers.h>
#include <controld_lrm.h>
#include <controld_membership.h>
#include <controld_messages.h>
#include <controld_metadata.h>
#include <controld_throttle.h>
#include <controld_transition.h>
#include <controld_utils.h>
typedef struct {
// Booleans
//! Group of \p controld_flags values
uint32_t flags;
// Controller FSA
//! FSA state
enum crmd_fsa_state fsa_state;
//! FSA actions (group of \p A_* flags)
uint64_t fsa_actions;
//! FSA input register contents (group of \p R_* flags)
uint64_t fsa_input_register;
+ //! FSA message queue
+ GList *fsa_message_queue;
+
// Other
//! Cluster name
char *cluster_name;
//! Designated controller name
char *dc_name;
//! Designated controller's Pacemaker version
char *dc_version;
//! Local node's node name
char *our_nodename;
//! Local node's UUID
char *our_uuid;
//! Main event loop
GMainLoop *mainloop;
} controld_globals_t;
extern controld_globals_t controld_globals;
/*!
* \internal
* \enum controld_flags
* \brief Bit flags to store various controller state and configuration info
*/
enum controld_flags {
//! The DC left in a membership change that is being processed
controld_dc_left = (1 << 0),
//! The FSA is stalled waiting for further input
controld_fsa_is_stalled = (1 << 1),
//! The local node has been in a quorate partition at some point
controld_ever_had_quorum = (1 << 2),
//! The local node is currently in a quorate partition
controld_has_quorum = (1 << 3),
//! Panic the local node if it loses quorum
controld_no_quorum_suicide = (1 << 4),
//! Lock resources to the local node when it shuts down cleanly
controld_shutdown_lock_enabled = (1 << 5),
};
# define controld_set_global_flags(flags_to_set) do { \
controld_globals.flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, \
"Global", "controller", \
controld_globals.flags, \
(flags_to_set), \
#flags_to_set); \
} while (0)
# define controld_clear_global_flags(flags_to_clear) do { \
controld_globals.flags \
= pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Global", \
"controller", controld_globals.flags, \
(flags_to_clear), #flags_to_clear); \
} while (0)
# define controld_trigger_config() \
controld_trigger_config_as(__func__, __LINE__)
void do_cib_updated(const char *event, xmlNode * msg);
void do_cib_replaced(const char *event, xmlNode * msg);
void crmd_metadata(void);
void controld_trigger_config_as(const char *fn, int line);
void controld_election_init(const char *uname);
void controld_remove_voter(const char *uname);
void controld_election_fini(void);
void controld_set_election_period(const char *value);
void controld_stop_current_election_timeout(void);
void controld_disconnect_cib_manager(void);
#endif
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jan 25, 10:50 AM (20 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1322199
Default Alt Text
(155 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment