Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F4624747
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
141 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index d6590e1b3a..003466d275 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1334 +1,1336 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* debug only, can wrap all it likes */
static int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (controld_globals.fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
controld_globals.fsa_actions, TRUE, __func__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(controld_globals.fsa_actions, "Drop");
controld_globals.fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
void
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, uint64_t with_actions,
gboolean prepend, const char *raised_from)
{
unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "<unknown>";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return;
}
if (input == I_WAIT_FOR_EVENT) {
controld_set_global_flags(controld_fsa_is_stalled);
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
controld_set_fsa_action_flags(with_actions);
fsa_dump_actions(with_actions, "Restored");
return;
}
/* Store everything in the new event and reset
* controld_globals.fsa_actions
*/
with_actions |= controld_globals.fsa_actions;
controld_globals.fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
raised_from, (prepend? "prepended" : "appended"), last_data_id,
fsa_input2string(input), fsa_cause2string(cause),
(data? "with" : "without"));
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input",
(unsigned long long) with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
crm_trace("Copying %s data from %s as cluster message data",
fsa_cause2string(cause), raised_from);
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_crit("Copying %s data (from %s) is not yet implemented",
fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
}
/* make sure to free it properly later */
if (prepend) {
controld_globals.fsa_message_queue
= g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
} else {
controld_globals.fsa_message_queue
= g_list_append(controld_globals.fsa_message_queue, fsa_data);
}
crm_trace("FSA message queue length is %d",
g_list_length(controld_globals.fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (input != I_WAIT_FOR_EVENT) {
controld_trigger_fsa();
}
}
void
fsa_dump_queue(int log_level)
{
int offset = 0;
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *data = (fsa_data_t *) iter->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
CRM_ASSERT(copy != NULL);
copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
fsa_data_t *message
= (fsa_data_t *) controld_globals.fsa_message_queue->data;
controld_globals.fsa_message_queue
= g_list_remove(controld_globals.fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
/* Defering local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
enum crm_ais_msg_types dest = crm_msg_ais;
bool is_for_dc = false;
bool is_for_dcib = false;
bool is_for_te = false;
bool is_for_crm = false;
bool is_for_cib = false;
bool is_local = false;
bool broadcast = false;
const char *host_to = NULL;
const char *sys_to = NULL;
const char *sys_from = NULL;
const char *type = NULL;
const char *task = NULL;
const char *ref = NULL;
crm_node_t *node_to = NULL;
CRM_CHECK(msg != NULL, return TRUE);
host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO);
sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
type = crm_element_value(msg, PCMK__XA_T);
task = crm_element_value(msg, PCMK__XA_CRM_TASK);
ref = crm_element_value(msg, PCMK_XA_REFERENCE);
broadcast = pcmk__str_empty(host_to);
if (ref == NULL) {
ref = "without reference ID";
}
if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
crm_trace("Received hello %s from %s (no processing needed)",
ref, pcmk__s(sys_from, "unidentified source"));
crm_log_xml_trace(msg, "hello");
return TRUE;
}
// Require message type (set by create_request())
if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
crm_warn("Ignoring invalid message %s with type '%s' (not '" T_CRM "')",
ref, pcmk__s(type, ""));
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Require a destination subsystem (also set by create_request())
if (sys_to == NULL) {
crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO,
ref);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Get the message type appropriate to the destination subsystem
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) {
/* Unrecognized value, use a sane default
*
* @TODO Maybe we should bail instead
*/
dest = crm_msg_crmd;
}
}
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
// Check whether message should be processed locally
is_local = false;
if (broadcast) {
if (is_for_dc || is_for_te) {
is_local = false;
} else if (is_for_crm) {
if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
PCMK__CONTROLD_CMD_NODES, NULL)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client may not know the local node name. Always handle these
* requests locally.
*/
is_local = true;
} else {
is_local = !originated_locally;
}
} else {
is_local = true;
}
} else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
pcmk__str_casei)) {
is_local = true;
} else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
- if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+ if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Local delete of an offline node's resource history
is_local = true;
}
}
// Check whether message should be relayed
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC) {
if (is_for_te) {
crm_trace("Route message %s locally as transition request",
ref);
crm_log_xml_trace(msg, sys_to);
send_msg_via_ipc(msg, sys_to);
return TRUE; // No further processing of message is needed
}
crm_trace("Route message %s locally as DC request", ref);
return FALSE; // More to be done by caller
}
if (originated_locally
&& !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
CRM_SYSTEM_TENGINE, NULL)) {
crm_trace("Relay message %s to DC (via %s)",
ref, pcmk__s(host_to, "broadcast"));
crm_log_xml_trace(msg, "relayed");
if (!broadcast) {
node_to = pcmk__get_node(0, host_to, NULL,
pcmk__node_search_cluster);
}
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
/* Transition engine and scheduler messages are sent only to the DC on
* the same node. If we are no longer the DC, discard this message.
*/
crm_trace("Ignoring message %s because we are no longer DC", ref);
crm_log_xml_trace(msg, "ignored");
return TRUE; // No further processing of message is needed
}
if (is_local) {
if (is_for_crm || is_for_cib) {
crm_trace("Route message %s locally as controller request", ref);
return FALSE; // More to be done by caller
}
crm_trace("Relay message %s locally to %s", ref, sys_to);
crm_log_xml_trace(msg, "IPC-relay");
send_msg_via_ipc(msg, sys_to);
return TRUE;
}
if (!broadcast) {
node_to = pcmk__search_node_caches(0, host_to,
pcmk__node_search_cluster);
if (node_to == NULL) {
crm_warn("Ignoring message %s because node %s is unknown",
ref, host_to);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
}
crm_trace("Relay message %s to %s",
ref, pcmk__s(host_to, "all peers"));
crm_log_xml_trace(msg, "relayed");
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
// Return true if field contains a positive integer
static bool
authorize_version(xmlNode *message_data, const char *field,
const char *client_name, const char *ref, const char *uuid)
{
const char *version = crm_element_value(message_data, field);
long long version_num;
if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
|| (version_num < 0LL)) {
crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
CRM_XS " ref=%s uuid=%s",
client_name, ((version == NULL)? "" : version),
field, (ref? ref : "none"), uuid);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether a client IPC message is acceptable
*
* If a given client IPC message is a hello, "authorize" it by ensuring it has
* valid information such as a protocol version, and return false indicating
* that nothing further needs to be done with the message. If the message is not
* a hello, just return true to indicate it needs further processing.
*
* \param[in] client_msg XML of IPC message
* \param[in,out] curr_client If IPC is not proxied, client that sent message
* \param[in] proxy_session If IPC is proxied, the session ID
*
* \return true if message needs further processing, false if it doesn't
*/
bool
controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
const char *proxy_session)
{
xmlNode *message_data = NULL;
const char *client_name = NULL;
const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK);
const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE);
const char *uuid = (curr_client? curr_client->id : proxy_session);
if (uuid == NULL) {
crm_warn("IPC message from client rejected: No client identifier "
CRM_XS " ref=%s", (ref? ref : "none"));
goto rejected;
}
if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
// Only hello messages need to be authorized
return true;
}
message_data = get_message_xml(client_msg, F_CRM_DATA);
client_name = crm_element_value(message_data, "client_name");
if (pcmk__str_empty(client_name)) {
crm_warn("IPC hello from client rejected: No client name",
CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
goto rejected;
}
if (!authorize_version(message_data, "major_version", client_name, ref,
uuid)) {
goto rejected;
}
if (!authorize_version(message_data, "minor_version", client_name, ref,
uuid)) {
goto rejected;
}
crm_trace("Validated IPC hello from client %s", client_name);
crm_log_xml_trace(client_msg, "hello");
if (curr_client) {
curr_client->userdata = strdup(client_name);
}
controld_trigger_fsa();
return false;
rejected:
crm_log_xml_trace(client_msg, "rejected");
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
return false;
}
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, PCMK__XA_SUBT);
if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) {
return handle_request(msg, cause);
}
if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
handle_response(msg);
return I_NULL;
}
crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'",
pcmk__s(type, ""));
crm_log_xml_trace(msg, "bad");
return I_NULL;
}
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
if (xml_op) {
xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
if (xml_rsc) {
rsc = ID(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_OP);
crm_element_value_ms(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL,
&interval_ms);
}
}
uname = crm_element_value(xml_op, PCMK__META_ON_NODE);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) {
is_remote_node = TRUE;
}
crm_debug("Clearing failures for %s-interval %s on %s "
"from attribute manager, CIB, and executor state",
pcmk__readable_interval(interval_ms), rsc, uname);
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
const char *mode = NULL;
xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
CRM_CHECK(msg_data != NULL, return I_NULL);
/* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
* relay the operation to the affected node, which will unregister the
* resource from the local executor, clear the resource's history from the
* CIB, and do some bookkeeping in the controller.
*
* However, if the affected node is offline, the client will specify
- * mode="cib" which means the controller receiving the operation should
- * clear the resource's history from the CIB and nothing else. This is used
- * to clear shutdown locks.
+ * mode=PCMK__VALUE_CIB which means the controller receiving the operation
+ * should clear the resource's history from the CIB and nothing else. This
+ * is used to clear shutdown locks.
*/
mode = crm_element_value(msg_data, PCMK__XA_MODE);
- if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+ if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Relay to affected node
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else {
// Delete CIB history locally (compare with do_lrm_delete())
const char *from_sys = NULL;
const char *user_name = NULL;
const char *rsc_id = NULL;
const char *node = NULL;
xmlNode *rsc_xml = NULL;
int rc = pcmk_rc_ok;
rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
CRM_CHECK(rsc_xml != NULL, return I_NULL);
rsc_id = ID(rsc_xml);
from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM);
node = crm_element_value(msg_data, PCMK__META_ON_NODE);
user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL);
crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
"(clearing CIB resource history only)", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""));
rc = controld_delete_resource_history(rsc_id, node, user_name,
cib_dryrun|cib_sync_call);
if (rc == pcmk_rc_ok) {
rc = controld_delete_resource_history(rsc_id, node, user_name,
crmd_cib_smart_opt());
}
- //Notify client and tengine.(Only notify tengine if mode = "cib" and CRM_OP_LRM_DELETE.)
+ /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and
+ * op=CRM_OP_LRM_DELETE.
+ */
if (from_sys) {
lrmd_event_data_t *op = NULL;
const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC);
const char *transition;
if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
transition = crm_element_value(msg_data,
PCMK__XA_TRANSITION_KEY);
} else {
transition = crm_element_value(stored_msg,
PCMK__XA_TRANSITION_KEY);
}
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "local node"), rsc_id,
((rc == pcmk_rc_ok)? "" : " not"));
op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0);
op->type = lrmd_event_exec_complete;
op->user_data = strdup(transition? transition : FAKE_TE_ID);
op->params = pcmk__strkey_table(free, free);
g_hash_table_insert(op->params, strdup(PCMK_XA_CRM_FEATURE_SET),
strdup(CRM_FEATURE_SET));
controld_rc2event(op, rc);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
return I_NULL;
}
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(const xmlNode *msg)
{
const char *conn_host = NULL;
const char *remote_uname = ID(msg);
crm_node_t *remote_peer;
bool remote_is_up = false;
int rc = pcmk_rc_ok;
rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up);
CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
remote_peer = crm_remote_peer_get(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
pcmk__update_peer_state(__func__, remote_peer,
remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
0);
conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
if (conn_host) {
pcmk__str_update(&remote_peer->conn_host, conn_host);
} else if (remote_peer->conn_host) {
free(remote_peer->conn_host);
remote_peer->conn_host = NULL;
}
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(const xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
// Build reply
ping = create_xml_node(NULL, PCMK__XE_PING_RESPONSE);
value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value);
// Add controller state
value = fsa_state2string(controld_globals.fsa_state);
crm_xml_add(ping, PCMK__XA_CRMD_STATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, PCMK_XA_RESULT, "ok");
// Send reply
reply = create_reply(msg, ping);
free_xml(ping);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a PCMK__CONTROLD_CMD_NODES message
*
* \param[in] request Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_list(const xmlNode *request)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Create message data for reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
crm_xml_add_ll(xml, PCMK_XA_ID, (long long) node->id); // uint32_t
crm_xml_add(xml, PCMK_XA_UNAME, node->uname);
crm_xml_add(xml, PCMK__XA_IN_CCM, node->state);
}
// Create and send reply
reply = create_reply(request, reply_data);
free_xml(reply_data);
if (reply) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(const xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Build reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, PCMK_XA_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, PCMK_XA_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = controld_globals.our_nodename;
}
node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any);
if (node) {
crm_xml_add(reply_data, PCMK_XA_ID, node->uuid);
crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname);
crm_xml_add(reply_data, PCMK__XA_CRMD, node->state);
pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE,
pcmk_is_set(node->flags, crm_remote_node));
}
// Send reply
reply = create_reply(msg, reply_data);
free_xml(reply_data);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
controld_set_fsa_input_flags(R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// The expected case -- we initiated own shutdown sequence
crm_info("Shutting down controller");
return I_STOP;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
// Must be logic error -- DC confirming its own unrequested shutdown
crm_err("Shutting down controller immediately due to "
"unexpected shutdown confirmation");
return I_TERMINATE;
}
if (controld_globals.fsa_state != S_STOPPING) {
// Shouldn't happen -- non-DC confirming unrequested shutdown
crm_err("Starting new DC election because %s is "
"confirming shutdown we did not request",
(host_from? host_from : "another node"));
return I_ELECTION;
}
// Shouldn't happen, but we are already stopping anyway
crm_debug("Ignoring unexpected shutdown confirmation from %s",
(host_from? host_from : "another node"));
return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
crm_warn("Ignoring shutdown request without origin specified");
return I_NULL;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name,
pcmk__str_null_matches|pcmk__str_casei)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting down controller after confirmation from %s",
host_from);
} else {
crm_err("Shutting down controller after unexpected "
"shutdown request from %s", host_from);
controld_set_fsa_input_flags(R_STAYDOWN);
}
return I_STOP;
}
crm_warn("Ignoring shutdown request from %s because DC is %s",
host_from, controld_globals.dc_name);
return I_NULL;
}
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
crm_log_xml_trace(stored_msg, "request");
if (op == NULL) {
crm_warn("Ignoring request without " PCMK__XA_CRM_TASK);
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
crm_node_t *node = pcmk__search_node_caches(0, from,
pcmk__node_search_cluster);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_self_ack(stored_msg);
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
// Another controller wants to shut down its node
return handle_shutdown_request(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && (controld_globals.transition_graph != NULL)
&& !controld_globals.transition_graph->complete) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
/* Sometimes we _must_ go into S_ELECTION */
if (controld_globals.fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
return handle_lrm_delete(stored_msg);
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|| (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, PCMK_XA_ID, &id);
name = crm_element_value(stored_msg, PCMK_XA_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
reap_crm_member(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
remote_ra_process_maintenance_nodes(xml);
} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
return handle_node_list(stored_msg);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (!AM_I_DC) {
if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_ack(stored_msg);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
static void
handle_response(xmlNode *stored_msg)
{
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
crm_log_xml_trace(stored_msg, "reply");
if (op == NULL) {
crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK);
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether scheduler answer been superseded by subsequent request
const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
pcmk__str_none)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
} else {
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* handle here to avoid potential version issues
* where the shutdown message/procedure may have
* been changed in later versions.
*
* This way the DC is always in control of the shutdown
*/
char *now_s = NULL;
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
/* we're shutting down and the DC */
host_from = controld_globals.our_nodename;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from,
fsa_state2string(controld_globals.fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = pcmk__ttoa(time(NULL));
update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* will be picked up by the TE as long as its running */
return I_NULL;
}
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
pcmk__client_t *client_channel = NULL;
CRM_CHECK(sys != NULL, return);
client_channel = pcmk__find_client_by_id(sys);
if (crm_element_value(msg, PCMK__XA_SRC) == NULL) {
crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
xmlNode *data = get_message_xml(msg, F_CRM_DATA);
process_te_message(msg, data);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
fsa_input.msg = msg;
fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __func__;
fsa_data.data_type = fsa_dt_ha_msg;
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
I_MESSAGE, &fsa_data);
} else if (crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_info("Received invalid request: unknown subsystem '%s'", sys);
}
}
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
/*!
* \internal
* \brief Notify the cluster of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up true if node is up, false if down
*/
void
broadcast_remote_state_message(const char *node_name, bool node_up)
{
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying cluster of Pacemaker Remote node %s %s",
node_name, node_up? "coming up" : "going down");
crm_xml_add(msg, PCMK_XA_ID, node_name);
pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up);
if (node_up) {
crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename);
}
send_cluster_message(NULL, crm_msg_crmd, msg, TRUE);
free_xml(msg);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index ce2635db19..772b6b86f9 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,760 +1,760 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
static GHashTable *te_targets = NULL;
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
static void
te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
action->timer = g_timeout_add(action->timeout + graph->network_delay,
action_timer_callback, (void *) action);
CRM_ASSERT(action->timer != 0);
}
/*!
* \internal
* \brief Execute a graph pseudo-action
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] pseudo Pseudo-action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
const char *task = crm_element_value(pseudo->xml, PCMK_XA_OPERATION);
/* send to peers as well? */
if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, PCMK__XA_OPERATION_KEY));
te_action_confirmed(pseudo, graph);
return pcmk_rc_ok;
}
static int
get_target_rc(pcmk__graph_action_t *action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, PCMK__META_OP_TARGET_RC),
&exit_status, 0);
return exit_status;
}
/*!
* \internal
* \brief Execute a cluster action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Cluster action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = on_node;
if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
- if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
+ if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
router_node = controld_globals.our_nodename;
}
}
}
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Handling controller request '%s' (%s on %s)%s%s",
id, task, on_node, (is_local? " locally" : ""),
(no_wait? " without waiting" : ""));
if (is_local
&& pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* defer until everything else completes */
crm_info("Controller request '%s' is a local shutdown", id);
graph->completion_action = pcmk__graph_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return pcmk_rc_ok;
} else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
crm_node_t *peer = pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(cmd, PCMK__XA_TRANSITION_KEY, counter);
rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster),
crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, target_rc,
controld_globals.te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(pcmk__graph_action_t *action,
lrmd_event_data_t *op)
{
cib_t *cib_conn = controld_globals.cib_conn;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
const char *target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
int target_rc = get_target_rc(action);
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, PCMK_XA_ID, target_uuid);
crm_xml_add(state, PCMK_XA_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, PCMK_XA_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, PCMK_XA_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_TYPE);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_CLASS);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
cib_scope_local);
fsa_register_cib_callback(rc, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
action->xml->name, action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Execute a resource action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Resource action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node),
crm_err("Corrupted command(id=%s) %s: no node",
ID(action->xml), pcmk__s(task, "without task"));
return pcmk_rc_node_unknown);
rsc_op = action->xml;
task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
router_node = crm_element_value(rsc_op, PCMK__XA_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(rsc_op, PCMK__XA_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
I_NULL, &msg);
} else {
rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster),
crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
/* Just mark confirmed. Don't bump the job count only to immediately
* decrement it.
*/
crm_info("Action %d confirmed - no wait", action->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
const char *t1 = crm_meta_value(action->params,
PCMK__META_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params,
PCMK__META_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
te_update_job_count_on(target, offset, FALSE);
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed on a node
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
* \param[in] target Name of node where action should be executed
*
* \return true if action is allowed, otherwise false
*/
static bool
allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *id = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if(target == NULL) {
/* No limit on these */
return true;
} else if(te_targets == NULL) {
return false;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return false;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return false;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return true;
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
*
* \return true if action is allowed, otherwise false
*/
static bool
graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return true;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
target = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE);
if (!allowed_on_node(graph, action, target)) {
return false;
}
target = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
return allowed_on_node(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in,out] action Action to confirm
* \param[in,out] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, PCMK__META_ON_NODE) != NULL)) {
te_update_job_count(action, -1);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
static pcmk__graph_functions_t te_graph_fns = {
execute_pseudo_action,
execute_rsc_action,
execute_cluster_action,
controld_execute_fence_action,
graph_action_allowed,
};
/*
* \internal
* \brief Register the transitioner's graph functions with \p libpacemaker
*/
void
controld_register_graph_functions(void)
{
pcmk__set_graph_functions(&te_graph_fns);
}
void
notify_crmd(pcmk__graph_t *graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s",
fsa_state2string(controld_globals.fsa_state));
CRM_CHECK(graph->complete, graph->complete = true);
switch (graph->completion_action) {
case pcmk__graph_wait:
type = "stop";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_done:
type = "done";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_restart:
type = "restart";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
if (controld_get_period_transition_timer() > 0) {
controld_stop_transition_timer();
controld_start_transition_timer();
} else {
event = I_PE_CALC;
}
} else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
}
break;
case pcmk__graph_shutdown:
type = "shutdown";
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = pcmk__graph_done;
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else {
controld_trigger_fsa();
}
}
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index c76cc2042f..a539b138af 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -1,185 +1,186 @@
/*
* Copyright 2006-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__OPTIONS_INTERNAL__H
# define PCMK__OPTIONS_INTERNAL__H
# ifndef PCMK__CONFIG_H
# define PCMK__CONFIG_H
# include <config.h> // _Noreturn
# endif
# include <glib.h> // GHashTable
# include <stdbool.h> // bool
#include <crm/common/util.h> // pcmk_parse_interval_spec()
_Noreturn void pcmk__cli_help(char cmd);
/*
* Environment variable option handling
*/
const char *pcmk__env_option(const char *option);
void pcmk__set_env_option(const char *option, const char *value, bool compat);
bool pcmk__env_option_enabled(const char *daemon, const char *option);
/*
* Cluster option handling
*/
typedef struct pcmk__cluster_option_s {
const char *name;
const char *alt_name;
const char *type;
const char *values;
const char *default_value;
bool (*is_valid)(const char *);
const char *description_short;
const char *description_long;
} pcmk__cluster_option_t;
const char *pcmk__cluster_option(GHashTable *options,
const pcmk__cluster_option_t *option_list,
int len, const char *name);
gchar *pcmk__format_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
pcmk__cluster_option_t *option_list,
int len);
void pcmk__validate_cluster_options(GHashTable *options,
pcmk__cluster_option_t *option_list,
int len);
bool pcmk__valid_interval_spec(const char *value);
bool pcmk__valid_boolean(const char *value);
bool pcmk__valid_int(const char *value);
bool pcmk__valid_positive_int(const char *value);
bool pcmk__valid_no_quorum_policy(const char *value);
bool pcmk__valid_percentage(const char *value);
bool pcmk__valid_script(const char *value);
// from watchdog.c
long pcmk__get_sbd_watchdog_timeout(void);
bool pcmk__get_sbd_sync_resource_startup(void);
long pcmk__auto_stonith_watchdog_timeout(void);
bool pcmk__valid_stonith_watchdog_timeout(const char *value);
// Constants for environment variable names
#define PCMK__ENV_AUTHKEY_LOCATION "authkey_location"
#define PCMK__ENV_BLACKBOX "blackbox"
#define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled"
#define PCMK__ENV_CLUSTER_TYPE "cluster_type"
#define PCMK__ENV_DEBUG "debug"
#define PCMK__ENV_DH_MAX_BITS "dh_max_bits"
#define PCMK__ENV_DH_MIN_BITS "dh_min_bits"
#define PCMK__ENV_FAIL_FAST "fail_fast"
#define PCMK__ENV_IPC_BUFFER "ipc_buffer"
#define PCMK__ENV_IPC_TYPE "ipc_type"
#define PCMK__ENV_LOGFACILITY "logfacility"
#define PCMK__ENV_LOGFILE "logfile"
#define PCMK__ENV_LOGFILE_MODE "logfile_mode"
#define PCMK__ENV_LOGPRIORITY "logpriority"
#define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit"
#define PCMK__ENV_NODE_START_STATE "node_start_state"
#define PCMK__ENV_PANIC_ACTION "panic_action"
#define PCMK__ENV_REMOTE_ADDRESS "remote_address"
#define PCMK__ENV_REMOTE_SCHEMA_DIR "remote_schema_directory"
#define PCMK__ENV_REMOTE_PID1 "remote_pid1"
#define PCMK__ENV_REMOTE_PORT "remote_port"
#define PCMK__ENV_RESPAWNED "respawned"
#define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory"
#define PCMK__ENV_SERVICE "service"
#define PCMK__ENV_STDERR "stderr"
#define PCMK__ENV_TLS_PRIORITIES "tls_priorities"
#define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox"
#define PCMK__ENV_TRACE_FILES "trace_files"
#define PCMK__ENV_TRACE_FORMATS "trace_formats"
#define PCMK__ENV_TRACE_FUNCTIONS "trace_functions"
#define PCMK__ENV_TRACE_TAGS "trace_tags"
#define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled"
// @COMPAT Drop at 3.0.0; default is plenty
#define PCMK__ENV_CIB_TIMEOUT "cib_timeout"
// @COMPAT Drop at 3.0.0; likely last used in 1.1.24
#define PCMK__ENV_MCP "mcp"
// @COMPAT Drop at 3.0.0; added unused in 1.1.9
#define PCMK__ENV_QUORUM_TYPE "quorum_type"
/* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is
* managed by systemd, but no longer useful.
*/
#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay"
// @COMPAT Deprecated since 2.1.0
#define PCMK__OPT_REMOVE_AFTER_STOP "remove-after-stop"
// Constants for meta-attribute names
#define PCMK__META_CLONE_INSTANCE_NUM "clone"
#define PCMK__META_CONTAINER "container"
#define PCMK__META_DIGESTS_ALL "digests-all"
#define PCMK__META_DIGESTS_SECURE "digests-secure"
#define PCMK__META_INTERNAL_RSC "internal_rsc"
#define PCMK__META_MIGRATE_SOURCE "migrate_source"
#define PCMK__META_MIGRATE_TARGET "migrate_target"
#define PCMK__META_ON_NODE "on_node"
#define PCMK__META_ON_NODE_UUID "on_node_uuid"
#define PCMK__META_OP_NO_WAIT "op_no_wait"
#define PCMK__META_OP_TARGET_RC "op_target_rc"
#define PCMK__META_PHYSICAL_HOST "physical-host"
/* @TODO Plug these in. Currently, they're never set. These are op attrs for use
* with https://projects.clusterlabs.org/T382.
*/
#define PCMK__META_CLEAR_FAILURE_OP "clear_failure_op"
#define PCMK__META_CLEAR_FAILURE_INTERVAL "clear_failure_interval"
// @COMPAT Deprecated meta-attribute since 2.1.0
#define PCMK__META_CAN_FAIL "can_fail"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_MAX since 2.0.0
#define PCMK__META_PROMOTED_MAX_LEGACY "master-max"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_NODE_MAX since 2.0.0
#define PCMK__META_PROMOTED_NODE_MAX_LEGACY "master-node-max"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_RESTART_TYPE "restart-type"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_ROLE_AFTER_FAILURE "role_after_failure"
// Constants for enumerated values for various options
+#define PCMK__VALUE_CIB "cib"
#define PCMK__VALUE_CLUSTER "cluster"
#define PCMK__VALUE_CUSTOM "custom"
#define PCMK__VALUE_EN "en"
#define PCMK__VALUE_FENCING "fencing"
#define PCMK__VALUE_GREEN "green"
#define PCMK__VALUE_LOCAL "local"
#define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red"
#define PCMK__VALUE_NONE "none"
#define PCMK__VALUE_NOTHING "nothing"
#define PCMK__VALUE_ONLY_GREEN "only-green"
#define PCMK__VALUE_PROGRESSIVE "progressive"
#define PCMK__VALUE_QUORUM "quorum"
#define PCMK__VALUE_RED "red"
#define PCMK__VALUE_REQUEST "request"
#define PCMK__VALUE_RESPONSE "response"
#define PCMK__VALUE_UNFENCING "unfencing"
#define PCMK__VALUE_YELLOW "yellow"
#endif // PCMK__OPTIONS_INTERNAL__H
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 32905ca769..26a07f4898 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -1,657 +1,657 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/ipc_controld.h>
#include "crmcommon_private.h"
struct controld_api_private_s {
char *client_uuid;
unsigned int replies_expected;
};
/*!
* \internal
* \brief Get a string representation of a controller API reply type
*
* \param[in] reply Controller API reply type
*
* \return String representation of a controller API reply type
*/
const char *
pcmk__controld_api_reply2str(enum pcmk_controld_api_reply reply)
{
switch (reply) {
case pcmk_controld_reply_reprobe:
return "reprobe";
case pcmk_controld_reply_info:
return "info";
case pcmk_controld_reply_resource:
return "resource";
case pcmk_controld_reply_ping:
return "ping";
case pcmk_controld_reply_nodes:
return "nodes";
default:
return "unknown";
}
}
// \return Standard Pacemaker return code
static int
new_data(pcmk_ipc_api_t *api)
{
struct controld_api_private_s *private = NULL;
api->api_data = calloc(1, sizeof(struct controld_api_private_s));
if (api->api_data == NULL) {
return errno;
}
private = api->api_data;
/* This is set to the PID because that's how it was always done, but PIDs
* are not unique because clients can be remote. The value appears to be
* unused other than as part of PCMK__XA_CRM_SYS_FROM in IPC requests, which
* is only compared against the internal system names (CRM_SYSTEM_TENGINE,
* etc.), so it shouldn't be a problem.
*/
private->client_uuid = pcmk__getpid_s();
/* @TODO Implement a call ID model similar to the CIB, executor, and fencer
* IPC APIs, so that requests and replies can be matched, and
* duplicate replies can be discarded.
*/
return pcmk_rc_ok;
}
static void
free_data(void *data)
{
free(((struct controld_api_private_s *) data)->client_uuid);
free(data);
}
// \return Standard Pacemaker return code
static int
post_connect(pcmk_ipc_api_t *api)
{
/* The controller currently requires clients to register via a hello
* request, but does not reply back.
*/
struct controld_api_private_s *private = api->api_data;
const char *client_name = crm_system_name? crm_system_name : "client";
xmlNode *hello;
int rc;
hello = create_hello_message(private->client_uuid, client_name,
PCMK__CONTROLD_API_MAJOR,
PCMK__CONTROLD_API_MINOR);
rc = pcmk__send_ipc_request(api, hello);
free_xml(hello);
if (rc != pcmk_rc_ok) {
crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s",
pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc);
} else {
crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true));
}
return rc;
}
static void
set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
data->reply_type = pcmk_controld_reply_info;
if (msg_data == NULL) {
return;
}
data->data.node_info.have_quorum =
pcmk__xe_attr_is_true(msg_data, PCMK_XA_HAVE_QUORUM);
data->data.node_info.is_remote = pcmk__xe_attr_is_true(msg_data, XML_NODE_IS_REMOTE);
/* Integer node_info.id is currently valid only for Corosync nodes.
*
* @TODO: Improve handling after crm_node_t is refactored to handle layer-
* specific data better.
*/
crm_element_value_int(msg_data, PCMK_XA_ID, &(data->data.node_info.id));
data->data.node_info.uuid = crm_element_value(msg_data, PCMK_XA_ID);
data->data.node_info.uname = crm_element_value(msg_data, PCMK_XA_UNAME);
data->data.node_info.state = crm_element_value(msg_data, PCMK__XA_CRMD);
}
static void
set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
data->reply_type = pcmk_controld_reply_ping;
if (msg_data == NULL) {
return;
}
data->data.ping.sys_from = crm_element_value(msg_data,
PCMK__XA_CRM_SUBSYSTEM);
data->data.ping.fsa_state = crm_element_value(msg_data,
PCMK__XA_CRMD_STATE);
data->data.ping.result = crm_element_value(msg_data, PCMK_XA_RESULT);
}
static void
set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
pcmk_controld_api_node_t *node_info;
data->reply_type = pcmk_controld_reply_nodes;
for (xmlNode *node = first_named_child(msg_data, XML_CIB_TAG_NODE);
node != NULL; node = crm_next_same_xml(node)) {
long long id_ll = 0;
node_info = calloc(1, sizeof(pcmk_controld_api_node_t));
crm_element_value_ll(node, PCMK_XA_ID, &id_ll);
if (id_ll > 0) {
node_info->id = id_ll;
}
node_info->uname = crm_element_value(node, PCMK_XA_UNAME);
node_info->state = crm_element_value(node, PCMK__XA_IN_CCM);
data->data.nodes = g_list_prepend(data->data.nodes, node_info);
}
}
static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
// We only need to handle commands that API functions can send
return pcmk__str_any_of(crm_element_value(request, PCMK__XA_CRM_TASK),
PCMK__CONTROLD_CMD_NODES,
CRM_OP_LRM_DELETE,
CRM_OP_LRM_FAIL,
CRM_OP_NODE_INFO,
CRM_OP_PING,
CRM_OP_REPROBE,
CRM_OP_RM_NODE_CACHE,
NULL);
}
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
struct controld_api_private_s *private = api->api_data;
crm_exit_t status = CRM_EX_OK;
xmlNode *msg_data = NULL;
const char *value = NULL;
pcmk_controld_api_reply_t reply_data = {
pcmk_controld_reply_unknown, NULL, NULL,
};
if (pcmk__xe_is(reply, "ack")) {
/* ACKs are trivial responses that do not count toward expected replies,
* and do not have all the fields that validation requires, so skip that
* processing.
*/
return private->replies_expected > 0;
}
if (private->replies_expected > 0) {
private->replies_expected--;
}
// Do some basic validation of the reply
/* @TODO We should be able to verify that value is always a response, but
* currently the controller doesn't always properly set the type. Even
* if we fix the controller, we'll still need to handle replies from
* old versions (feature set could be used to differentiate).
*/
value = crm_element_value(reply, PCMK__XA_SUBT);
if (!pcmk__str_any_of(value, PCMK__VALUE_REQUEST, PCMK__VALUE_RESPONSE,
NULL)) {
crm_info("Unrecognizable message from controller: "
"invalid message type '%s'", pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) {
crm_info("Unrecognizable message from controller: no reference");
status = CRM_EX_PROTOCOL;
goto done;
}
value = crm_element_value(reply, PCMK__XA_CRM_TASK);
if (pcmk__str_empty(value)) {
crm_info("Unrecognizable message from controller: no command name");
status = CRM_EX_PROTOCOL;
goto done;
}
// Parse useful info from reply
reply_data.feature_set = crm_element_value(reply, PCMK_XA_VERSION);
reply_data.host_from = crm_element_value(reply, PCMK__XA_SRC);
msg_data = get_message_xml(reply, F_CRM_DATA);
if (!strcmp(value, CRM_OP_REPROBE)) {
reply_data.reply_type = pcmk_controld_reply_reprobe;
} else if (!strcmp(value, CRM_OP_NODE_INFO)) {
set_node_info_data(&reply_data, msg_data);
} else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
reply_data.reply_type = pcmk_controld_reply_resource;
reply_data.data.resource.node_state = msg_data;
} else if (!strcmp(value, CRM_OP_PING)) {
set_ping_data(&reply_data, msg_data);
} else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
set_nodes_data(&reply_data, msg_data);
} else {
crm_info("Unrecognizable message from controller: unknown command '%s'",
value);
status = CRM_EX_PROTOCOL;
}
done:
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
// Free any reply data that was allocated
if (pcmk__str_eq(value, PCMK__CONTROLD_CMD_NODES, pcmk__str_casei)) {
g_list_free_full(reply_data.data.nodes, free);
}
return false; // No further replies needed
}
pcmk__ipc_methods_t *
pcmk__controld_api_methods(void)
{
pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
if (cmds != NULL) {
cmds->new_data = new_data;
cmds->free_data = free_data;
cmds->post_connect = post_connect;
cmds->reply_expected = reply_expected;
cmds->dispatch = dispatch;
}
return cmds;
}
/*!
* \internal
* \brief Create XML for a controller IPC request
*
* \param[in] api Controller connection
* \param[in] op Controller IPC command name
* \param[in] node Node name to set as destination host
* \param[in] msg_data XML to attach to request as message data
*
* \return Newly allocated XML for request
*/
static xmlNode *
create_controller_request(const pcmk_ipc_api_t *api, const char *op,
const char *node, xmlNode *msg_data)
{
struct controld_api_private_s *private = NULL;
const char *sys_to = NULL;
if (api == NULL) {
return NULL;
}
private = api->api_data;
if ((node == NULL) && !strcmp(op, CRM_OP_PING)) {
sys_to = CRM_SYSTEM_DC;
} else {
sys_to = CRM_SYSTEM_CRMD;
}
return create_request(op, msg_data, node, sys_to,
(crm_system_name? crm_system_name : "client"),
private->client_uuid);
}
// \return Standard Pacemaker return code
static int
send_controller_request(pcmk_ipc_api_t *api, const xmlNode *request,
bool reply_is_expected)
{
if (crm_element_value(request, PCMK_XA_REFERENCE) == NULL) {
return EINVAL;
}
if (reply_is_expected) {
struct controld_api_private_s *private = api->api_data;
private->replies_expected++;
}
return pcmk__send_ipc_request(api, request);
}
static xmlNode *
create_reprobe_message_data(const char *target_node, const char *router_node)
{
xmlNode *msg_data;
msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE);
crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);
if ((router_node != NULL) && !pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
}
return msg_data;
}
/*!
* \brief Send a reprobe controller operation
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node to reprobe
* \param[in] router_node Router node for host
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_reprobe.
*/
int
pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node,
const char *router_node)
{
xmlNode *request;
xmlNode *msg_data;
int rc = pcmk_rc_ok;
if (api == NULL) {
return EINVAL;
}
if (router_node == NULL) {
router_node = target_node;
}
crm_debug("Sending %s IPC request to reprobe %s via %s",
pcmk_ipc_name(api, true), pcmk__s(target_node, "local node"),
pcmk__s(router_node, "local node"));
msg_data = create_reprobe_message_data(target_node, router_node);
request = create_controller_request(api, CRM_OP_REPROBE, router_node,
msg_data);
rc = send_controller_request(api, request, true);
free_xml(msg_data);
free_xml(request);
return rc;
}
/*!
* \brief Send a "node info" controller operation
*
* \param[in,out] api Controller connection
* \param[in] nodeid ID of node to get info for (or 0 for local node)
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_info.
*/
int
pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid)
{
xmlNode *request;
int rc = pcmk_rc_ok;
request = create_controller_request(api, CRM_OP_NODE_INFO, NULL, NULL);
if (request == NULL) {
return EINVAL;
}
if (nodeid > 0) {
crm_xml_set_id(request, "%lu", (unsigned long) nodeid);
}
rc = send_controller_request(api, request, true);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller for status
*
* \param[in,out] api Controller connection
* \param[in] node_name Name of node whose status is desired (NULL for DC)
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_ping.
*/
int
pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name)
{
xmlNode *request;
int rc = pcmk_rc_ok;
request = create_controller_request(api, CRM_OP_PING, node_name, NULL);
if (request == NULL) {
return EINVAL;
}
rc = send_controller_request(api, request, true);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller for cluster information
*
* \param[in,out] api Controller connection
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_nodes.
*/
int
pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api)
{
xmlNode *request;
int rc = EINVAL;
request = create_controller_request(api, PCMK__CONTROLD_CMD_NODES, NULL,
NULL);
if (request != NULL) {
rc = send_controller_request(api, request, true);
free_xml(request);
}
return rc;
}
// \return Standard Pacemaker return code
static int
controller_resource_op(pcmk_ipc_api_t *api, const char *op,
const char *target_node, const char *router_node,
bool cib_only, const char *rsc_id,
const char *rsc_long_id, const char *standard,
const char *provider, const char *type)
{
int rc = pcmk_rc_ok;
char *key;
xmlNode *request, *msg_data, *xml_rsc, *params;
if (api == NULL) {
return EINVAL;
}
if (router_node == NULL) {
router_node = target_node;
}
msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP);
/* The controller logs the transition key from resource op requests, so we
* need to have *something* for it.
* @TODO don't use "crm-resource"
*/
key = pcmk__transition_key(0, getpid(), 0,
"xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx");
crm_xml_add(msg_data, PCMK__XA_TRANSITION_KEY, key);
free(key);
crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);
if (!pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
}
if (cib_only) {
// Indicate that only the CIB needs to be cleaned
- crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB);
+ crm_xml_add(msg_data, PCMK__XA_MODE, PCMK__VALUE_CIB);
}
xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
crm_xml_add(xml_rsc, PCMK__XA_LONG_ID, rsc_long_id);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, provider);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, type);
params = create_xml_node(msg_data, XML_TAG_ATTRS);
crm_xml_add(params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
// The controller parses the timeout from the request
key = crm_meta_name(PCMK_META_TIMEOUT);
crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg
free(key);
request = create_controller_request(api, op, router_node, msg_data);
rc = send_controller_request(api, request, true);
free_xml(msg_data);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller to fail a resource
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node resource is on
* \param[in] router_node Router node for target
* \param[in] rsc_id ID of resource to fail
* \param[in] rsc_long_id Long ID of resource (if any)
* \param[in] standard Standard of resource
* \param[in] provider Provider of resource (if any)
* \param[in] type Type of resource to fail
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_resource.
*/
int
pcmk_controld_api_fail(pcmk_ipc_api_t *api,
const char *target_node, const char *router_node,
const char *rsc_id, const char *rsc_long_id,
const char *standard, const char *provider,
const char *type)
{
crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s",
pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
pcmk__s(rsc_long_id, "no other names"),
pcmk__s(target_node, "unspecified node"),
pcmk__s(router_node, "unspecified node"));
return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node,
router_node, false, rsc_id, rsc_long_id,
standard, provider, type);
}
/*!
* \brief Ask the controller to refresh a resource
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node resource is on
* \param[in] router_node Router node for target
* \param[in] rsc_id ID of resource to refresh
* \param[in] rsc_long_id Long ID of resource (if any)
* \param[in] standard Standard of resource
* \param[in] provider Provider of resource (if any)
* \param[in] type Type of resource
* \param[in] cib_only If true, clean resource from CIB only
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_resource.
*/
int
pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node,
const char *router_node,
const char *rsc_id, const char *rsc_long_id,
const char *standard, const char *provider,
const char *type, bool cib_only)
{
crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s",
pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
pcmk__s(rsc_long_id, "no other names"),
pcmk__s(target_node, "unspecified node"),
pcmk__s(router_node, "unspecified node"));
return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node,
router_node, cib_only, rsc_id, rsc_long_id,
standard, provider, type);
}
/*!
* \brief Get the number of IPC replies currently expected from the controller
*
* \param[in] api Controller IPC API connection
*
* \return Number of replies expected
*/
unsigned int
pcmk_controld_api_replies_expected(const pcmk_ipc_api_t *api)
{
struct controld_api_private_s *private = api->api_data;
return private->replies_expected;
}
/*!
* \brief Create XML for a controller IPC "hello" message
*
* \deprecated This function is deprecated as part of the public C API.
*/
// \todo make this static to this file when breaking API backward compatibility
xmlNode *
create_hello_message(const char *uuid, const char *client_name,
const char *major_version, const char *minor_version)
{
xmlNode *hello_node = NULL;
xmlNode *hello = NULL;
if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name)
|| pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"missing information",
client_name? client_name : "unknown client",
uuid? uuid : "unknown");
return NULL;
}
hello_node = create_xml_node(NULL, XML_TAG_OPTIONS);
if (hello_node == NULL) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"Message data creation failed", client_name, uuid);
return NULL;
}
crm_xml_add(hello_node, "major_version", major_version);
crm_xml_add(hello_node, "minor_version", minor_version);
crm_xml_add(hello_node, "client_name", client_name);
crm_xml_add(hello_node, "client_uuid", uuid);
hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid);
if (hello == NULL) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"Request creation failed", client_name, uuid);
return NULL;
}
free_xml(hello_node);
crm_trace("Created hello message from %s (UUID %s)", client_name, uuid);
return hello;
}
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index f5b486f589..9b1f73c0b7 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -1,1096 +1,1096 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pcmk_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pcmk_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pcmk_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, PCMK_XA_ID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const pcmk_node_t *node, void *xml)
{
add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Count (optionally add to XML) nodes needing maintenance state update
*
* \param[in,out] xml Parent XML tag to add to, if any
* \param[in] scheduler Scheduler data
*
* \return Count of nodes added
* \note Only Pacemaker Remote nodes are considered currently
*/
static int
add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler)
{
xmlNode *maintenance = NULL;
int count = 0;
if (xml != NULL) {
maintenance = create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE);
}
for (const GList *iter = scheduler->nodes;
iter != NULL; iter = iter->next) {
const pcmk_node_t *node = iter->data;
if (pe__is_guest_or_remote_node(node) &&
(node->details->maintenance != node->details->remote_maintenance)) {
if (maintenance != NULL) {
crm_xml_add(add_node_to_xml_by_id(node->details->id,
maintenance),
XML_NODE_IS_MAINTENANCE,
(node->details->maintenance? "1" : "0"));
}
count++;
}
}
crm_trace("%s %d nodes in need of maintenance mode update in state",
((maintenance == NULL)? "Counted" : "Added"), count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] scheduler Scheduler data
*/
static void
add_maintenance_update(pcmk_scheduler_t *scheduler)
{
pcmk_action_t *action = NULL;
if (add_maintenance_nodes(NULL, scheduler) != 0) {
action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler);
pcmk__set_action_flags(action, pcmk_action_always_in_graph);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
*/
static void
add_downed_nodes(xmlNode *xml, const pcmk_action_t *action)
{
CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL),
return);
if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* Shutdown makes the action's node down */
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH,
pcmk__str_none)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
if (pcmk__is_fencing_action(fence)) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
pe_foreach_guest_node(action->node->details->data_set,
action->node, add_node_to_xml, downed);
}
} else if (action->rsc && action->rsc->is_remote_node
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GList *iter;
pcmk_action_t *input;
bool migrating = false;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((pcmk__related_action_t *) iter->data)->action;
if ((input->rsc != NULL)
&& pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none)
&& pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
migrating = true;
break;
}
}
if (!migrating) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
/*!
* \internal
* \brief Create a transition graph operation key for a clone action
*
* \param[in] action Clone action
* \param[in] interval_ms Action interval in milliseconds
*
* \return Newly allocated string with transition graph operation key
*/
static char *
clone_op_key(const pcmk_action_t *action, guint interval_ms)
{
if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_operation");
CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL));
return pcmk__notify_key(action->rsc->clone_name, n_type, n_task);
} else if (action->cancel_task != NULL) {
return pcmk__op_key(action->rsc->clone_name, action->cancel_task,
interval_ms);
} else {
return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms);
}
}
/*!
* \internal
* \brief Add node details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] xml Transition graph action XML for \p action
*/
static void
add_node_details(const pcmk_action_t *action, xmlNode *xml)
{
pcmk_node_t *router_node = pcmk__connection_host_for_action(action);
crm_xml_add(xml, PCMK__META_ON_NODE, action->node->details->uname);
crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->details->id);
if (router_node != NULL) {
crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->details->uname);
}
}
/*!
* \internal
* \brief Add resource details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_resource_details(const pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *rsc_xml = NULL;
const char *attr_list[] = {
PCMK_XA_CLASS,
PCMK_XA_PROVIDER,
PCMK_XA_TYPE,
};
/* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its
* actions so the controller can preserve the lock when the action
* completes.
*/
if (pcmk__action_locks_rsc_to_node(action)) {
crm_xml_add_ll(action_xml, PCMK_OPT_SHUTDOWN_LOCK,
(long long) action->rsc->lock_time);
}
// List affected resource
rsc_xml = create_xml_node(action_xml,
(const char *) action->rsc->xml->name);
if (pcmk_is_set(action->rsc->flags, pcmk_rsc_removed)
&& (action->rsc->clone_name != NULL)) {
/* Use the numbered instance name here, because if there is more
* than one instance on a node, we need to make sure the command
* goes to the right one.
*
* This is important even for anonymous clones, because the clone's
* unique meta-attribute might have just been toggled from on to
* off.
*/
crm_debug("Using orphan clone name %s instead of %s",
action->rsc->id, action->rsc->clone_name);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->clone_name);
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
} else if (!pcmk_is_set(action->rsc->flags, pcmk_rsc_unique)) {
const char *xml_id = ID(action->rsc->xml);
crm_debug("Using anonymous clone name %s for %s (aka %s)",
xml_id, action->rsc->id, action->rsc->clone_name);
/* ID is what we'd like client to use
* LONG_ID is what they might know it as instead
*
* LONG_ID is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id);
if ((action->rsc->clone_name != NULL)
&& !pcmk__str_eq(xml_id, action->rsc->clone_name,
pcmk__str_none)) {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->clone_name);
} else {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->clone_name == NULL);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id);
}
for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
}
}
/*!
* \internal
* \brief Add action attributes to transition graph action XML
*
* \param[in,out] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_action_attributes(pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *args_xml = NULL;
/* We create free-standing XML to start, so we can sort the attributes
* before adding it to action_xml, which keeps the scheduler regression
* test graphs comparable.
*/
args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if ((action->rsc != NULL) && (action->node != NULL)) {
// Get the resource instance attributes, evaluated properly for node
GHashTable *params = pe_rsc_params(action->rsc, action->node,
action->rsc->cluster);
pcmk__substitute_remote_addr(action->rsc, params);
g_hash_table_foreach(params, hash2smartfield, args_xml);
} else if ((action->rsc != NULL)
&& (action->rsc->variant <= pcmk_rsc_variant_primitive)) {
GHashTable *params = pe_rsc_params(action->rsc, NULL,
action->rsc->cluster);
g_hash_table_foreach(params, hash2smartfield, args_xml);
}
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (action->rsc != NULL) {
pcmk_resource_t *parent = action->rsc;
while (parent != NULL) {
parent->cmds->add_graph_meta(parent, args_xml);
parent = parent->parent;
}
pcmk__add_guest_meta_to_xml(args_xml, action);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)
&& (action->node != NULL)) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->details->attrs, hash2metafield,
args_xml);
}
sorted_xml(args_xml, action_xml, FALSE);
free_xml(args_xml);
}
/*!
* \internal
* \brief Create the transition graph XML for a scheduled action
*
* \param[in,out] parent Parent XML element to add action to
* \param[in,out] action Scheduled action
* \param[in] skip_details If false, add action details as sub-elements
* \param[in] scheduler Scheduler data
*/
static void
create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details,
const pcmk_scheduler_t *scheduler)
{
bool needs_node_info = true;
bool needs_maintenance_info = false;
xmlNode *action_xml = NULL;
if ((action == NULL) || (scheduler == NULL)) {
return;
}
// Create the top-level element based on task
if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) {
/* All fences need node info; guest node fences are pseudo-events */
if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
}
} else if (pcmk__str_any_of(action->task,
PCMK_ACTION_DO_SHUTDOWN,
PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE,
pcmk__str_none)) {
// CIB-only clean-up for shutdown locks
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
- crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB);
+ crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB);
} else if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES,
pcmk__str_none)) {
needs_maintenance_info = true;
}
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = false;
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP);
}
crm_xml_add_int(action_xml, PCMK_XA_ID, action->id);
crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task);
if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) {
char *clone_key = NULL;
guint interval_ms;
if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
clone_key = clone_op_key(action, interval_ms);
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key);
crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY,
action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid);
}
if (needs_node_info && (action->node != NULL)) {
add_node_details(action, action_xml);
g_hash_table_insert(action->meta, strdup(PCMK__META_ON_NODE),
strdup(action->node->details->uname));
g_hash_table_insert(action->meta, strdup(PCMK__META_ON_NODE_UUID),
strdup(action->node->details->id));
}
if (skip_details) {
return;
}
if ((action->rsc != NULL)
&& !pcmk_is_set(action->flags, pcmk_action_pseudo)) {
// This is a real resource action, so add resource details
add_resource_details(action, action_xml);
}
/* List any attributes in effect */
add_action_attributes(action, action_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, scheduler);
}
}
/*!
* \internal
* \brief Check whether an action should be added to the transition graph
*
* \param[in] action Action to check
*
* \return true if action should be added to graph, otherwise false
*/
static bool
should_add_action_to_graph(const pcmk_action_t *action)
{
if (!pcmk_is_set(action->flags, pcmk_action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
}
if (pcmk_is_set(action->flags, pcmk_action_optional)
&& !pcmk_is_set(action->flags, pcmk_action_always_in_graph)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
}
/* Actions for unmanaged resources should be excluded from the graph,
* with the exception of monitors and cancellation of recurring monitors.
*/
if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pcmk_rsc_managed)
&& !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
const char *interval_ms_s;
/* A cancellation of a recurring monitor will get here because the task
* is cancel rather than monitor, but the interval can still be used to
* recognize it. The interval has been normalized to milliseconds by
* this point, so a string comparison is sufficient.
*/
interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
/* Always add pseudo-actions, fence actions, and shutdown actions (already
* determined to be required and runnable by this point)
*/
if (pcmk_is_set(action->flags, pcmk_action_pseudo)
|| pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH,
PCMK_ACTION_DO_SHUTDOWN, NULL)) {
return true;
}
if (action->node == NULL) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was not assigned to a node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unassigned", action, false);
return false;
}
if (pcmk_is_set(action->flags, pcmk_action_on_dc)) {
crm_trace("Action %s (%d) should be dumped: "
"can run on DC instead of %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (pe__is_guest_node(action->node)
&& !action->node->details->remote_requires_reset) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (!action->node->details->online) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
pcmk__log_action("Offline node", action, false);
return false;
} else if (action->node->details->unclean) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unclean node", action, false);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether an ordering's flags can change an action
*
* \param[in] ordering Ordering to check
*
* \return true if ordering has flags that can change an action, false otherwise
*/
static bool
ordering_can_change_actions(const pcmk__related_action_t *ordering)
{
return pcmk_any_flags_set(ordering->type,
~(pcmk__ar_then_implies_first_graphed
|pcmk__ar_first_implies_then_graphed
|pcmk__ar_ordered));
}
/*!
* \internal
* \brief Check whether an action input should be in the transition graph
*
* \param[in] action Action to check
* \param[in,out] input Action input to check
*
* \return true if input should be in graph, false otherwise
* \note This function may not only check an input, but disable it under certian
* circumstances (load or anti-colocation orderings that are not needed).
*/
static bool
should_add_input_to_graph(const pcmk_action_t *action,
pcmk__related_action_t *input)
{
if (input->state == pe_link_dumped) {
return true;
}
if ((uint32_t) input->type == pcmk__ar_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk_action_runnable)
&& !ordering_can_change_actions(input)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk_action_runnable)
&& pcmk_is_set(input->type, pcmk__ar_min_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"minimum number of instances required but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pcmk__ar_unmigratable_then_blocks)
&& !pcmk_is_set(input->action->flags, pcmk_action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input blocked if 'then' unmigratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pcmk__ar_if_first_unmigratable)
&& pcmk_is_set(input->action->flags, pcmk_action_migratable)) {
crm_trace("Ignoring %s (%d) input %s (%d): ordering applies "
"only if input is unmigratable, but it is migratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (((uint32_t) input->type == pcmk__ar_ordered)
&& pcmk_is_set(input->action->flags, pcmk_action_migratable)
&& pcmk__ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((uint32_t) input->type == pcmk__ar_if_on_same_node_or_target) {
pcmk_node_t *input_node = input->action->node;
if ((action->rsc != NULL)
&& pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
pcmk_node_t *assigned = action->rsc->allocated_to;
/* For load_stopped -> migrate_to orderings, we care about where
* the resource has been assigned, not where migrate_to will be
* executed.
*/
if (!pcmk__same_node(input_node, assigned)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"migration target %s is not same as input node %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(assigned? assigned->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if (!pcmk__same_node(input_node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if ((uint32_t) input->type == pcmk__ar_if_required_on_same_node) {
if (input->action->node && action->node
&& !pcmk__same_node(input->action->node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
pcmk__node_name(action->node),
pcmk__node_name(input->action->node));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& pcmk_is_set(input->action->rsc->flags, pcmk_rsc_failed)
&& !pcmk_is_set(input->action->rsc->flags, pcmk_rsc_managed)
&& pcmk__ends_with(input->action->uuid, "_stop_0")
&& action->rsc && pe_rsc_is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)
&& !pcmk_any_flags_set(input->action->flags,
pcmk_action_always_in_graph
|pcmk_action_added_to_graph)
&& !should_add_action_to_graph(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
action->uuid, action->id, action_type_str(input->action->flags),
input->action->uuid, input->action->id,
action_node_str(input->action),
action_runnable_str(input->action->flags),
action_optional_str(input->action->flags), input->type);
return true;
}
/*!
* \internal
* \brief Check whether an ordering creates an ordering loop
*
* \param[in] init_action "First" action in ordering
* \param[in] action Callers should always set this the same as
* \p init_action (this function may use a different
* value for recursive calls)
* \param[in,out] input Action wrapper for "then" action in ordering
*
* \return true if the ordering creates a loop, otherwise false
*/
bool
pcmk__graph_has_loop(const pcmk_action_t *init_action,
const pcmk_action_t *action, pcmk__related_action_t *input)
{
bool has_loop = false;
if (pcmk_is_set(input->action->flags, pcmk_action_detect_loop)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
return false;
}
// Don't need to check inputs that won't be used
if (!should_add_input_to_graph(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->details->uname : "",
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
return true;
}
pcmk__set_action_flags(input->action, pcmk_action_detect_loop);
crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->details->uname : "",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
input->type,
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
// Recursively check input itself for loops
for (GList *iter = input->action->actions_before;
iter != NULL; iter = iter->next) {
if (pcmk__graph_has_loop(init_action, input->action,
(pcmk__related_action_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
break;
}
}
pcmk__clear_action_flags(input->action, pcmk_action_detect_loop);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
}
return has_loop;
}
/*!
* \internal
* \brief Create a synapse XML element for a transition graph
*
* \param[in] action Action that synapse is for
* \param[in,out] scheduler Scheduler data containing graph
*
* \return Newly added XML element for new graph synapse
*/
static xmlNode *
create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler)
{
int synapse_priority = 0;
xmlNode *syn = create_xml_node(scheduler->graph, "synapse");
crm_xml_add_int(syn, PCMK_XA_ID, scheduler->num_synapse);
scheduler->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority);
}
return syn;
}
/*!
* \internal
* \brief Add an action to the transition graph XML if appropriate
*
* \param[in,out] data Action to possibly add
* \param[in,out] user_data Scheduler data
*
* \note This will de-duplicate the action inputs, meaning that the
* pcmk__related_action_t:type flags can no longer be relied on to retain
* their original settings. That means this MUST be called after
* pcmk__apply_orderings() is complete, and nothing after this should rely
* on those type flags. (For example, some code looks for type equal to
* some flag rather than whether the flag is set, and some code looks for
* particular combinations of flags -- such code must be done before
* pcmk__create_graph().)
*/
static void
add_action_to_graph(gpointer data, gpointer user_data)
{
pcmk_action_t *action = (pcmk_action_t *) data;
pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
/* If we haven't already, de-duplicate inputs (even if we won't be adding
* the action to the graph, so that crm_simulate's dot graphs don't have
* duplicates).
*/
if (!pcmk_is_set(action->flags, pcmk_action_inputs_deduplicated)) {
pcmk__deduplicate_action_inputs(action);
pcmk__set_action_flags(action, pcmk_action_inputs_deduplicated);
}
if (pcmk_is_set(action->flags, pcmk_action_added_to_graph)
|| !should_add_action_to_graph(action)) {
return; // Already added, or shouldn't be
}
pcmk__set_action_flags(action, pcmk_action_added_to_graph);
crm_trace("Adding action %d (%s%s%s) to graph",
action->id, action->uuid,
((action->node == NULL)? "" : " on "),
((action->node == NULL)? "" : action->node->details->uname));
syn = create_graph_synapse(action, scheduler);
set = create_xml_node(syn, "action_set");
in = create_xml_node(syn, "inputs");
create_graph_action(set, action, false, scheduler);
for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
pcmk__related_action_t *input = lpc->data;
if (should_add_input_to_graph(action, input)) {
xmlNode *input_xml = create_xml_node(in, "trigger");
input->state = pe_link_dumped;
create_graph_action(input_xml, input->action, true, scheduler);
}
}
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error || crm_config_error) {
crm_err("Calculated transition %d (with errors)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else if (was_processing_warning || crm_config_warning) {
crm_warn("Calculated transition %d (with warnings)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else {
crm_notice("Calculated transition %d%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc)
{
GList *iter = NULL;
CRM_ASSERT(rsc != NULL);
pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
// First add the resource's own actions
g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster);
// Then recursively add its children's actions (appropriate to variant)
for (iter = rsc->children; iter != NULL; iter = iter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
}
/*!
* \internal
* \brief Create a transition graph with all cluster actions needed
*
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__create_graph(pcmk_scheduler_t *scheduler)
{
GList *iter = NULL;
const char *value = NULL;
long long limit = 0LL;
transition_id++;
crm_trace("Creating transition graph %d", transition_id);
scheduler->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(scheduler->config_hash, PCMK_OPT_CLUSTER_DELAY);
crm_xml_add(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, value);
value = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_TIMEOUT);
crm_xml_add(scheduler->graph, PCMK_OPT_STONITH_TIMEOUT, value);
crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY");
if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
crm_xml_add(scheduler->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(scheduler->graph, "failed-start-offset", "1");
}
value = pe_pref(scheduler->config_hash, PCMK_OPT_BATCH_LIMIT);
crm_xml_add(scheduler->graph, PCMK_OPT_BATCH_LIMIT, value);
crm_xml_add_int(scheduler->graph, "transition_id", transition_id);
value = pe_pref(scheduler->config_hash, PCMK_OPT_MIGRATION_LIMIT);
if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
crm_xml_add(scheduler->graph, PCMK_OPT_MIGRATION_LIMIT, value);
}
if (scheduler->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) scheduler->recheck_by);
crm_xml_add(scheduler->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
// Add resource actions to graph
for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id);
rsc->cmds->add_actions_to_graph(rsc);
}
// Add pseudo-action for list of nodes with maintenance state update
add_maintenance_update(scheduler);
// Add non-resource (node) actions
for (iter = scheduler->actions; iter != NULL; iter = iter->next) {
pcmk_action_t *action = (pcmk_action_t *) iter->data;
if ((action->rsc != NULL)
&& (action->node != NULL)
&& action->node->details->shutdown
&& !pcmk_is_set(action->rsc->flags, pcmk_rsc_maintenance)
&& !pcmk_any_flags_set(action->flags,
pcmk_action_optional|pcmk_action_runnable)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* Eventually we should just ignore the 'fence' case, but for now
* it's the best way to detect (in CTS) when CIB resource updates
* are being lost.
*/
if (pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
|| (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) {
const bool managed = pcmk_is_set(action->rsc->flags,
pcmk_rsc_managed);
const bool failed = pcmk_is_set(action->rsc->flags,
pcmk_rsc_failed);
crm_crit("Cannot %s %s because of %s:%s%s (%s)",
action->node->details->unclean? "fence" : "shut down",
pcmk__node_name(action->node), action->rsc->id,
(managed? " blocked" : " unmanaged"),
(failed? " failed" : ""), action->uuid);
}
}
add_action_to_graph((gpointer) action, (gpointer) scheduler);
}
crm_log_xml_trace(scheduler->graph, "graph");
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:44 PM (2 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1989321
Default Alt Text
(141 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment