Page MenuHomeClusterLabs Projects

No OneTemporary

diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index d6590e1b3a..003466d275 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -1,1334 +1,1336 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <string.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster/internal.h>
#include <crm/cib.h>
#include <crm/common/ipc_internal.h>
#include <pacemaker-controld.h>
static enum crmd_fsa_input handle_message(xmlNode *msg,
enum crmd_fsa_cause cause);
static void handle_response(xmlNode *stored_msg);
static enum crmd_fsa_input handle_request(xmlNode *stored_msg,
enum crmd_fsa_cause cause);
static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg);
static void send_msg_via_ipc(xmlNode * msg, const char *sys);
/* debug only, can wrap all it likes */
static int last_data_id = 0;
void
register_fsa_error_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
fsa_data_t * cur_data, void *new_data, const char *raised_from)
{
/* save the current actions if any */
if (controld_globals.fsa_actions != A_NOTHING) {
register_fsa_input_adv(cur_data ? cur_data->fsa_cause : C_FSA_INTERNAL,
I_NULL, cur_data ? cur_data->data : NULL,
controld_globals.fsa_actions, TRUE, __func__);
}
/* reset the action list */
crm_info("Resetting the current action list");
fsa_dump_actions(controld_globals.fsa_actions, "Drop");
controld_globals.fsa_actions = A_NOTHING;
/* register the error */
register_fsa_input_adv(cause, input, new_data, A_NOTHING, TRUE, raised_from);
}
void
register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
void *data, uint64_t with_actions,
gboolean prepend, const char *raised_from)
{
unsigned old_len = g_list_length(controld_globals.fsa_message_queue);
fsa_data_t *fsa_data = NULL;
if (raised_from == NULL) {
raised_from = "<unknown>";
}
if (input == I_NULL && with_actions == A_NOTHING /* && data == NULL */ ) {
/* no point doing anything */
crm_err("Cannot add entry to queue: no input and no action");
return;
}
if (input == I_WAIT_FOR_EVENT) {
controld_set_global_flags(controld_fsa_is_stalled);
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
raised_from, fsa_cause2string(cause), data, old_len);
if (old_len > 0) {
fsa_dump_queue(LOG_TRACE);
prepend = FALSE;
}
if (data == NULL) {
controld_set_fsa_action_flags(with_actions);
fsa_dump_actions(with_actions, "Restored");
return;
}
/* Store everything in the new event and reset
* controld_globals.fsa_actions
*/
with_actions |= controld_globals.fsa_actions;
controld_globals.fsa_actions = A_NOTHING;
}
last_data_id++;
crm_trace("%s %s FSA input %d (%s) due to %s, %s data",
raised_from, (prepend? "prepended" : "appended"), last_data_id,
fsa_input2string(input), fsa_cause2string(cause),
(data? "with" : "without"));
fsa_data = calloc(1, sizeof(fsa_data_t));
fsa_data->id = last_data_id;
fsa_data->fsa_input = input;
fsa_data->fsa_cause = cause;
fsa_data->origin = raised_from;
fsa_data->data = NULL;
fsa_data->data_type = fsa_dt_none;
fsa_data->actions = with_actions;
if (with_actions != A_NOTHING) {
crm_trace("Adding actions %.16llx to input",
(unsigned long long) with_actions);
}
if (data != NULL) {
switch (cause) {
case C_FSA_INTERNAL:
case C_CRMD_STATUS_CALLBACK:
case C_IPC_MESSAGE:
case C_HA_MESSAGE:
CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL,
crm_err("Bogus data from %s", raised_from));
crm_trace("Copying %s data from %s as cluster message data",
fsa_cause2string(cause), raised_from);
fsa_data->data = copy_ha_msg_input(data);
fsa_data->data_type = fsa_dt_ha_msg;
break;
case C_LRM_OP_CALLBACK:
crm_trace("Copying %s data from %s as lrmd_event_data_t",
fsa_cause2string(cause), raised_from);
fsa_data->data = lrmd_copy_event((lrmd_event_data_t *) data);
fsa_data->data_type = fsa_dt_lrm;
break;
case C_TIMER_POPPED:
case C_SHUTDOWN:
case C_UNKNOWN:
case C_STARTUP:
crm_crit("Copying %s data (from %s) is not yet implemented",
fsa_cause2string(cause), raised_from);
crmd_exit(CRM_EX_SOFTWARE);
break;
}
}
/* make sure to free it properly later */
if (prepend) {
controld_globals.fsa_message_queue
= g_list_prepend(controld_globals.fsa_message_queue, fsa_data);
} else {
controld_globals.fsa_message_queue
= g_list_append(controld_globals.fsa_message_queue, fsa_data);
}
crm_trace("FSA message queue length is %d",
g_list_length(controld_globals.fsa_message_queue));
/* fsa_dump_queue(LOG_TRACE); */
if (old_len == g_list_length(controld_globals.fsa_message_queue)) {
crm_err("Couldn't add message to the queue");
}
if (input != I_WAIT_FOR_EVENT) {
controld_trigger_fsa();
}
}
void
fsa_dump_queue(int log_level)
{
int offset = 0;
for (GList *iter = controld_globals.fsa_message_queue; iter != NULL;
iter = iter->next) {
fsa_data_t *data = (fsa_data_t *) iter->data;
do_crm_log_unlikely(log_level,
"queue[%d.%d]: input %s raised by %s(%p.%d)\t(cause=%s)",
offset++, data->id, fsa_input2string(data->fsa_input),
data->origin, data->data, data->data_type,
fsa_cause2string(data->fsa_cause));
}
}
ha_msg_input_t *
copy_ha_msg_input(ha_msg_input_t * orig)
{
ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t));
CRM_ASSERT(copy != NULL);
copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL;
copy->xml = get_message_xml(copy->msg, F_CRM_DATA);
return copy;
}
void
delete_fsa_input(fsa_data_t * fsa_data)
{
lrmd_event_data_t *op = NULL;
xmlNode *foo = NULL;
if (fsa_data == NULL) {
return;
}
crm_trace("About to free %s data", fsa_cause2string(fsa_data->fsa_cause));
if (fsa_data->data != NULL) {
switch (fsa_data->data_type) {
case fsa_dt_ha_msg:
delete_ha_msg_input(fsa_data->data);
break;
case fsa_dt_xml:
foo = fsa_data->data;
free_xml(foo);
break;
case fsa_dt_lrm:
op = (lrmd_event_data_t *) fsa_data->data;
lrmd_free_event(op);
break;
case fsa_dt_none:
if (fsa_data->data != NULL) {
crm_err("Don't know how to free %s data from %s",
fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
crmd_exit(CRM_EX_SOFTWARE);
}
break;
}
crm_trace("%s data freed", fsa_cause2string(fsa_data->fsa_cause));
}
free(fsa_data);
}
/* returns the next message */
fsa_data_t *
get_message(void)
{
fsa_data_t *message
= (fsa_data_t *) controld_globals.fsa_message_queue->data;
controld_globals.fsa_message_queue
= g_list_remove(controld_globals.fsa_message_queue, message);
crm_trace("Processing input %d", message->id);
return message;
}
void *
fsa_typed_data_adv(fsa_data_t * fsa_data, enum fsa_data_type a_type, const char *caller)
{
void *ret_val = NULL;
if (fsa_data == NULL) {
crm_err("%s: No FSA data available", caller);
} else if (fsa_data->data == NULL) {
crm_err("%s: No message data available. Origin: %s", caller, fsa_data->origin);
} else if (fsa_data->data_type != a_type) {
crm_crit("%s: Message data was the wrong type! %d vs. requested=%d. Origin: %s",
caller, fsa_data->data_type, a_type, fsa_data->origin);
CRM_ASSERT(fsa_data->data_type == a_type);
} else {
ret_val = fsa_data->data;
}
return ret_val;
}
/* A_MSG_ROUTE */
void
do_msg_route(long long action,
enum crmd_fsa_cause cause,
enum crmd_fsa_state cur_state,
enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
route_message(msg_data->fsa_cause, input->msg);
}
void
route_message(enum crmd_fsa_cause cause, xmlNode * input)
{
ha_msg_input_t fsa_input;
enum crmd_fsa_input result = I_NULL;
fsa_input.msg = input;
CRM_CHECK(cause == C_IPC_MESSAGE || cause == C_HA_MESSAGE, return);
/* try passing the buck first */
if (relay_message(input, cause == C_IPC_MESSAGE)) {
return;
}
/* handle locally */
result = handle_message(input, cause);
/* done or process later? */
switch (result) {
case I_NULL:
case I_CIB_OP:
case I_ROUTER:
case I_NODE_JOIN:
case I_JOIN_REQUEST:
case I_JOIN_RESULT:
break;
default:
/* Defering local processing of message */
register_fsa_input_later(cause, result, &fsa_input);
return;
}
if (result != I_NULL) {
/* add to the front of the queue */
register_fsa_input(cause, result, &fsa_input);
}
}
gboolean
relay_message(xmlNode * msg, gboolean originated_locally)
{
enum crm_ais_msg_types dest = crm_msg_ais;
bool is_for_dc = false;
bool is_for_dcib = false;
bool is_for_te = false;
bool is_for_crm = false;
bool is_for_cib = false;
bool is_local = false;
bool broadcast = false;
const char *host_to = NULL;
const char *sys_to = NULL;
const char *sys_from = NULL;
const char *type = NULL;
const char *task = NULL;
const char *ref = NULL;
crm_node_t *node_to = NULL;
CRM_CHECK(msg != NULL, return TRUE);
host_to = crm_element_value(msg, PCMK__XA_CRM_HOST_TO);
sys_to = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
sys_from = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
type = crm_element_value(msg, PCMK__XA_T);
task = crm_element_value(msg, PCMK__XA_CRM_TASK);
ref = crm_element_value(msg, PCMK_XA_REFERENCE);
broadcast = pcmk__str_empty(host_to);
if (ref == NULL) {
ref = "without reference ID";
}
if (pcmk__str_eq(task, CRM_OP_HELLO, pcmk__str_casei)) {
crm_trace("Received hello %s from %s (no processing needed)",
ref, pcmk__s(sys_from, "unidentified source"));
crm_log_xml_trace(msg, "hello");
return TRUE;
}
// Require message type (set by create_request())
if (!pcmk__str_eq(type, T_CRM, pcmk__str_casei)) {
crm_warn("Ignoring invalid message %s with type '%s' (not '" T_CRM "')",
ref, pcmk__s(type, ""));
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Require a destination subsystem (also set by create_request())
if (sys_to == NULL) {
crm_warn("Ignoring invalid message %s with no " PCMK__XA_CRM_SYS_TO,
ref);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
// Get the message type appropriate to the destination subsystem
if (is_corosync_cluster()) {
dest = text2msg_type(sys_to);
if ((dest < crm_msg_ais) || (dest > crm_msg_stonith_ng)) {
/* Unrecognized value, use a sane default
*
* @TODO Maybe we should bail instead
*/
dest = crm_msg_crmd;
}
}
is_for_dc = (strcasecmp(CRM_SYSTEM_DC, sys_to) == 0);
is_for_dcib = (strcasecmp(CRM_SYSTEM_DCIB, sys_to) == 0);
is_for_te = (strcasecmp(CRM_SYSTEM_TENGINE, sys_to) == 0);
is_for_cib = (strcasecmp(CRM_SYSTEM_CIB, sys_to) == 0);
is_for_crm = (strcasecmp(CRM_SYSTEM_CRMD, sys_to) == 0);
// Check whether message should be processed locally
is_local = false;
if (broadcast) {
if (is_for_dc || is_for_te) {
is_local = false;
} else if (is_for_crm) {
if (pcmk__strcase_any_of(task, CRM_OP_NODE_INFO,
PCMK__CONTROLD_CMD_NODES, NULL)) {
/* Node info requests do not specify a host, which is normally
* treated as "all hosts", because the whole point is that the
* client may not know the local node name. Always handle these
* requests locally.
*/
is_local = true;
} else {
is_local = !originated_locally;
}
} else {
is_local = true;
}
} else if (pcmk__str_eq(controld_globals.our_nodename, host_to,
pcmk__str_casei)) {
is_local = true;
} else if (is_for_crm && pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
xmlNode *msg_data = get_message_xml(msg, F_CRM_DATA);
const char *mode = crm_element_value(msg_data, PCMK__XA_MODE);
- if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_casei)) {
+ if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Local delete of an offline node's resource history
is_local = true;
}
}
// Check whether message should be relayed
if (is_for_dc || is_for_dcib || is_for_te) {
if (AM_I_DC) {
if (is_for_te) {
crm_trace("Route message %s locally as transition request",
ref);
crm_log_xml_trace(msg, sys_to);
send_msg_via_ipc(msg, sys_to);
return TRUE; // No further processing of message is needed
}
crm_trace("Route message %s locally as DC request", ref);
return FALSE; // More to be done by caller
}
if (originated_locally
&& !pcmk__strcase_any_of(sys_from, CRM_SYSTEM_PENGINE,
CRM_SYSTEM_TENGINE, NULL)) {
crm_trace("Relay message %s to DC (via %s)",
ref, pcmk__s(host_to, "broadcast"));
crm_log_xml_trace(msg, "relayed");
if (!broadcast) {
node_to = pcmk__get_node(0, host_to, NULL,
pcmk__node_search_cluster);
}
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
/* Transition engine and scheduler messages are sent only to the DC on
* the same node. If we are no longer the DC, discard this message.
*/
crm_trace("Ignoring message %s because we are no longer DC", ref);
crm_log_xml_trace(msg, "ignored");
return TRUE; // No further processing of message is needed
}
if (is_local) {
if (is_for_crm || is_for_cib) {
crm_trace("Route message %s locally as controller request", ref);
return FALSE; // More to be done by caller
}
crm_trace("Relay message %s locally to %s", ref, sys_to);
crm_log_xml_trace(msg, "IPC-relay");
send_msg_via_ipc(msg, sys_to);
return TRUE;
}
if (!broadcast) {
node_to = pcmk__search_node_caches(0, host_to,
pcmk__node_search_cluster);
if (node_to == NULL) {
crm_warn("Ignoring message %s because node %s is unknown",
ref, host_to);
crm_log_xml_trace(msg, "ignored");
return TRUE;
}
}
crm_trace("Relay message %s to %s",
ref, pcmk__s(host_to, "all peers"));
crm_log_xml_trace(msg, "relayed");
send_cluster_message(node_to, dest, msg, TRUE);
return TRUE;
}
// Return true if field contains a positive integer
static bool
authorize_version(xmlNode *message_data, const char *field,
const char *client_name, const char *ref, const char *uuid)
{
const char *version = crm_element_value(message_data, field);
long long version_num;
if ((pcmk__scan_ll(version, &version_num, -1LL) != pcmk_rc_ok)
|| (version_num < 0LL)) {
crm_warn("Rejected IPC hello from %s: '%s' is not a valid protocol %s "
CRM_XS " ref=%s uuid=%s",
client_name, ((version == NULL)? "" : version),
field, (ref? ref : "none"), uuid);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether a client IPC message is acceptable
*
* If a given client IPC message is a hello, "authorize" it by ensuring it has
* valid information such as a protocol version, and return false indicating
* that nothing further needs to be done with the message. If the message is not
* a hello, just return true to indicate it needs further processing.
*
* \param[in] client_msg XML of IPC message
* \param[in,out] curr_client If IPC is not proxied, client that sent message
* \param[in] proxy_session If IPC is proxied, the session ID
*
* \return true if message needs further processing, false if it doesn't
*/
bool
controld_authorize_ipc_message(const xmlNode *client_msg, pcmk__client_t *curr_client,
const char *proxy_session)
{
xmlNode *message_data = NULL;
const char *client_name = NULL;
const char *op = crm_element_value(client_msg, PCMK__XA_CRM_TASK);
const char *ref = crm_element_value(client_msg, PCMK_XA_REFERENCE);
const char *uuid = (curr_client? curr_client->id : proxy_session);
if (uuid == NULL) {
crm_warn("IPC message from client rejected: No client identifier "
CRM_XS " ref=%s", (ref? ref : "none"));
goto rejected;
}
if (!pcmk__str_eq(CRM_OP_HELLO, op, pcmk__str_casei)) {
// Only hello messages need to be authorized
return true;
}
message_data = get_message_xml(client_msg, F_CRM_DATA);
client_name = crm_element_value(message_data, "client_name");
if (pcmk__str_empty(client_name)) {
crm_warn("IPC hello from client rejected: No client name",
CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid);
goto rejected;
}
if (!authorize_version(message_data, "major_version", client_name, ref,
uuid)) {
goto rejected;
}
if (!authorize_version(message_data, "minor_version", client_name, ref,
uuid)) {
goto rejected;
}
crm_trace("Validated IPC hello from client %s", client_name);
crm_log_xml_trace(client_msg, "hello");
if (curr_client) {
curr_client->userdata = strdup(client_name);
}
controld_trigger_fsa();
return false;
rejected:
crm_log_xml_trace(client_msg, "rejected");
if (curr_client) {
qb_ipcs_disconnect(curr_client->ipcs);
}
return false;
}
static enum crmd_fsa_input
handle_message(xmlNode *msg, enum crmd_fsa_cause cause)
{
const char *type = NULL;
CRM_CHECK(msg != NULL, return I_NULL);
type = crm_element_value(msg, PCMK__XA_SUBT);
if (pcmk__str_eq(type, PCMK__VALUE_REQUEST, pcmk__str_none)) {
return handle_request(msg, cause);
}
if (pcmk__str_eq(type, PCMK__VALUE_RESPONSE, pcmk__str_none)) {
handle_response(msg);
return I_NULL;
}
crm_warn("Ignoring message with unknown " PCMK__XA_SUBT" '%s'",
pcmk__s(type, ""));
crm_log_xml_trace(msg, "bad");
return I_NULL;
}
static enum crmd_fsa_input
handle_failcount_op(xmlNode * stored_msg)
{
const char *rsc = NULL;
const char *uname = NULL;
const char *op = NULL;
char *interval_spec = NULL;
guint interval_ms = 0;
gboolean is_remote_node = FALSE;
xmlNode *xml_op = get_message_xml(stored_msg, F_CRM_DATA);
if (xml_op) {
xmlNode *xml_rsc = first_named_child(xml_op, XML_CIB_TAG_RESOURCE);
xmlNode *xml_attrs = first_named_child(xml_op, XML_TAG_ATTRS);
if (xml_rsc) {
rsc = ID(xml_rsc);
}
if (xml_attrs) {
op = crm_element_value(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_OP);
crm_element_value_ms(xml_attrs,
CRM_META "_" PCMK__META_CLEAR_FAILURE_INTERVAL,
&interval_ms);
}
}
uname = crm_element_value(xml_op, PCMK__META_ON_NODE);
if ((rsc == NULL) || (uname == NULL)) {
crm_log_xml_warn(stored_msg, "invalid failcount op");
return I_NULL;
}
if (crm_element_value(xml_op, PCMK__XA_ROUTER_NODE)) {
is_remote_node = TRUE;
}
crm_debug("Clearing failures for %s-interval %s on %s "
"from attribute manager, CIB, and executor state",
pcmk__readable_interval(interval_ms), rsc, uname);
if (interval_ms) {
interval_spec = crm_strdup_printf("%ums", interval_ms);
}
update_attrd_clear_failures(uname, rsc, op, interval_spec, is_remote_node);
free(interval_spec);
controld_cib_delete_last_failure(rsc, uname, op, interval_ms);
lrm_clear_last_failure(rsc, uname, op, interval_ms);
return I_NULL;
}
static enum crmd_fsa_input
handle_lrm_delete(xmlNode *stored_msg)
{
const char *mode = NULL;
xmlNode *msg_data = get_message_xml(stored_msg, F_CRM_DATA);
CRM_CHECK(msg_data != NULL, return I_NULL);
/* CRM_OP_LRM_DELETE has two distinct modes. The default behavior is to
* relay the operation to the affected node, which will unregister the
* resource from the local executor, clear the resource's history from the
* CIB, and do some bookkeeping in the controller.
*
* However, if the affected node is offline, the client will specify
- * mode="cib" which means the controller receiving the operation should
- * clear the resource's history from the CIB and nothing else. This is used
- * to clear shutdown locks.
+ * mode=PCMK__VALUE_CIB which means the controller receiving the operation
+ * should clear the resource's history from the CIB and nothing else. This
+ * is used to clear shutdown locks.
*/
mode = crm_element_value(msg_data, PCMK__XA_MODE);
- if ((mode == NULL) || strcmp(mode, XML_TAG_CIB)) {
+ if (!pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
// Relay to affected node
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else {
// Delete CIB history locally (compare with do_lrm_delete())
const char *from_sys = NULL;
const char *user_name = NULL;
const char *rsc_id = NULL;
const char *node = NULL;
xmlNode *rsc_xml = NULL;
int rc = pcmk_rc_ok;
rsc_xml = first_named_child(msg_data, XML_CIB_TAG_RESOURCE);
CRM_CHECK(rsc_xml != NULL, return I_NULL);
rsc_id = ID(rsc_xml);
from_sys = crm_element_value(stored_msg, PCMK__XA_CRM_SYS_FROM);
node = crm_element_value(msg_data, PCMK__META_ON_NODE);
user_name = pcmk__update_acl_user(stored_msg, PCMK__XA_CRM_USER, NULL);
crm_debug("Handling " CRM_OP_LRM_DELETE " for %s on %s locally%s%s "
"(clearing CIB resource history only)", rsc_id, node,
(user_name? " for user " : ""), (user_name? user_name : ""));
rc = controld_delete_resource_history(rsc_id, node, user_name,
cib_dryrun|cib_sync_call);
if (rc == pcmk_rc_ok) {
rc = controld_delete_resource_history(rsc_id, node, user_name,
crmd_cib_smart_opt());
}
- //Notify client and tengine.(Only notify tengine if mode = "cib" and CRM_OP_LRM_DELETE.)
+ /* Notify client. Also notify tengine if mode=PCMK__VALUE_CIB and
+ * op=CRM_OP_LRM_DELETE.
+ */
if (from_sys) {
lrmd_event_data_t *op = NULL;
const char *from_host = crm_element_value(stored_msg, PCMK__XA_SRC);
const char *transition;
if (strcmp(from_sys, CRM_SYSTEM_TENGINE)) {
transition = crm_element_value(msg_data,
PCMK__XA_TRANSITION_KEY);
} else {
transition = crm_element_value(stored_msg,
PCMK__XA_TRANSITION_KEY);
}
crm_info("Notifying %s on %s that %s was%s deleted",
from_sys, (from_host? from_host : "local node"), rsc_id,
((rc == pcmk_rc_ok)? "" : " not"));
op = lrmd_new_event(rsc_id, PCMK_ACTION_DELETE, 0);
op->type = lrmd_event_exec_complete;
op->user_data = strdup(transition? transition : FAKE_TE_ID);
op->params = pcmk__strkey_table(free, free);
g_hash_table_insert(op->params, strdup(PCMK_XA_CRM_FEATURE_SET),
strdup(CRM_FEATURE_SET));
controld_rc2event(op, rc);
controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
lrmd_free_event(op);
controld_trigger_delete_refresh(from_sys, rsc_id);
}
return I_NULL;
}
}
/*!
* \brief Handle a CRM_OP_REMOTE_STATE message by updating remote peer cache
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_remote_state(const xmlNode *msg)
{
const char *conn_host = NULL;
const char *remote_uname = ID(msg);
crm_node_t *remote_peer;
bool remote_is_up = false;
int rc = pcmk_rc_ok;
rc = pcmk__xe_get_bool_attr(msg, PCMK__XA_IN_CCM, &remote_is_up);
CRM_CHECK(remote_uname && rc == pcmk_rc_ok, return I_NULL);
remote_peer = crm_remote_peer_get(remote_uname);
CRM_CHECK(remote_peer, return I_NULL);
pcmk__update_peer_state(__func__, remote_peer,
remote_is_up ? CRM_NODE_MEMBER : CRM_NODE_LOST,
0);
conn_host = crm_element_value(msg, PCMK__XA_CONN_HOST);
if (conn_host) {
pcmk__str_update(&remote_peer->conn_host, conn_host);
} else if (remote_peer->conn_host) {
free(remote_peer->conn_host);
remote_peer->conn_host = NULL;
}
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_PING message
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_ping(const xmlNode *msg)
{
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
// Build reply
ping = create_xml_node(NULL, PCMK__XE_PING_RESPONSE);
value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
crm_xml_add(ping, PCMK__XA_CRM_SUBSYSTEM, value);
// Add controller state
value = fsa_state2string(controld_globals.fsa_state);
crm_xml_add(ping, PCMK__XA_CRMD_STATE, value);
crm_notice("Current ping state: %s", value); // CTS needs this
// Add controller health
// @TODO maybe do some checks to determine meaningful status
crm_xml_add(ping, PCMK_XA_RESULT, "ok");
// Send reply
reply = create_reply(msg, ping);
free_xml(ping);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a PCMK__CONTROLD_CMD_NODES message
*
* \param[in] request Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_list(const xmlNode *request)
{
GHashTableIter iter;
crm_node_t *node = NULL;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Create message data for reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODES);
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & node)) {
xmlNode *xml = create_xml_node(reply_data, XML_CIB_TAG_NODE);
crm_xml_add_ll(xml, PCMK_XA_ID, (long long) node->id); // uint32_t
crm_xml_add(xml, PCMK_XA_UNAME, node->uname);
crm_xml_add(xml, PCMK__XA_IN_CCM, node->state);
}
// Create and send reply
reply = create_reply(request, reply_data);
free_xml(reply_data);
if (reply) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
/*!
* \brief Handle a CRM_OP_NODE_INFO request
*
* \param[in] msg Message XML
*
* \return Next FSA input
*/
static enum crmd_fsa_input
handle_node_info_request(const xmlNode *msg)
{
const char *value = NULL;
crm_node_t *node = NULL;
int node_id = 0;
xmlNode *reply = NULL;
xmlNode *reply_data = NULL;
// Build reply
reply_data = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(reply_data, PCMK__XA_CRM_SUBSYSTEM, CRM_SYSTEM_CRMD);
// Add whether current partition has quorum
pcmk__xe_set_bool_attr(reply_data, PCMK_XA_HAVE_QUORUM,
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
// Check whether client requested node info by ID and/or name
crm_element_value_int(msg, PCMK_XA_ID, &node_id);
if (node_id < 0) {
node_id = 0;
}
value = crm_element_value(msg, PCMK_XA_UNAME);
// Default to local node if none given
if ((node_id == 0) && (value == NULL)) {
value = controld_globals.our_nodename;
}
node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any);
if (node) {
crm_xml_add(reply_data, PCMK_XA_ID, node->uuid);
crm_xml_add(reply_data, PCMK_XA_UNAME, node->uname);
crm_xml_add(reply_data, PCMK__XA_CRMD, node->state);
pcmk__xe_set_bool_attr(reply_data, XML_NODE_IS_REMOTE,
pcmk_is_set(node->flags, crm_remote_node));
}
// Send reply
reply = create_reply(msg, reply_data);
free_xml(reply_data);
if (reply != NULL) {
(void) relay_message(reply, TRUE);
free_xml(reply);
}
// Nothing further to do
return I_NULL;
}
static void
verify_feature_set(xmlNode *msg)
{
const char *dc_version = crm_element_value(msg, PCMK_XA_CRM_FEATURE_SET);
if (dc_version == NULL) {
/* All we really know is that the DC feature set is older than 3.1.0,
* but that's also all that really matters.
*/
dc_version = "3.0.14";
}
if (feature_set_compatible(dc_version, CRM_FEATURE_SET)) {
crm_trace("Local feature set (%s) is compatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
} else {
crm_err("Local feature set (%s) is incompatible with DC's (%s)",
CRM_FEATURE_SET, dc_version);
// Nothing is likely to improve without administrator involvement
controld_set_fsa_input_flags(R_STAYDOWN);
crmd_exit(CRM_EX_FATAL);
}
}
// DC gets own shutdown all-clear
static enum crmd_fsa_input
handle_shutdown_self_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
// The expected case -- we initiated own shutdown sequence
crm_info("Shutting down controller");
return I_STOP;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name, pcmk__str_casei)) {
// Must be logic error -- DC confirming its own unrequested shutdown
crm_err("Shutting down controller immediately due to "
"unexpected shutdown confirmation");
return I_TERMINATE;
}
if (controld_globals.fsa_state != S_STOPPING) {
// Shouldn't happen -- non-DC confirming unrequested shutdown
crm_err("Starting new DC election because %s is "
"confirming shutdown we did not request",
(host_from? host_from : "another node"));
return I_ELECTION;
}
// Shouldn't happen, but we are already stopping anyway
crm_debug("Ignoring unexpected shutdown confirmation from %s",
(host_from? host_from : "another node"));
return I_NULL;
}
// Non-DC gets shutdown all-clear from DC
static enum crmd_fsa_input
handle_shutdown_ack(xmlNode *stored_msg)
{
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
crm_warn("Ignoring shutdown request without origin specified");
return I_NULL;
}
if (pcmk__str_eq(host_from, controld_globals.dc_name,
pcmk__str_null_matches|pcmk__str_casei)) {
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
crm_info("Shutting down controller after confirmation from %s",
host_from);
} else {
crm_err("Shutting down controller after unexpected "
"shutdown request from %s", host_from);
controld_set_fsa_input_flags(R_STAYDOWN);
}
return I_STOP;
}
crm_warn("Ignoring shutdown request from %s because DC is %s",
host_from, controld_globals.dc_name);
return I_NULL;
}
static enum crmd_fsa_input
handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause)
{
xmlNode *msg = NULL;
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
/* Optimize this for the DC - it has the most to do */
crm_log_xml_trace(stored_msg, "request");
if (op == NULL) {
crm_warn("Ignoring request without " PCMK__XA_CRM_TASK);
return I_NULL;
}
if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
const char *from = crm_element_value(stored_msg, PCMK__XA_SRC);
crm_node_t *node = pcmk__search_node_caches(0, from,
pcmk__node_search_cluster);
pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN);
if(AM_I_DC == FALSE) {
return I_NULL; /* Done */
}
}
/*========== DC-Only Actions ==========*/
if (AM_I_DC) {
if (strcmp(op, CRM_OP_JOIN_ANNOUNCE) == 0) {
return I_NODE_JOIN;
} else if (strcmp(op, CRM_OP_JOIN_REQUEST) == 0) {
return I_JOIN_REQUEST;
} else if (strcmp(op, CRM_OP_JOIN_CONFIRM) == 0) {
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_self_ack(stored_msg);
} else if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) {
// Another controller wants to shut down its node
return handle_shutdown_request(stored_msg);
}
}
/*========== common actions ==========*/
if (strcmp(op, CRM_OP_NOVOTE) == 0) {
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
} else if (strcmp(op, CRM_OP_REMOTE_STATE) == 0) {
/* a remote connection host is letting us know the node state */
return handle_remote_state(stored_msg);
} else if (strcmp(op, CRM_OP_THROTTLE) == 0) {
throttle_update(stored_msg);
if (AM_I_DC && (controld_globals.transition_graph != NULL)
&& !controld_globals.transition_graph->complete) {
crm_debug("The throttle changed. Trigger a graph.");
trigger_graph();
}
return I_NULL;
} else if (strcmp(op, CRM_OP_CLEAR_FAILCOUNT) == 0) {
return handle_failcount_op(stored_msg);
} else if (strcmp(op, CRM_OP_VOTE) == 0) {
/* count the vote and decide what to do after that */
ha_msg_input_t fsa_input;
fsa_input.msg = stored_msg;
register_fsa_input_adv(C_HA_MESSAGE, I_NULL, &fsa_input,
A_ELECTION_COUNT | A_ELECTION_CHECK, FALSE,
__func__);
/* Sometimes we _must_ go into S_ELECTION */
if (controld_globals.fsa_state == S_HALT) {
crm_debug("Forcing an election from S_HALT");
return I_ELECTION;
}
} else if (strcmp(op, CRM_OP_JOIN_OFFER) == 0) {
verify_feature_set(stored_msg);
crm_debug("Raising I_JOIN_OFFER: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_OFFER;
} else if (strcmp(op, CRM_OP_JOIN_ACKNAK) == 0) {
crm_debug("Raising I_JOIN_RESULT: join-%s",
crm_element_value(stored_msg, PCMK__XA_JOIN_ID));
return I_JOIN_RESULT;
} else if (strcmp(op, CRM_OP_LRM_DELETE) == 0) {
return handle_lrm_delete(stored_msg);
} else if ((strcmp(op, CRM_OP_LRM_FAIL) == 0)
|| (strcmp(op, CRM_OP_LRM_REFRESH) == 0) // @COMPAT
|| (strcmp(op, CRM_OP_REPROBE) == 0)) {
crm_xml_add(stored_msg, PCMK__XA_CRM_SYS_TO, CRM_SYSTEM_LRMD);
return I_ROUTER;
} else if (strcmp(op, CRM_OP_NOOP) == 0) {
return I_NULL;
} else if (strcmp(op, CRM_OP_PING) == 0) {
return handle_ping(stored_msg);
} else if (strcmp(op, CRM_OP_NODE_INFO) == 0) {
return handle_node_info_request(stored_msg);
} else if (strcmp(op, CRM_OP_RM_NODE_CACHE) == 0) {
int id = 0;
const char *name = NULL;
crm_element_value_int(stored_msg, PCMK_XA_ID, &id);
name = crm_element_value(stored_msg, PCMK_XA_UNAME);
if(cause == C_IPC_MESSAGE) {
msg = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
crm_err("Could not instruct peers to remove references to node %s/%u", name, id);
} else {
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
}
free_xml(msg);
} else {
reap_crm_member(id, name);
/* If we're forgetting this node, also forget any failures to fence
* it, so we don't carry that over to any node added later with the
* same name.
*/
st_fail_count_reset(name);
}
} else if (strcmp(op, CRM_OP_MAINTENANCE_NODES) == 0) {
xmlNode *xml = get_message_xml(stored_msg, F_CRM_DATA);
remote_ra_process_maintenance_nodes(xml);
} else if (strcmp(op, PCMK__CONTROLD_CMD_NODES) == 0) {
return handle_node_list(stored_msg);
/*========== (NOT_DC)-Only Actions ==========*/
} else if (!AM_I_DC) {
if (strcmp(op, CRM_OP_SHUTDOWN) == 0) {
return handle_shutdown_ack(stored_msg);
}
} else {
crm_err("Unexpected request (%s) sent to %s", op, AM_I_DC ? "the DC" : "non-DC node");
crm_log_xml_err(stored_msg, "Unexpected");
}
return I_NULL;
}
static void
handle_response(xmlNode *stored_msg)
{
const char *op = crm_element_value(stored_msg, PCMK__XA_CRM_TASK);
crm_log_xml_trace(stored_msg, "reply");
if (op == NULL) {
crm_warn("Ignoring reply without " PCMK__XA_CRM_TASK);
} else if (AM_I_DC && strcmp(op, CRM_OP_PECALC) == 0) {
// Check whether scheduler answer been superseded by subsequent request
const char *msg_ref = crm_element_value(stored_msg, PCMK_XA_REFERENCE);
if (msg_ref == NULL) {
crm_err("%s - Ignoring calculation with no reference", op);
} else if (pcmk__str_eq(msg_ref, controld_globals.fsa_pe_ref,
pcmk__str_none)) {
ha_msg_input_t fsa_input;
controld_stop_sched_timer();
fsa_input.msg = stored_msg;
register_fsa_input_later(C_IPC_MESSAGE, I_PE_SUCCESS, &fsa_input);
} else {
crm_info("%s calculation %s is obsolete", op, msg_ref);
}
} else if (strcmp(op, CRM_OP_VOTE) == 0
|| strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0 || strcmp(op, CRM_OP_SHUTDOWN) == 0) {
} else {
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
crm_err("Unexpected response (op=%s, src=%s) sent to the %s",
op, host_from, AM_I_DC ? "DC" : "controller");
}
}
static enum crmd_fsa_input
handle_shutdown_request(xmlNode * stored_msg)
{
/* handle here to avoid potential version issues
* where the shutdown message/procedure may have
* been changed in later versions.
*
* This way the DC is always in control of the shutdown
*/
char *now_s = NULL;
const char *host_from = crm_element_value(stored_msg, PCMK__XA_SRC);
if (host_from == NULL) {
/* we're shutting down and the DC */
host_from = controld_globals.our_nodename;
}
crm_info("Creating shutdown request for %s (state=%s)", host_from,
fsa_state2string(controld_globals.fsa_state));
crm_log_xml_trace(stored_msg, "message");
now_s = pcmk__ttoa(time(NULL));
update_attrd(host_from, XML_CIB_ATTR_SHUTDOWN, now_s, NULL, FALSE);
free(now_s);
/* will be picked up by the TE as long as its running */
return I_NULL;
}
static void
send_msg_via_ipc(xmlNode * msg, const char *sys)
{
pcmk__client_t *client_channel = NULL;
CRM_CHECK(sys != NULL, return);
client_channel = pcmk__find_client_by_id(sys);
if (crm_element_value(msg, PCMK__XA_SRC) == NULL) {
crm_xml_add(msg, PCMK__XA_SRC, controld_globals.our_nodename);
}
if (client_channel != NULL) {
/* Transient clients such as crmadmin */
pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
xmlNode *data = get_message_xml(msg, F_CRM_DATA);
process_te_message(msg, data);
} else if (pcmk__str_eq(sys, CRM_SYSTEM_LRMD, pcmk__str_none)) {
fsa_data_t fsa_data;
ha_msg_input_t fsa_input;
fsa_input.msg = msg;
fsa_input.xml = get_message_xml(msg, F_CRM_DATA);
fsa_data.id = 0;
fsa_data.actions = 0;
fsa_data.data = &fsa_input;
fsa_data.fsa_input = I_MESSAGE;
fsa_data.fsa_cause = C_IPC_MESSAGE;
fsa_data.origin = __func__;
fsa_data.data_type = fsa_dt_ha_msg;
do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, controld_globals.fsa_state,
I_MESSAGE, &fsa_data);
} else if (crmd_is_proxy_session(sys)) {
crmd_proxy_send(sys, msg);
} else {
crm_info("Received invalid request: unknown subsystem '%s'", sys);
}
}
void
delete_ha_msg_input(ha_msg_input_t * orig)
{
if (orig == NULL) {
return;
}
free_xml(orig->msg);
free(orig);
}
/*!
* \internal
* \brief Notify the cluster of a remote node state change
*
* \param[in] node_name Node's name
* \param[in] node_up true if node is up, false if down
*/
void
broadcast_remote_state_message(const char *node_name, bool node_up)
{
xmlNode *msg = create_request(CRM_OP_REMOTE_STATE, NULL, NULL,
CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
crm_info("Notifying cluster of Pacemaker Remote node %s %s",
node_name, node_up? "coming up" : "going down");
crm_xml_add(msg, PCMK_XA_ID, node_name);
pcmk__xe_set_bool_attr(msg, PCMK__XA_IN_CCM, node_up);
if (node_up) {
crm_xml_add(msg, PCMK__XA_CONN_HOST, controld_globals.our_nodename);
}
send_cluster_message(NULL, crm_msg_crmd, msg, TRUE);
free_xml(msg);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index ce2635db19..772b6b86f9 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,760 +1,760 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
static GHashTable *te_targets = NULL;
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
static void
te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
action->timer = g_timeout_add(action->timeout + graph->network_delay,
action_timer_callback, (void *) action);
CRM_ASSERT(action->timer != 0);
}
/*!
* \internal
* \brief Execute a graph pseudo-action
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] pseudo Pseudo-action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
const char *task = crm_element_value(pseudo->xml, PCMK_XA_OPERATION);
/* send to peers as well? */
if (pcmk__str_eq(task, PCMK_ACTION_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, PCMK__XA_OPERATION_KEY));
te_action_confirmed(pseudo, graph);
return pcmk_rc_ok;
}
static int
get_target_rc(pcmk__graph_action_t *action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, PCMK__META_OP_TARGET_RC),
&exit_status, 0);
return exit_status;
}
/*!
* \internal
* \brief Execute a cluster action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Cluster action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
task = crm_element_value(action->xml, PCMK_XA_OPERATION);
CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = on_node;
if (pcmk__str_eq(task, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
- if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
+ if (pcmk__str_eq(mode, PCMK__VALUE_CIB, pcmk__str_none)) {
router_node = controld_globals.our_nodename;
}
}
}
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Handling controller request '%s' (%s on %s)%s%s",
id, task, on_node, (is_local? " locally" : ""),
(no_wait? " without waiting" : ""));
if (is_local
&& pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* defer until everything else completes */
crm_info("Controller request '%s' is a local shutdown", id);
graph->completion_action = pcmk__graph_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return pcmk_rc_ok;
} else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
crm_node_t *peer = pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(cmd, PCMK__XA_TRANSITION_KEY, counter);
rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster),
crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, target_rc,
controld_globals.te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(pcmk__graph_action_t *action,
lrmd_event_data_t *op)
{
cib_t *cib_conn = controld_globals.cib_conn;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
const char *target_uuid = crm_element_value(action->xml,
PCMK__META_ON_NODE_UUID);
int target_rc = get_target_rc(action);
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, PCMK_XA_ID, target_uuid);
crm_xml_add(state, PCMK_XA_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, PCMK_XA_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, PCMK_XA_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_TYPE);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_CLASS);
crm_copy_xml_element(action_rsc, rsc, PCMK_XA_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
cib_scope_local);
fsa_register_cib_callback(rc, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
const char *task_uuid = crm_element_value(action->xml,
PCMK__XA_OPERATION_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
action->xml->name, action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Execute a resource action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Resource action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
CRM_CHECK(!pcmk__str_empty(on_node),
crm_err("Corrupted command(id=%s) %s: no node",
ID(action->xml), pcmk__s(task, "without task"));
return pcmk_rc_node_unknown);
rsc_op = action->xml;
task = crm_element_value(rsc_op, PCMK_XA_OPERATION);
task_uuid = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
router_node = crm_element_value(rsc_op, PCMK__XA_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(rsc_op, PCMK__XA_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, PCMK__META_OP_NO_WAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
I_NULL, &msg);
} else {
rc = send_cluster_message(pcmk__get_node(0, router_node, NULL,
pcmk__node_search_cluster),
crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
/* Just mark confirmed. Don't bump the job count only to immediately
* decrement it.
*/
crm_info("Action %d confirmed - no wait", action->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *target = crm_element_value(action->xml, PCMK__META_ON_NODE);
if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
const char *t1 = crm_meta_value(action->params,
PCMK__META_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params,
PCMK__META_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
te_update_job_count_on(target, offset, FALSE);
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed on a node
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
* \param[in] target Name of node where action should be executed
*
* \return true if action is allowed, otherwise false
*/
static bool
allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
const char *id = crm_element_value(action->xml, PCMK__XA_OPERATION_KEY);
if(target == NULL) {
/* No limit on these */
return true;
} else if(te_targets == NULL) {
return false;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return false;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return false;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return true;
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
*
* \return true if action is allowed, otherwise false
*/
static bool
graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, PCMK_XA_OPERATION);
if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return true;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
if ((target == NULL)
&& pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
target = crm_meta_value(action->params, PCMK__META_MIGRATE_SOURCE);
if (!allowed_on_node(graph, action, target)) {
return false;
}
target = crm_meta_value(action->params, PCMK__META_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, PCMK__META_ON_NODE);
}
return allowed_on_node(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in,out] action Action to confirm
* \param[in,out] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, PCMK__META_ON_NODE) != NULL)) {
te_update_job_count(action, -1);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
static pcmk__graph_functions_t te_graph_fns = {
execute_pseudo_action,
execute_rsc_action,
execute_cluster_action,
controld_execute_fence_action,
graph_action_allowed,
};
/*
* \internal
* \brief Register the transitioner's graph functions with \p libpacemaker
*/
void
controld_register_graph_functions(void)
{
pcmk__set_graph_functions(&te_graph_fns);
}
void
notify_crmd(pcmk__graph_t *graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s",
fsa_state2string(controld_globals.fsa_state));
CRM_CHECK(graph->complete, graph->complete = true);
switch (graph->completion_action) {
case pcmk__graph_wait:
type = "stop";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_done:
type = "done";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_restart:
type = "restart";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
if (controld_get_period_transition_timer() > 0) {
controld_stop_transition_timer();
controld_start_transition_timer();
} else {
event = I_PE_CALC;
}
} else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
}
break;
case pcmk__graph_shutdown:
type = "shutdown";
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = pcmk__graph_done;
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else {
controld_trigger_fsa();
}
}
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index c76cc2042f..a539b138af 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -1,185 +1,186 @@
/*
* Copyright 2006-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__OPTIONS_INTERNAL__H
# define PCMK__OPTIONS_INTERNAL__H
# ifndef PCMK__CONFIG_H
# define PCMK__CONFIG_H
# include <config.h> // _Noreturn
# endif
# include <glib.h> // GHashTable
# include <stdbool.h> // bool
#include <crm/common/util.h> // pcmk_parse_interval_spec()
_Noreturn void pcmk__cli_help(char cmd);
/*
* Environment variable option handling
*/
const char *pcmk__env_option(const char *option);
void pcmk__set_env_option(const char *option, const char *value, bool compat);
bool pcmk__env_option_enabled(const char *daemon, const char *option);
/*
* Cluster option handling
*/
typedef struct pcmk__cluster_option_s {
const char *name;
const char *alt_name;
const char *type;
const char *values;
const char *default_value;
bool (*is_valid)(const char *);
const char *description_short;
const char *description_long;
} pcmk__cluster_option_t;
const char *pcmk__cluster_option(GHashTable *options,
const pcmk__cluster_option_t *option_list,
int len, const char *name);
gchar *pcmk__format_option_metadata(const char *name, const char *desc_short,
const char *desc_long,
pcmk__cluster_option_t *option_list,
int len);
void pcmk__validate_cluster_options(GHashTable *options,
pcmk__cluster_option_t *option_list,
int len);
bool pcmk__valid_interval_spec(const char *value);
bool pcmk__valid_boolean(const char *value);
bool pcmk__valid_int(const char *value);
bool pcmk__valid_positive_int(const char *value);
bool pcmk__valid_no_quorum_policy(const char *value);
bool pcmk__valid_percentage(const char *value);
bool pcmk__valid_script(const char *value);
// from watchdog.c
long pcmk__get_sbd_watchdog_timeout(void);
bool pcmk__get_sbd_sync_resource_startup(void);
long pcmk__auto_stonith_watchdog_timeout(void);
bool pcmk__valid_stonith_watchdog_timeout(const char *value);
// Constants for environment variable names
#define PCMK__ENV_AUTHKEY_LOCATION "authkey_location"
#define PCMK__ENV_BLACKBOX "blackbox"
#define PCMK__ENV_CALLGRIND_ENABLED "callgrind_enabled"
#define PCMK__ENV_CLUSTER_TYPE "cluster_type"
#define PCMK__ENV_DEBUG "debug"
#define PCMK__ENV_DH_MAX_BITS "dh_max_bits"
#define PCMK__ENV_DH_MIN_BITS "dh_min_bits"
#define PCMK__ENV_FAIL_FAST "fail_fast"
#define PCMK__ENV_IPC_BUFFER "ipc_buffer"
#define PCMK__ENV_IPC_TYPE "ipc_type"
#define PCMK__ENV_LOGFACILITY "logfacility"
#define PCMK__ENV_LOGFILE "logfile"
#define PCMK__ENV_LOGFILE_MODE "logfile_mode"
#define PCMK__ENV_LOGPRIORITY "logpriority"
#define PCMK__ENV_NODE_ACTION_LIMIT "node_action_limit"
#define PCMK__ENV_NODE_START_STATE "node_start_state"
#define PCMK__ENV_PANIC_ACTION "panic_action"
#define PCMK__ENV_REMOTE_ADDRESS "remote_address"
#define PCMK__ENV_REMOTE_SCHEMA_DIR "remote_schema_directory"
#define PCMK__ENV_REMOTE_PID1 "remote_pid1"
#define PCMK__ENV_REMOTE_PORT "remote_port"
#define PCMK__ENV_RESPAWNED "respawned"
#define PCMK__ENV_SCHEMA_DIRECTORY "schema_directory"
#define PCMK__ENV_SERVICE "service"
#define PCMK__ENV_STDERR "stderr"
#define PCMK__ENV_TLS_PRIORITIES "tls_priorities"
#define PCMK__ENV_TRACE_BLACKBOX "trace_blackbox"
#define PCMK__ENV_TRACE_FILES "trace_files"
#define PCMK__ENV_TRACE_FORMATS "trace_formats"
#define PCMK__ENV_TRACE_FUNCTIONS "trace_functions"
#define PCMK__ENV_TRACE_TAGS "trace_tags"
#define PCMK__ENV_VALGRIND_ENABLED "valgrind_enabled"
// @COMPAT Drop at 3.0.0; default is plenty
#define PCMK__ENV_CIB_TIMEOUT "cib_timeout"
// @COMPAT Drop at 3.0.0; likely last used in 1.1.24
#define PCMK__ENV_MCP "mcp"
// @COMPAT Drop at 3.0.0; added unused in 1.1.9
#define PCMK__ENV_QUORUM_TYPE "quorum_type"
/* @COMPAT Drop at 3.0.0; added to debug shutdown issues when Pacemaker is
* managed by systemd, but no longer useful.
*/
#define PCMK__ENV_SHUTDOWN_DELAY "shutdown_delay"
// @COMPAT Deprecated since 2.1.0
#define PCMK__OPT_REMOVE_AFTER_STOP "remove-after-stop"
// Constants for meta-attribute names
#define PCMK__META_CLONE_INSTANCE_NUM "clone"
#define PCMK__META_CONTAINER "container"
#define PCMK__META_DIGESTS_ALL "digests-all"
#define PCMK__META_DIGESTS_SECURE "digests-secure"
#define PCMK__META_INTERNAL_RSC "internal_rsc"
#define PCMK__META_MIGRATE_SOURCE "migrate_source"
#define PCMK__META_MIGRATE_TARGET "migrate_target"
#define PCMK__META_ON_NODE "on_node"
#define PCMK__META_ON_NODE_UUID "on_node_uuid"
#define PCMK__META_OP_NO_WAIT "op_no_wait"
#define PCMK__META_OP_TARGET_RC "op_target_rc"
#define PCMK__META_PHYSICAL_HOST "physical-host"
/* @TODO Plug these in. Currently, they're never set. These are op attrs for use
* with https://projects.clusterlabs.org/T382.
*/
#define PCMK__META_CLEAR_FAILURE_OP "clear_failure_op"
#define PCMK__META_CLEAR_FAILURE_INTERVAL "clear_failure_interval"
// @COMPAT Deprecated meta-attribute since 2.1.0
#define PCMK__META_CAN_FAIL "can_fail"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_MAX since 2.0.0
#define PCMK__META_PROMOTED_MAX_LEGACY "master-max"
// @COMPAT Deprecated alias for PCMK__META_PROMOTED_NODE_MAX since 2.0.0
#define PCMK__META_PROMOTED_NODE_MAX_LEGACY "master-node-max"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_RESTART_TYPE "restart-type"
// @COMPAT Deprecated meta-attribute since 2.0.0
#define PCMK__META_ROLE_AFTER_FAILURE "role_after_failure"
// Constants for enumerated values for various options
+#define PCMK__VALUE_CIB "cib"
#define PCMK__VALUE_CLUSTER "cluster"
#define PCMK__VALUE_CUSTOM "custom"
#define PCMK__VALUE_EN "en"
#define PCMK__VALUE_FENCING "fencing"
#define PCMK__VALUE_GREEN "green"
#define PCMK__VALUE_LOCAL "local"
#define PCMK__VALUE_MIGRATE_ON_RED "migrate-on-red"
#define PCMK__VALUE_NONE "none"
#define PCMK__VALUE_NOTHING "nothing"
#define PCMK__VALUE_ONLY_GREEN "only-green"
#define PCMK__VALUE_PROGRESSIVE "progressive"
#define PCMK__VALUE_QUORUM "quorum"
#define PCMK__VALUE_RED "red"
#define PCMK__VALUE_REQUEST "request"
#define PCMK__VALUE_RESPONSE "response"
#define PCMK__VALUE_UNFENCING "unfencing"
#define PCMK__VALUE_YELLOW "yellow"
#endif // PCMK__OPTIONS_INTERNAL__H
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 32905ca769..26a07f4898 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -1,657 +1,657 @@
/*
* Copyright 2020-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <libxml/tree.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/ipc.h>
#include <crm/common/ipc_internal.h>
#include <crm/common/ipc_controld.h>
#include "crmcommon_private.h"
struct controld_api_private_s {
char *client_uuid;
unsigned int replies_expected;
};
/*!
* \internal
* \brief Get a string representation of a controller API reply type
*
* \param[in] reply Controller API reply type
*
* \return String representation of a controller API reply type
*/
const char *
pcmk__controld_api_reply2str(enum pcmk_controld_api_reply reply)
{
switch (reply) {
case pcmk_controld_reply_reprobe:
return "reprobe";
case pcmk_controld_reply_info:
return "info";
case pcmk_controld_reply_resource:
return "resource";
case pcmk_controld_reply_ping:
return "ping";
case pcmk_controld_reply_nodes:
return "nodes";
default:
return "unknown";
}
}
// \return Standard Pacemaker return code
static int
new_data(pcmk_ipc_api_t *api)
{
struct controld_api_private_s *private = NULL;
api->api_data = calloc(1, sizeof(struct controld_api_private_s));
if (api->api_data == NULL) {
return errno;
}
private = api->api_data;
/* This is set to the PID because that's how it was always done, but PIDs
* are not unique because clients can be remote. The value appears to be
* unused other than as part of PCMK__XA_CRM_SYS_FROM in IPC requests, which
* is only compared against the internal system names (CRM_SYSTEM_TENGINE,
* etc.), so it shouldn't be a problem.
*/
private->client_uuid = pcmk__getpid_s();
/* @TODO Implement a call ID model similar to the CIB, executor, and fencer
* IPC APIs, so that requests and replies can be matched, and
* duplicate replies can be discarded.
*/
return pcmk_rc_ok;
}
static void
free_data(void *data)
{
free(((struct controld_api_private_s *) data)->client_uuid);
free(data);
}
// \return Standard Pacemaker return code
static int
post_connect(pcmk_ipc_api_t *api)
{
/* The controller currently requires clients to register via a hello
* request, but does not reply back.
*/
struct controld_api_private_s *private = api->api_data;
const char *client_name = crm_system_name? crm_system_name : "client";
xmlNode *hello;
int rc;
hello = create_hello_message(private->client_uuid, client_name,
PCMK__CONTROLD_API_MAJOR,
PCMK__CONTROLD_API_MINOR);
rc = pcmk__send_ipc_request(api, hello);
free_xml(hello);
if (rc != pcmk_rc_ok) {
crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s",
pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc);
} else {
crm_debug("Sent IPC hello to %s", pcmk_ipc_name(api, true));
}
return rc;
}
static void
set_node_info_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
data->reply_type = pcmk_controld_reply_info;
if (msg_data == NULL) {
return;
}
data->data.node_info.have_quorum =
pcmk__xe_attr_is_true(msg_data, PCMK_XA_HAVE_QUORUM);
data->data.node_info.is_remote = pcmk__xe_attr_is_true(msg_data, XML_NODE_IS_REMOTE);
/* Integer node_info.id is currently valid only for Corosync nodes.
*
* @TODO: Improve handling after crm_node_t is refactored to handle layer-
* specific data better.
*/
crm_element_value_int(msg_data, PCMK_XA_ID, &(data->data.node_info.id));
data->data.node_info.uuid = crm_element_value(msg_data, PCMK_XA_ID);
data->data.node_info.uname = crm_element_value(msg_data, PCMK_XA_UNAME);
data->data.node_info.state = crm_element_value(msg_data, PCMK__XA_CRMD);
}
static void
set_ping_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
data->reply_type = pcmk_controld_reply_ping;
if (msg_data == NULL) {
return;
}
data->data.ping.sys_from = crm_element_value(msg_data,
PCMK__XA_CRM_SUBSYSTEM);
data->data.ping.fsa_state = crm_element_value(msg_data,
PCMK__XA_CRMD_STATE);
data->data.ping.result = crm_element_value(msg_data, PCMK_XA_RESULT);
}
static void
set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
{
pcmk_controld_api_node_t *node_info;
data->reply_type = pcmk_controld_reply_nodes;
for (xmlNode *node = first_named_child(msg_data, XML_CIB_TAG_NODE);
node != NULL; node = crm_next_same_xml(node)) {
long long id_ll = 0;
node_info = calloc(1, sizeof(pcmk_controld_api_node_t));
crm_element_value_ll(node, PCMK_XA_ID, &id_ll);
if (id_ll > 0) {
node_info->id = id_ll;
}
node_info->uname = crm_element_value(node, PCMK_XA_UNAME);
node_info->state = crm_element_value(node, PCMK__XA_IN_CCM);
data->data.nodes = g_list_prepend(data->data.nodes, node_info);
}
}
static bool
reply_expected(pcmk_ipc_api_t *api, const xmlNode *request)
{
// We only need to handle commands that API functions can send
return pcmk__str_any_of(crm_element_value(request, PCMK__XA_CRM_TASK),
PCMK__CONTROLD_CMD_NODES,
CRM_OP_LRM_DELETE,
CRM_OP_LRM_FAIL,
CRM_OP_NODE_INFO,
CRM_OP_PING,
CRM_OP_REPROBE,
CRM_OP_RM_NODE_CACHE,
NULL);
}
static bool
dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
{
struct controld_api_private_s *private = api->api_data;
crm_exit_t status = CRM_EX_OK;
xmlNode *msg_data = NULL;
const char *value = NULL;
pcmk_controld_api_reply_t reply_data = {
pcmk_controld_reply_unknown, NULL, NULL,
};
if (pcmk__xe_is(reply, "ack")) {
/* ACKs are trivial responses that do not count toward expected replies,
* and do not have all the fields that validation requires, so skip that
* processing.
*/
return private->replies_expected > 0;
}
if (private->replies_expected > 0) {
private->replies_expected--;
}
// Do some basic validation of the reply
/* @TODO We should be able to verify that value is always a response, but
* currently the controller doesn't always properly set the type. Even
* if we fix the controller, we'll still need to handle replies from
* old versions (feature set could be used to differentiate).
*/
value = crm_element_value(reply, PCMK__XA_SUBT);
if (!pcmk__str_any_of(value, PCMK__VALUE_REQUEST, PCMK__VALUE_RESPONSE,
NULL)) {
crm_info("Unrecognizable message from controller: "
"invalid message type '%s'", pcmk__s(value, ""));
status = CRM_EX_PROTOCOL;
goto done;
}
if (pcmk__str_empty(crm_element_value(reply, PCMK_XA_REFERENCE))) {
crm_info("Unrecognizable message from controller: no reference");
status = CRM_EX_PROTOCOL;
goto done;
}
value = crm_element_value(reply, PCMK__XA_CRM_TASK);
if (pcmk__str_empty(value)) {
crm_info("Unrecognizable message from controller: no command name");
status = CRM_EX_PROTOCOL;
goto done;
}
// Parse useful info from reply
reply_data.feature_set = crm_element_value(reply, PCMK_XA_VERSION);
reply_data.host_from = crm_element_value(reply, PCMK__XA_SRC);
msg_data = get_message_xml(reply, F_CRM_DATA);
if (!strcmp(value, CRM_OP_REPROBE)) {
reply_data.reply_type = pcmk_controld_reply_reprobe;
} else if (!strcmp(value, CRM_OP_NODE_INFO)) {
set_node_info_data(&reply_data, msg_data);
} else if (!strcmp(value, CRM_OP_INVOKE_LRM)) {
reply_data.reply_type = pcmk_controld_reply_resource;
reply_data.data.resource.node_state = msg_data;
} else if (!strcmp(value, CRM_OP_PING)) {
set_ping_data(&reply_data, msg_data);
} else if (!strcmp(value, PCMK__CONTROLD_CMD_NODES)) {
set_nodes_data(&reply_data, msg_data);
} else {
crm_info("Unrecognizable message from controller: unknown command '%s'",
value);
status = CRM_EX_PROTOCOL;
}
done:
pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
// Free any reply data that was allocated
if (pcmk__str_eq(value, PCMK__CONTROLD_CMD_NODES, pcmk__str_casei)) {
g_list_free_full(reply_data.data.nodes, free);
}
return false; // No further replies needed
}
pcmk__ipc_methods_t *
pcmk__controld_api_methods(void)
{
pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
if (cmds != NULL) {
cmds->new_data = new_data;
cmds->free_data = free_data;
cmds->post_connect = post_connect;
cmds->reply_expected = reply_expected;
cmds->dispatch = dispatch;
}
return cmds;
}
/*!
* \internal
* \brief Create XML for a controller IPC request
*
* \param[in] api Controller connection
* \param[in] op Controller IPC command name
* \param[in] node Node name to set as destination host
* \param[in] msg_data XML to attach to request as message data
*
* \return Newly allocated XML for request
*/
static xmlNode *
create_controller_request(const pcmk_ipc_api_t *api, const char *op,
const char *node, xmlNode *msg_data)
{
struct controld_api_private_s *private = NULL;
const char *sys_to = NULL;
if (api == NULL) {
return NULL;
}
private = api->api_data;
if ((node == NULL) && !strcmp(op, CRM_OP_PING)) {
sys_to = CRM_SYSTEM_DC;
} else {
sys_to = CRM_SYSTEM_CRMD;
}
return create_request(op, msg_data, node, sys_to,
(crm_system_name? crm_system_name : "client"),
private->client_uuid);
}
// \return Standard Pacemaker return code
static int
send_controller_request(pcmk_ipc_api_t *api, const xmlNode *request,
bool reply_is_expected)
{
if (crm_element_value(request, PCMK_XA_REFERENCE) == NULL) {
return EINVAL;
}
if (reply_is_expected) {
struct controld_api_private_s *private = api->api_data;
private->replies_expected++;
}
return pcmk__send_ipc_request(api, request);
}
static xmlNode *
create_reprobe_message_data(const char *target_node, const char *router_node)
{
xmlNode *msg_data;
msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE);
crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);
if ((router_node != NULL) && !pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
}
return msg_data;
}
/*!
* \brief Send a reprobe controller operation
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node to reprobe
* \param[in] router_node Router node for host
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_reprobe.
*/
int
pcmk_controld_api_reprobe(pcmk_ipc_api_t *api, const char *target_node,
const char *router_node)
{
xmlNode *request;
xmlNode *msg_data;
int rc = pcmk_rc_ok;
if (api == NULL) {
return EINVAL;
}
if (router_node == NULL) {
router_node = target_node;
}
crm_debug("Sending %s IPC request to reprobe %s via %s",
pcmk_ipc_name(api, true), pcmk__s(target_node, "local node"),
pcmk__s(router_node, "local node"));
msg_data = create_reprobe_message_data(target_node, router_node);
request = create_controller_request(api, CRM_OP_REPROBE, router_node,
msg_data);
rc = send_controller_request(api, request, true);
free_xml(msg_data);
free_xml(request);
return rc;
}
/*!
* \brief Send a "node info" controller operation
*
* \param[in,out] api Controller connection
* \param[in] nodeid ID of node to get info for (or 0 for local node)
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_info.
*/
int
pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid)
{
xmlNode *request;
int rc = pcmk_rc_ok;
request = create_controller_request(api, CRM_OP_NODE_INFO, NULL, NULL);
if (request == NULL) {
return EINVAL;
}
if (nodeid > 0) {
crm_xml_set_id(request, "%lu", (unsigned long) nodeid);
}
rc = send_controller_request(api, request, true);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller for status
*
* \param[in,out] api Controller connection
* \param[in] node_name Name of node whose status is desired (NULL for DC)
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_ping.
*/
int
pcmk_controld_api_ping(pcmk_ipc_api_t *api, const char *node_name)
{
xmlNode *request;
int rc = pcmk_rc_ok;
request = create_controller_request(api, CRM_OP_PING, node_name, NULL);
if (request == NULL) {
return EINVAL;
}
rc = send_controller_request(api, request, true);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller for cluster information
*
* \param[in,out] api Controller connection
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_nodes.
*/
int
pcmk_controld_api_list_nodes(pcmk_ipc_api_t *api)
{
xmlNode *request;
int rc = EINVAL;
request = create_controller_request(api, PCMK__CONTROLD_CMD_NODES, NULL,
NULL);
if (request != NULL) {
rc = send_controller_request(api, request, true);
free_xml(request);
}
return rc;
}
// \return Standard Pacemaker return code
static int
controller_resource_op(pcmk_ipc_api_t *api, const char *op,
const char *target_node, const char *router_node,
bool cib_only, const char *rsc_id,
const char *rsc_long_id, const char *standard,
const char *provider, const char *type)
{
int rc = pcmk_rc_ok;
char *key;
xmlNode *request, *msg_data, *xml_rsc, *params;
if (api == NULL) {
return EINVAL;
}
if (router_node == NULL) {
router_node = target_node;
}
msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP);
/* The controller logs the transition key from resource op requests, so we
* need to have *something* for it.
* @TODO don't use "crm-resource"
*/
key = pcmk__transition_key(0, getpid(), 0,
"xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx");
crm_xml_add(msg_data, PCMK__XA_TRANSITION_KEY, key);
free(key);
crm_xml_add(msg_data, PCMK__META_ON_NODE, target_node);
if (!pcmk__str_eq(router_node, target_node, pcmk__str_casei)) {
crm_xml_add(msg_data, PCMK__XA_ROUTER_NODE, router_node);
}
if (cib_only) {
// Indicate that only the CIB needs to be cleaned
- crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB);
+ crm_xml_add(msg_data, PCMK__XA_MODE, PCMK__VALUE_CIB);
}
xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE);
crm_xml_add(xml_rsc, PCMK_XA_ID, rsc_id);
crm_xml_add(xml_rsc, PCMK__XA_LONG_ID, rsc_long_id);
crm_xml_add(xml_rsc, PCMK_XA_CLASS, standard);
crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, provider);
crm_xml_add(xml_rsc, PCMK_XA_TYPE, type);
params = create_xml_node(msg_data, XML_TAG_ATTRS);
crm_xml_add(params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
// The controller parses the timeout from the request
key = crm_meta_name(PCMK_META_TIMEOUT);
crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg
free(key);
request = create_controller_request(api, op, router_node, msg_data);
rc = send_controller_request(api, request, true);
free_xml(msg_data);
free_xml(request);
return rc;
}
/*!
* \brief Ask the controller to fail a resource
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node resource is on
* \param[in] router_node Router node for target
* \param[in] rsc_id ID of resource to fail
* \param[in] rsc_long_id Long ID of resource (if any)
* \param[in] standard Standard of resource
* \param[in] provider Provider of resource (if any)
* \param[in] type Type of resource to fail
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_resource.
*/
int
pcmk_controld_api_fail(pcmk_ipc_api_t *api,
const char *target_node, const char *router_node,
const char *rsc_id, const char *rsc_long_id,
const char *standard, const char *provider,
const char *type)
{
crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s",
pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
pcmk__s(rsc_long_id, "no other names"),
pcmk__s(target_node, "unspecified node"),
pcmk__s(router_node, "unspecified node"));
return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node,
router_node, false, rsc_id, rsc_long_id,
standard, provider, type);
}
/*!
* \brief Ask the controller to refresh a resource
*
* \param[in,out] api Controller connection
* \param[in] target_node Name of node resource is on
* \param[in] router_node Router node for target
* \param[in] rsc_id ID of resource to refresh
* \param[in] rsc_long_id Long ID of resource (if any)
* \param[in] standard Standard of resource
* \param[in] provider Provider of resource (if any)
* \param[in] type Type of resource
* \param[in] cib_only If true, clean resource from CIB only
*
* \return Standard Pacemaker return code
* \note Event callback will get a reply of type pcmk_controld_reply_resource.
*/
int
pcmk_controld_api_refresh(pcmk_ipc_api_t *api, const char *target_node,
const char *router_node,
const char *rsc_id, const char *rsc_long_id,
const char *standard, const char *provider,
const char *type, bool cib_only)
{
crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s",
pcmk_ipc_name(api, true), pcmk__s(rsc_id, "unknown resource"),
pcmk__s(rsc_long_id, "no other names"),
pcmk__s(target_node, "unspecified node"),
pcmk__s(router_node, "unspecified node"));
return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node,
router_node, cib_only, rsc_id, rsc_long_id,
standard, provider, type);
}
/*!
* \brief Get the number of IPC replies currently expected from the controller
*
* \param[in] api Controller IPC API connection
*
* \return Number of replies expected
*/
unsigned int
pcmk_controld_api_replies_expected(const pcmk_ipc_api_t *api)
{
struct controld_api_private_s *private = api->api_data;
return private->replies_expected;
}
/*!
* \brief Create XML for a controller IPC "hello" message
*
* \deprecated This function is deprecated as part of the public C API.
*/
// \todo make this static to this file when breaking API backward compatibility
xmlNode *
create_hello_message(const char *uuid, const char *client_name,
const char *major_version, const char *minor_version)
{
xmlNode *hello_node = NULL;
xmlNode *hello = NULL;
if (pcmk__str_empty(uuid) || pcmk__str_empty(client_name)
|| pcmk__str_empty(major_version) || pcmk__str_empty(minor_version)) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"missing information",
client_name? client_name : "unknown client",
uuid? uuid : "unknown");
return NULL;
}
hello_node = create_xml_node(NULL, XML_TAG_OPTIONS);
if (hello_node == NULL) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"Message data creation failed", client_name, uuid);
return NULL;
}
crm_xml_add(hello_node, "major_version", major_version);
crm_xml_add(hello_node, "minor_version", minor_version);
crm_xml_add(hello_node, "client_name", client_name);
crm_xml_add(hello_node, "client_uuid", uuid);
hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid);
if (hello == NULL) {
crm_err("Could not create IPC hello message from %s (UUID %s): "
"Request creation failed", client_name, uuid);
return NULL;
}
free_xml(hello_node);
crm_trace("Created hello message from %s (UUID %s)", client_name, uuid);
return hello;
}
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index f5b486f589..9b1f73c0b7 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -1,1096 +1,1096 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pcmk_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pcmk_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pcmk_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, PCMK_XA_ID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const pcmk_node_t *node, void *xml)
{
add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Count (optionally add to XML) nodes needing maintenance state update
*
* \param[in,out] xml Parent XML tag to add to, if any
* \param[in] scheduler Scheduler data
*
* \return Count of nodes added
* \note Only Pacemaker Remote nodes are considered currently
*/
static int
add_maintenance_nodes(xmlNode *xml, const pcmk_scheduler_t *scheduler)
{
xmlNode *maintenance = NULL;
int count = 0;
if (xml != NULL) {
maintenance = create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE);
}
for (const GList *iter = scheduler->nodes;
iter != NULL; iter = iter->next) {
const pcmk_node_t *node = iter->data;
if (pe__is_guest_or_remote_node(node) &&
(node->details->maintenance != node->details->remote_maintenance)) {
if (maintenance != NULL) {
crm_xml_add(add_node_to_xml_by_id(node->details->id,
maintenance),
XML_NODE_IS_MAINTENANCE,
(node->details->maintenance? "1" : "0"));
}
count++;
}
}
crm_trace("%s %d nodes in need of maintenance mode update in state",
((maintenance == NULL)? "Counted" : "Added"), count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] scheduler Scheduler data
*/
static void
add_maintenance_update(pcmk_scheduler_t *scheduler)
{
pcmk_action_t *action = NULL;
if (add_maintenance_nodes(NULL, scheduler) != 0) {
action = get_pseudo_op(PCMK_ACTION_MAINTENANCE_NODES, scheduler);
pcmk__set_action_flags(action, pcmk_action_always_in_graph);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
*/
static void
add_downed_nodes(xmlNode *xml, const pcmk_action_t *action)
{
CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL),
return);
if (pcmk__str_eq(action->task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) {
/* Shutdown makes the action's node down */
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH,
pcmk__str_none)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
if (pcmk__is_fencing_action(fence)) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
pe_foreach_guest_node(action->node->details->data_set,
action->node, add_node_to_xml, downed);
}
} else if (action->rsc && action->rsc->is_remote_node
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GList *iter;
pcmk_action_t *input;
bool migrating = false;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((pcmk__related_action_t *) iter->data)->action;
if ((input->rsc != NULL)
&& pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none)
&& pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
migrating = true;
break;
}
}
if (!migrating) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
/*!
* \internal
* \brief Create a transition graph operation key for a clone action
*
* \param[in] action Clone action
* \param[in] interval_ms Action interval in milliseconds
*
* \return Newly allocated string with transition graph operation key
*/
static char *
clone_op_key(const pcmk_action_t *action, guint interval_ms)
{
if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_operation");
CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL));
return pcmk__notify_key(action->rsc->clone_name, n_type, n_task);
} else if (action->cancel_task != NULL) {
return pcmk__op_key(action->rsc->clone_name, action->cancel_task,
interval_ms);
} else {
return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms);
}
}
/*!
* \internal
* \brief Add node details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] xml Transition graph action XML for \p action
*/
static void
add_node_details(const pcmk_action_t *action, xmlNode *xml)
{
pcmk_node_t *router_node = pcmk__connection_host_for_action(action);
crm_xml_add(xml, PCMK__META_ON_NODE, action->node->details->uname);
crm_xml_add(xml, PCMK__META_ON_NODE_UUID, action->node->details->id);
if (router_node != NULL) {
crm_xml_add(xml, PCMK__XA_ROUTER_NODE, router_node->details->uname);
}
}
/*!
* \internal
* \brief Add resource details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_resource_details(const pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *rsc_xml = NULL;
const char *attr_list[] = {
PCMK_XA_CLASS,
PCMK_XA_PROVIDER,
PCMK_XA_TYPE,
};
/* If a resource is locked to a node via PCMK_OPT_SHUTDOWN_LOCK, mark its
* actions so the controller can preserve the lock when the action
* completes.
*/
if (pcmk__action_locks_rsc_to_node(action)) {
crm_xml_add_ll(action_xml, PCMK_OPT_SHUTDOWN_LOCK,
(long long) action->rsc->lock_time);
}
// List affected resource
rsc_xml = create_xml_node(action_xml,
(const char *) action->rsc->xml->name);
if (pcmk_is_set(action->rsc->flags, pcmk_rsc_removed)
&& (action->rsc->clone_name != NULL)) {
/* Use the numbered instance name here, because if there is more
* than one instance on a node, we need to make sure the command
* goes to the right one.
*
* This is important even for anonymous clones, because the clone's
* unique meta-attribute might have just been toggled from on to
* off.
*/
crm_debug("Using orphan clone name %s instead of %s",
action->rsc->id, action->rsc->clone_name);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->clone_name);
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
} else if (!pcmk_is_set(action->rsc->flags, pcmk_rsc_unique)) {
const char *xml_id = ID(action->rsc->xml);
crm_debug("Using anonymous clone name %s for %s (aka %s)",
xml_id, action->rsc->id, action->rsc->clone_name);
/* ID is what we'd like client to use
* LONG_ID is what they might know it as instead
*
* LONG_ID is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, PCMK_XA_ID, xml_id);
if ((action->rsc->clone_name != NULL)
&& !pcmk__str_eq(xml_id, action->rsc->clone_name,
pcmk__str_none)) {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->clone_name);
} else {
crm_xml_add(rsc_xml, PCMK__XA_LONG_ID, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->clone_name == NULL);
crm_xml_add(rsc_xml, PCMK_XA_ID, action->rsc->id);
}
for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
}
}
/*!
* \internal
* \brief Add action attributes to transition graph action XML
*
* \param[in,out] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_action_attributes(pcmk_action_t *action, xmlNode *action_xml)
{
xmlNode *args_xml = NULL;
/* We create free-standing XML to start, so we can sort the attributes
* before adding it to action_xml, which keeps the scheduler regression
* test graphs comparable.
*/
args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
crm_xml_add(args_xml, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if ((action->rsc != NULL) && (action->node != NULL)) {
// Get the resource instance attributes, evaluated properly for node
GHashTable *params = pe_rsc_params(action->rsc, action->node,
action->rsc->cluster);
pcmk__substitute_remote_addr(action->rsc, params);
g_hash_table_foreach(params, hash2smartfield, args_xml);
} else if ((action->rsc != NULL)
&& (action->rsc->variant <= pcmk_rsc_variant_primitive)) {
GHashTable *params = pe_rsc_params(action->rsc, NULL,
action->rsc->cluster);
g_hash_table_foreach(params, hash2smartfield, args_xml);
}
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (action->rsc != NULL) {
pcmk_resource_t *parent = action->rsc;
while (parent != NULL) {
parent->cmds->add_graph_meta(parent, args_xml);
parent = parent->parent;
}
pcmk__add_guest_meta_to_xml(args_xml, action);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)
&& (action->node != NULL)) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->details->attrs, hash2metafield,
args_xml);
}
sorted_xml(args_xml, action_xml, FALSE);
free_xml(args_xml);
}
/*!
* \internal
* \brief Create the transition graph XML for a scheduled action
*
* \param[in,out] parent Parent XML element to add action to
* \param[in,out] action Scheduled action
* \param[in] skip_details If false, add action details as sub-elements
* \param[in] scheduler Scheduler data
*/
static void
create_graph_action(xmlNode *parent, pcmk_action_t *action, bool skip_details,
const pcmk_scheduler_t *scheduler)
{
bool needs_node_info = true;
bool needs_maintenance_info = false;
xmlNode *action_xml = NULL;
if ((action == NULL) || (scheduler == NULL)) {
return;
}
// Create the top-level element based on task
if (pcmk__str_eq(action->task, PCMK_ACTION_STONITH, pcmk__str_none)) {
/* All fences need node info; guest node fences are pseudo-events */
if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
}
} else if (pcmk__str_any_of(action->task,
PCMK_ACTION_DO_SHUTDOWN,
PCMK_ACTION_CLEAR_FAILCOUNT, NULL)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_LRM_DELETE,
pcmk__str_none)) {
// CIB-only clean-up for shutdown locks
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
- crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB);
+ crm_xml_add(action_xml, PCMK__XA_MODE, PCMK__VALUE_CIB);
} else if (pcmk_is_set(action->flags, pcmk_action_pseudo)) {
if (pcmk__str_eq(action->task, PCMK_ACTION_MAINTENANCE_NODES,
pcmk__str_none)) {
needs_maintenance_info = true;
}
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = false;
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP);
}
crm_xml_add_int(action_xml, PCMK_XA_ID, action->id);
crm_xml_add(action_xml, PCMK_XA_OPERATION, action->task);
if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) {
char *clone_key = NULL;
guint interval_ms;
if (pcmk__guint_from_hash(action->meta, PCMK_META_INTERVAL, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
clone_key = clone_op_key(action, interval_ms);
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, clone_key);
crm_xml_add(action_xml, "internal_" PCMK__XA_OPERATION_KEY,
action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, PCMK__XA_OPERATION_KEY, action->uuid);
}
if (needs_node_info && (action->node != NULL)) {
add_node_details(action, action_xml);
g_hash_table_insert(action->meta, strdup(PCMK__META_ON_NODE),
strdup(action->node->details->uname));
g_hash_table_insert(action->meta, strdup(PCMK__META_ON_NODE_UUID),
strdup(action->node->details->id));
}
if (skip_details) {
return;
}
if ((action->rsc != NULL)
&& !pcmk_is_set(action->flags, pcmk_action_pseudo)) {
// This is a real resource action, so add resource details
add_resource_details(action, action_xml);
}
/* List any attributes in effect */
add_action_attributes(action, action_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, scheduler);
}
}
/*!
* \internal
* \brief Check whether an action should be added to the transition graph
*
* \param[in] action Action to check
*
* \return true if action should be added to graph, otherwise false
*/
static bool
should_add_action_to_graph(const pcmk_action_t *action)
{
if (!pcmk_is_set(action->flags, pcmk_action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
}
if (pcmk_is_set(action->flags, pcmk_action_optional)
&& !pcmk_is_set(action->flags, pcmk_action_always_in_graph)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
}
/* Actions for unmanaged resources should be excluded from the graph,
* with the exception of monitors and cancellation of recurring monitors.
*/
if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pcmk_rsc_managed)
&& !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
const char *interval_ms_s;
/* A cancellation of a recurring monitor will get here because the task
* is cancel rather than monitor, but the interval can still be used to
* recognize it. The interval has been normalized to milliseconds by
* this point, so a string comparison is sufficient.
*/
interval_ms_s = g_hash_table_lookup(action->meta, PCMK_META_INTERVAL);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
/* Always add pseudo-actions, fence actions, and shutdown actions (already
* determined to be required and runnable by this point)
*/
if (pcmk_is_set(action->flags, pcmk_action_pseudo)
|| pcmk__strcase_any_of(action->task, PCMK_ACTION_STONITH,
PCMK_ACTION_DO_SHUTDOWN, NULL)) {
return true;
}
if (action->node == NULL) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was not assigned to a node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unassigned", action, false);
return false;
}
if (pcmk_is_set(action->flags, pcmk_action_on_dc)) {
crm_trace("Action %s (%d) should be dumped: "
"can run on DC instead of %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (pe__is_guest_node(action->node)
&& !action->node->details->remote_requires_reset) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest %s",
action->uuid, action->id, pcmk__node_name(action->node));
} else if (!action->node->details->online) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
pcmk__log_action("Offline node", action, false);
return false;
} else if (action->node->details->unclean) {
pcmk__sched_err("Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unclean node", action, false);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether an ordering's flags can change an action
*
* \param[in] ordering Ordering to check
*
* \return true if ordering has flags that can change an action, false otherwise
*/
static bool
ordering_can_change_actions(const pcmk__related_action_t *ordering)
{
return pcmk_any_flags_set(ordering->type,
~(pcmk__ar_then_implies_first_graphed
|pcmk__ar_first_implies_then_graphed
|pcmk__ar_ordered));
}
/*!
* \internal
* \brief Check whether an action input should be in the transition graph
*
* \param[in] action Action to check
* \param[in,out] input Action input to check
*
* \return true if input should be in graph, false otherwise
* \note This function may not only check an input, but disable it under certian
* circumstances (load or anti-colocation orderings that are not needed).
*/
static bool
should_add_input_to_graph(const pcmk_action_t *action,
pcmk__related_action_t *input)
{
if (input->state == pe_link_dumped) {
return true;
}
if ((uint32_t) input->type == pcmk__ar_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk_action_runnable)
&& !ordering_can_change_actions(input)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pcmk_action_runnable)
&& pcmk_is_set(input->type, pcmk__ar_min_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"minimum number of instances required but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pcmk__ar_unmigratable_then_blocks)
&& !pcmk_is_set(input->action->flags, pcmk_action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input blocked if 'then' unmigratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pcmk__ar_if_first_unmigratable)
&& pcmk_is_set(input->action->flags, pcmk_action_migratable)) {
crm_trace("Ignoring %s (%d) input %s (%d): ordering applies "
"only if input is unmigratable, but it is migratable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (((uint32_t) input->type == pcmk__ar_ordered)
&& pcmk_is_set(input->action->flags, pcmk_action_migratable)
&& pcmk__ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((uint32_t) input->type == pcmk__ar_if_on_same_node_or_target) {
pcmk_node_t *input_node = input->action->node;
if ((action->rsc != NULL)
&& pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
pcmk_node_t *assigned = action->rsc->allocated_to;
/* For load_stopped -> migrate_to orderings, we care about where
* the resource has been assigned, not where migrate_to will be
* executed.
*/
if (!pcmk__same_node(input_node, assigned)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"migration target %s is not same as input node %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(assigned? assigned->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if (!pcmk__same_node(input_node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if ((uint32_t) input->type == pcmk__ar_if_required_on_same_node) {
if (input->action->node && action->node
&& !pcmk__same_node(input->action->node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"not on same node (%s vs %s)",
action->uuid, action->id,
input->action->uuid, input->action->id,
pcmk__node_name(action->node),
pcmk__node_name(input->action->node));
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = (enum pe_ordering) pcmk__ar_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& pcmk_is_set(input->action->rsc->flags, pcmk_rsc_failed)
&& !pcmk_is_set(input->action->rsc->flags, pcmk_rsc_managed)
&& pcmk__ends_with(input->action->uuid, "_stop_0")
&& action->rsc && pe_rsc_is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (pcmk_is_set(input->action->flags, pcmk_action_optional)
&& !pcmk_any_flags_set(input->action->flags,
pcmk_action_always_in_graph
|pcmk_action_added_to_graph)
&& !should_add_action_to_graph(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
action->uuid, action->id, action_type_str(input->action->flags),
input->action->uuid, input->action->id,
action_node_str(input->action),
action_runnable_str(input->action->flags),
action_optional_str(input->action->flags), input->type);
return true;
}
/*!
* \internal
* \brief Check whether an ordering creates an ordering loop
*
* \param[in] init_action "First" action in ordering
* \param[in] action Callers should always set this the same as
* \p init_action (this function may use a different
* value for recursive calls)
* \param[in,out] input Action wrapper for "then" action in ordering
*
* \return true if the ordering creates a loop, otherwise false
*/
bool
pcmk__graph_has_loop(const pcmk_action_t *init_action,
const pcmk_action_t *action, pcmk__related_action_t *input)
{
bool has_loop = false;
if (pcmk_is_set(input->action->flags, pcmk_action_detect_loop)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
return false;
}
// Don't need to check inputs that won't be used
if (!should_add_input_to_graph(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->details->uname : "",
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
return true;
}
pcmk__set_action_flags(input->action, pcmk_action_detect_loop);
crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->details->uname : "",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
input->type,
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
// Recursively check input itself for loops
for (GList *iter = input->action->actions_before;
iter != NULL; iter = iter->next) {
if (pcmk__graph_has_loop(init_action, input->action,
(pcmk__related_action_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
break;
}
}
pcmk__clear_action_flags(input->action, pcmk_action_detect_loop);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
}
return has_loop;
}
/*!
* \internal
* \brief Create a synapse XML element for a transition graph
*
* \param[in] action Action that synapse is for
* \param[in,out] scheduler Scheduler data containing graph
*
* \return Newly added XML element for new graph synapse
*/
static xmlNode *
create_graph_synapse(const pcmk_action_t *action, pcmk_scheduler_t *scheduler)
{
int synapse_priority = 0;
xmlNode *syn = create_xml_node(scheduler->graph, "synapse");
crm_xml_add_int(syn, PCMK_XA_ID, scheduler->num_synapse);
scheduler->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, PCMK__XA_PRIORITY, synapse_priority);
}
return syn;
}
/*!
* \internal
* \brief Add an action to the transition graph XML if appropriate
*
* \param[in,out] data Action to possibly add
* \param[in,out] user_data Scheduler data
*
* \note This will de-duplicate the action inputs, meaning that the
* pcmk__related_action_t:type flags can no longer be relied on to retain
* their original settings. That means this MUST be called after
* pcmk__apply_orderings() is complete, and nothing after this should rely
* on those type flags. (For example, some code looks for type equal to
* some flag rather than whether the flag is set, and some code looks for
* particular combinations of flags -- such code must be done before
* pcmk__create_graph().)
*/
static void
add_action_to_graph(gpointer data, gpointer user_data)
{
pcmk_action_t *action = (pcmk_action_t *) data;
pcmk_scheduler_t *scheduler = (pcmk_scheduler_t *) user_data;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
/* If we haven't already, de-duplicate inputs (even if we won't be adding
* the action to the graph, so that crm_simulate's dot graphs don't have
* duplicates).
*/
if (!pcmk_is_set(action->flags, pcmk_action_inputs_deduplicated)) {
pcmk__deduplicate_action_inputs(action);
pcmk__set_action_flags(action, pcmk_action_inputs_deduplicated);
}
if (pcmk_is_set(action->flags, pcmk_action_added_to_graph)
|| !should_add_action_to_graph(action)) {
return; // Already added, or shouldn't be
}
pcmk__set_action_flags(action, pcmk_action_added_to_graph);
crm_trace("Adding action %d (%s%s%s) to graph",
action->id, action->uuid,
((action->node == NULL)? "" : " on "),
((action->node == NULL)? "" : action->node->details->uname));
syn = create_graph_synapse(action, scheduler);
set = create_xml_node(syn, "action_set");
in = create_xml_node(syn, "inputs");
create_graph_action(set, action, false, scheduler);
for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
pcmk__related_action_t *input = lpc->data;
if (should_add_input_to_graph(action, input)) {
xmlNode *input_xml = create_xml_node(in, "trigger");
input->state = pe_link_dumped;
create_graph_action(input_xml, input->action, true, scheduler);
}
}
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error || crm_config_error) {
crm_err("Calculated transition %d (with errors)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else if (was_processing_warning || crm_config_warning) {
crm_warn("Calculated transition %d (with warnings)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else {
crm_notice("Calculated transition %d%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__add_rsc_actions_to_graph(pcmk_resource_t *rsc)
{
GList *iter = NULL;
CRM_ASSERT(rsc != NULL);
pcmk__rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
// First add the resource's own actions
g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster);
// Then recursively add its children's actions (appropriate to variant)
for (iter = rsc->children; iter != NULL; iter = iter->next) {
pcmk_resource_t *child_rsc = (pcmk_resource_t *) iter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
}
/*!
* \internal
* \brief Create a transition graph with all cluster actions needed
*
* \param[in,out] scheduler Scheduler data
*/
void
pcmk__create_graph(pcmk_scheduler_t *scheduler)
{
GList *iter = NULL;
const char *value = NULL;
long long limit = 0LL;
transition_id++;
crm_trace("Creating transition graph %d", transition_id);
scheduler->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(scheduler->config_hash, PCMK_OPT_CLUSTER_DELAY);
crm_xml_add(scheduler->graph, PCMK_OPT_CLUSTER_DELAY, value);
value = pe_pref(scheduler->config_hash, PCMK_OPT_STONITH_TIMEOUT);
crm_xml_add(scheduler->graph, PCMK_OPT_STONITH_TIMEOUT, value);
crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY");
if (pcmk_is_set(scheduler->flags, pcmk_sched_start_failure_fatal)) {
crm_xml_add(scheduler->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(scheduler->graph, "failed-start-offset", "1");
}
value = pe_pref(scheduler->config_hash, PCMK_OPT_BATCH_LIMIT);
crm_xml_add(scheduler->graph, PCMK_OPT_BATCH_LIMIT, value);
crm_xml_add_int(scheduler->graph, "transition_id", transition_id);
value = pe_pref(scheduler->config_hash, PCMK_OPT_MIGRATION_LIMIT);
if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
crm_xml_add(scheduler->graph, PCMK_OPT_MIGRATION_LIMIT, value);
}
if (scheduler->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) scheduler->recheck_by);
crm_xml_add(scheduler->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
// Add resource actions to graph
for (iter = scheduler->resources; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
pcmk__rsc_trace(rsc, "Processing actions for %s", rsc->id);
rsc->cmds->add_actions_to_graph(rsc);
}
// Add pseudo-action for list of nodes with maintenance state update
add_maintenance_update(scheduler);
// Add non-resource (node) actions
for (iter = scheduler->actions; iter != NULL; iter = iter->next) {
pcmk_action_t *action = (pcmk_action_t *) iter->data;
if ((action->rsc != NULL)
&& (action->node != NULL)
&& action->node->details->shutdown
&& !pcmk_is_set(action->rsc->flags, pcmk_rsc_maintenance)
&& !pcmk_any_flags_set(action->flags,
pcmk_action_optional|pcmk_action_runnable)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* Eventually we should just ignore the 'fence' case, but for now
* it's the best way to detect (in CTS) when CIB resource updates
* are being lost.
*/
if (pcmk_is_set(scheduler->flags, pcmk_sched_quorate)
|| (scheduler->no_quorum_policy == pcmk_no_quorum_ignore)) {
const bool managed = pcmk_is_set(action->rsc->flags,
pcmk_rsc_managed);
const bool failed = pcmk_is_set(action->rsc->flags,
pcmk_rsc_failed);
crm_crit("Cannot %s %s because of %s:%s%s (%s)",
action->node->details->unclean? "fence" : "shut down",
pcmk__node_name(action->node), action->rsc->id,
(managed? " blocked" : " unmanaged"),
(failed? " failed" : ""), action->uuid);
}
}
add_action_to_graph((gpointer) action, (gpointer) scheduler);
}
crm_log_xml_trace(scheduler->graph, "graph");
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:44 PM (2 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1989321
Default Alt Text
(141 KB)

Event Timeline