Page MenuHomeClusterLabs Projects

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 86a756aee2..9e346ac577 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1471 +1,1472 @@
/*
* Copyright 2013-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <pacemaker-controld.h>
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry */
#define MAX_START_TIMEOUT_MS 10000
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_cmd_status {
cmd_reported_success = (1 << 0),
cmd_cancel = (1 << 1),
};
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
int monitor_timeout_id;
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
pcmk__action_result_t result;
int call_id;
time_t start_time;
uint32_t status;
} remote_ra_cmd_t;
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
enum remote_status {
expect_takeover = (1 << 0),
takeover_complete = (1 << 1),
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
/* Similar for whether we are controlling a guest node or remote node.
* Fortunately there is a meta-attribute in the transition already and
* as the situation doesn't change over time we can use the
* resource start for noting down the information for later use when
* the attributes aren't at hand.
*/
controlling_guest = (1 << 4),
};
typedef struct remote_ra_data_s {
crm_trigger_t *work;
remote_ra_cmd_t *cur_cmd;
GList *cmds;
GList *recurring_cmds;
uint32_t status;
} remote_ra_data_t;
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
static void
free_cmd(gpointer user_data)
{
remote_ra_cmd_t *cmd = user_data;
if (!cmd) {
return;
}
if (cmd->delay_id) {
g_source_remove(cmd->delay_id);
}
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
}
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
}
if (cmd->takeover_timeout_id) {
g_source_remove(cmd->takeover_timeout_id);
}
free(cmd->owner);
free(cmd->rsc_id);
free(cmd->action);
free(cmd->userdata);
pcmk__reset_result(&(cmd->result));
lrmd_key_value_freeall(cmd->params);
free(cmd);
}
static int
generate_callid(void)
{
static int remote_ra_callid = 0;
remote_ra_callid++;
if (remote_ra_callid <= 0) {
remote_ra_callid = 1;
}
return remote_ra_callid;
}
static gboolean
recurring_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->interval_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds, cmd);
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
static gboolean
start_delay_helper(gpointer data)
{
remote_ra_cmd_t *cmd = data;
lrm_state_t *connection_rsc = NULL;
cmd->delay_id = 0;
connection_rsc = lrm_state_find(cmd->rsc_id);
if (connection_rsc && connection_rsc->remote_ra_data) {
remote_ra_data_t *ra_data = connection_rsc->remote_ra_data;
mainloop_set_trigger(ra_data->work);
}
return FALSE;
}
static bool
should_purge_attributes(crm_node_t *node)
{
bool purge = true;
crm_node_t *conn_node = NULL;
lrm_state_t *connection_rsc = NULL;
if (!node->conn_host) {
return purge;
}
/* Get the node that was hosting the remote connection resource from the
* peer cache. That's the one we really care about here.
*/
conn_node = crm_get_peer(0, node->conn_host);
if (conn_node == NULL) {
return purge;
}
/* Check the uptime of connection_rsc. If it hasn't been running long
* enough, set purge=true. "Long enough" means it started running earlier
* than the timestamp when we noticed it went away in the first place.
*/
connection_rsc = lrm_state_find(node->uname);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
time_t uptime = lrmd__uptime(lrm);
time_t now = time(NULL);
/* Add 20s of fuzziness to give corosync a while to notice the remote
* host is gone. On various error conditions (failure to get uptime,
* peer_lost isn't set) we default to purging.
*/
if (uptime > 0 &&
conn_node->peer_lost > 0 &&
uptime + 20 >= now - conn_node->peer_lost) {
purge = false;
}
}
return purge;
}
static enum controld_section_e
section_to_delete(bool purge)
{
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
if (purge) {
return controld_section_all_unlocked;
} else {
return controld_section_lrm_unlocked;
}
} else {
if (purge) {
return controld_section_all;
} else {
return controld_section_lrm;
}
}
}
static void
purge_remote_node_attrs(int call_opt, crm_node_t *node)
{
bool purge = should_purge_attributes(node);
enum controld_section_e section = section_to_delete(purge);
/* Purge node from attrd's memory */
if (purge) {
update_attrd_remote_node_removed(node->uname, NULL);
}
controld_delete_node_state(node->uname, section, call_opt);
}
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node joining
*
* \param[in] node_name Name of newly integrated pacemaker_remote node
*/
static void
remote_node_up(const char *node_name)
{
int call_opt;
xmlNode *update, *state;
crm_node_t *node;
lrm_state_t *connection_rsc = NULL;
CRM_CHECK(node_name != NULL, return);
crm_info("Announcing Pacemaker Remote node %s", node_name);
call_opt = crmd_cib_smart_opt();
/* Delete node's probe_complete attribute. This serves two purposes:
*
* - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
* - deleting it (or any attribute for that matter) here ensures the
* attribute manager learns the node is remote
*/
update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);
/* Ensure node is in the remote peer cache with member status */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
purge_remote_node_attrs(call_opt, node);
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
/* Apply any start state that we were given from the environment on the
* remote node.
*/
connection_rsc = lrm_state_find(node->uname);
if (connection_rsc != NULL) {
lrmd_t *lrm = connection_rsc->conn;
const char *start_state = lrmd__node_start_state(lrm);
if (start_state) {
set_join_state(start_state, node->uname, node->uuid, true);
}
}
/* pacemaker_remote nodes don't participate in the membership layer,
* so cluster nodes don't automatically get notified when they come and go.
* We send a cluster message to the DC, and update the CIB node state entry,
* so the DC will get it sooner (via message) or later (via CIB refresh),
* and any other interested parties can query the CIB.
*/
broadcast_remote_state_message(node_name, true);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_cluster, update,
__func__);
/* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
* needs to be fenced, this flag will allow various actions to determine
* whether the fencing has happened yet.
*/
crm_xml_add(state, XML_NODE_IS_FENCED, "0");
/* TODO: If the remote connection drops, and this (async) CIB update either
* failed or has not yet completed, later actions could mistakenly think the
* node has already been fenced (if the XML_NODE_IS_FENCED attribute was
* previously set, because it won't have been cleared). This could prevent
* actual fencing or allow recurring monitor failures to be cleared too
* soon. Ideally, we wouldn't rely on the CIB for the fenced status.
*/
controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL, NULL);
free_xml(update);
}
enum down_opts {
DOWN_KEEP_LRM,
DOWN_ERASE_LRM
};
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
* \param[in] opts Whether to keep or erase LRM history
*/
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
crm_node_t *node;
/* Purge node from attrd's memory */
update_attrd_remote_node_removed(node_name, NULL);
/* Normally, only node attributes should be erased, and the resource history
* should be kept until the node comes back up. However, after a successful
* fence, we want to clear the history as well, so we don't think resources
* are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
controld_delete_node_state(node_name, controld_section_all, call_opt);
} else {
controld_delete_node_state(node_name, controld_section_attrs, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
node = crm_remote_peer_get(node_name);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);
/* Notify DC */
broadcast_remote_state_message(node_name, false);
/* Update CIB node state */
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
create_node_state_update(node, node_update_cluster, update, __func__);
controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL, NULL);
free_xml(update);
}
/*!
* \internal
* \brief Handle effects of a remote RA command on node state
*
* \param[in] cmd Completed remote RA command
*/
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
/* Only successful actions can change node state */
if (!pcmk__result_ok(&(cmd->result))) {
return;
}
if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
remote_node_up(cmd->rsc_id);
- } else if (pcmk__str_eq(cmd->action, "migrate_from", pcmk__str_casei)) {
+ } else if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
+ pcmk__str_casei)) {
/* After a successful migration, we don't need to do remote_node_up()
* because the DC already knows the node is up, and we don't want to
* clear LRM history etc. We do need to add the remote node to this
* host's remote peer cache, because (unless it happens to be DC)
* it hasn't been tracking the remote node, and other code relies on
* the cache to distinguish remote nodes from unseen cluster nodes.
*/
crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);
CRM_CHECK(node != NULL, return);
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
} else if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
remote_ra_data_t *ra_data = lrm_state? lrm_state->remote_ra_data : NULL;
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
* un-cache the node, so we don't have stale info
*/
crm_remote_peer_cache_remove(cmd->rsc_id);
}
}
}
/* We don't do anything for successful monitors, which is correct for
* routine recurring monitors, and for monitors on nodes where the
* connection isn't supposed to be (the cluster will stop the connection in
* that case). However, if the initial probe finds the connection already
* active on the node where we want it, we probably should do
* remote_node_up(). Unfortunately, we can't distinguish that case here.
* Given that connections have to be initiated by the cluster, the chance of
* that should be close to zero.
*/
}
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
lrmd_event_data_t op = { 0, };
check_remote_node_state(cmd);
op.type = lrmd_event_exec_complete;
op.rsc_id = cmd->rsc_id;
op.op_type = cmd->action;
op.user_data = cmd->userdata;
op.timeout = cmd->timeout;
op.interval_ms = cmd->interval_ms;
op.t_run = (unsigned int) cmd->start_time;
op.t_rcchange = (unsigned int) cmd->start_time;
lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status,
cmd->result.exit_reason);
if (pcmk_is_set(cmd->status, cmd_reported_success) && !pcmk__result_ok(&(cmd->result))) {
op.t_rcchange = (unsigned int) time(NULL);
/* This edge case will likely never ever occur, but if it does the
* result is that a failure will not be processed correctly. This is only
* remotely possible because we are able to detect a connection resource's tcp
* connection has failed at any moment after start has completed. The actual
* recurring operation is just a connectivity ping.
*
* basically, we are not guaranteed that the first successful monitor op and
* a subsequent failed monitor op will not occur in the same timestamp. We have to
* make it look like the operations occurred at separate times though. */
if (op.t_rcchange == op.t_run) {
op.t_rcchange++;
}
}
if (cmd->params) {
lrmd_key_value_t *tmp;
op.params = pcmk__strkey_table(free, free);
for (tmp = cmd->params; tmp; tmp = tmp->next) {
g_hash_table_insert(op.params, strdup(tmp->key), strdup(tmp->value));
}
}
op.call_id = cmd->call_id;
op.remote_nodename = cmd->owner;
lrm_op_callback(&op);
if (op.params) {
g_hash_table_destroy(op.params);
}
lrmd__reset_result(&op);
}
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
cmd->remaining_timeout = ((cmd->timeout / 1000) - (time(NULL) - cmd->start_time)) * 1000;
}
static gboolean
retry_start_cmd_cb(gpointer data)
{
lrm_state_t *lrm_state = data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd = NULL;
int rc = ETIME;
if (!ra_data || !ra_data->cur_cmd) {
return FALSE;
}
cmd = ra_data->cur_cmd;
- if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, "migrate_from",
- NULL)) {
+ if (!pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
return FALSE;
}
update_remaining_timeout(cmd);
if (cmd->remaining_timeout > 0) {
rc = handle_remote_ra_start(lrm_state, cmd, cmd->remaining_timeout);
} else {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"Not enough time remains to retry remote connection");
}
if (rc != pcmk_rc_ok) {
report_remote_ra_result(cmd);
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
ra_data->cur_cmd = NULL;
free_cmd(cmd);
} else {
/* wait for connection event */
}
return FALSE;
}
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
crm_info("takeover event timed out for node %s", cmd->rsc_id);
cmd->takeover_timeout_id = 0;
lrm_state = lrm_state_find(cmd->rsc_id);
handle_remote_ra_stop(lrm_state, cmd);
free_cmd(cmd);
return FALSE;
}
static gboolean
monitor_timeout_cb(gpointer data)
{
lrm_state_t *lrm_state = NULL;
remote_ra_cmd_t *cmd = data;
lrm_state = lrm_state_find(cmd->rsc_id);
crm_info("Timed out waiting for remote poke response from %s%s",
cmd->rsc_id, (lrm_state? "" : " (no LRM state)"));
cmd->monitor_timeout_id = 0;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Remote executor did not respond");
if (lrm_state && lrm_state->remote_ra_data) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (ra_data->cur_cmd == cmd) {
ra_data->cur_cmd = NULL;
}
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
}
report_remote_ra_result(cmd);
free_cmd(cmd);
if(lrm_state) {
lrm_state_disconnect(lrm_state);
}
return FALSE;
}
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
lrmd_event_data_t op = { 0, };
if (lrm_state == NULL) {
/* if lrm_state not given assume local */
lrm_state = lrm_state_find(controld_globals.our_nodename);
}
CRM_ASSERT(lrm_state != NULL);
op.type = lrmd_event_exec_complete;
op.rsc_id = rsc_id;
op.op_type = op_type;
op.t_run = (unsigned int) time(NULL);
op.t_rcchange = op.t_run;
op.call_id = generate_callid();
lrmd__set_result(&op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
process_lrm_event(lrm_state, &op, NULL, NULL);
}
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
/* Do not free lrm_state->conn yet. */
/* It'll be freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start actions and migrate from actions complete after connection
* comes back to us. */
if ((op->type == lrmd_event_connect)
- && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START, "migrate_from",
- NULL)) {
+ && pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time, after that only worry about failures.
* For this function, if we get the poke pack, it is always a success. Pokes
* only fail if the send fails, or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_new_client)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
remote_ra_data_t *ra_data = NULL;
CRM_ASSERT(lrm_state);
ra_data = lrm_state->remote_ra_data;
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* delete pending ops when ever the remote connection is intentionally stopped */
g_hash_table_remove_all(lrm_state->active_ops);
} else {
/* we no longer hold the history if this connection has been migrated,
* however, we keep metadata cache for future use */
lrm_state_reset_tables(lrm_state, FALSE);
}
lrm_remote_clear_flags(lrm_state, remote_active);
lrm_state_disconnect(lrm_state);
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
ra_data->cmds = NULL;
ra_data->recurring_cmds = NULL;
ra_data->cur_cmd = NULL;
if (cmd) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
}
// \return Standard Pacemaker return code
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
for (tmp = cmd->params; tmp; tmp = tmp->next) {
if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" XML_RSC_ATTR_CONTAINER, pcmk__str_casei)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
- if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START, "migrate_from",
- NULL)) {
+ if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine.
* It just means that the remote-node's lrm_status section is going to get
* cleared which will require all the resources running in the remote-node
* to be explicitly re-detected via probe actions. If the takeover does occur
* successfully, then we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is reconnect_interval,
* which is only used by the scheduler via the CIB, so reloads are a
* no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
free_cmd(cmd);
}
return TRUE;
}
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = NULL;
if (lrm_state->remote_ra_data) {
return;
}
ra_data = calloc(1, sizeof(remote_ra_data_t));
ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
lrm_state->remote_ra_data = ra_data;
}
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
if (!ra_data) {
return;
}
if (ra_data->cmds) {
g_list_free_full(ra_data->cmds, free_cmd);
}
if (ra_data->recurring_cmds) {
g_list_free_full(ra_data->recurring_cmds, free_cmd);
}
mainloop_destroy_trigger(ra_data->work);
free(ra_data);
lrm_state->remote_ra_data = NULL;
}
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
if (agent && provider && !strcmp(agent, REMOTE_LRMD_RA) && !strcmp(provider, "pacemaker")) {
return TRUE;
}
if ((id != NULL) && (lrm_state_find(id) != NULL)
&& !pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
return TRUE;
}
return FALSE;
}
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
lrmd_rsc_info_t *info = NULL;
if ((lrm_state_find(rsc_id))) {
info = calloc(1, sizeof(lrmd_rsc_info_t));
info->id = strdup(rsc_id);
info->type = strdup(REMOTE_LRMD_RA);
info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
info->provider = strdup("pacemaker");
}
return info;
}
static gboolean
is_remote_ra_supported_action(const char *action)
{
return pcmk__str_any_of(action,
PCMK_ACTION_START,
PCMK_ACTION_STOP,
PCMK_ACTION_MONITOR,
PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED,
+ PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_RELOAD_AGENT,
PCMK_ACTION_RELOAD,
NULL);
}
static GList *
fail_all_monitor_cmds(GList * list)
{
GList *rm_list = NULL;
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms > 0)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
rm_list = g_list_append(rm_list, cmd);
}
}
for (gIter = rm_list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, "Lost connection to remote executor");
crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
cmd->action, cmd->rsc_id, cmd->interval_ms, cmd->userdata);
report_remote_ra_result(cmd);
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
/* frees only the list data, not the cmds */
g_list_free(rm_list);
return list;
}
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
remote_ra_cmd_t *cmd = NULL;
GList *gIter = NULL;
for (gIter = list; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, action, pcmk__str_casei)) {
break;
}
cmd = NULL;
}
if (cmd) {
list = g_list_remove(list, cmd);
free_cmd(cmd);
}
return list;
}
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
const char *action, guint interval_ms)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_data_t *ra_data = NULL;
connection_rsc = lrm_state_find(rsc_id);
if (!connection_rsc || !connection_rsc->remote_ra_data) {
return -EINVAL;
}
ra_data = connection_rsc->remote_ra_data;
ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
interval_ms);
if (ra_data->cur_cmd &&
(ra_data->cur_cmd->interval_ms == interval_ms) &&
(pcmk__str_eq(ra_data->cur_cmd->action, action, pcmk__str_casei))) {
cmd_set_flags(ra_data->cur_cmd, cmd_cancel);
}
return 0;
}
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
const char *userdata)
{
GList *gIter = NULL;
remote_ra_cmd_t *cmd = NULL;
/* there are 3 places a potential duplicate monitor operation
* could exist.
* 1. recurring_cmds list. where the op is waiting for its next interval
* 2. cmds list, where the op is queued to get executed immediately
* 3. cur_cmd, which means the monitor op is in flight right now.
*/
if (interval_ms == 0) {
return NULL;
}
if (ra_data->cur_cmd &&
!pcmk_is_set(ra_data->cur_cmd->status, cmd_cancel) &&
(ra_data->cur_cmd->interval_ms == interval_ms)
&& pcmk__str_eq(ra_data->cur_cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
cmd = ra_data->cur_cmd;
goto handle_dup;
}
for (gIter = ra_data->recurring_cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
for (gIter = ra_data->cmds; gIter != NULL; gIter = gIter->next) {
cmd = gIter->data;
if ((cmd->interval_ms == interval_ms)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
goto handle_dup;
}
}
return NULL;
handle_dup:
crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
cmd->rsc_id, PCMK_ACTION_MONITOR, interval_ms);
/* update the userdata */
if (userdata) {
free(cmd->userdata);
cmd->userdata = strdup(userdata);
}
/* if we've already reported success, generate a new call id */
if (pcmk_is_set(cmd->status, cmd_reported_success)) {
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
cmd_clear_flags(cmd, cmd_reported_success);
}
/* if we have an interval_id set, that means we are in the process of
* waiting for this cmd's next interval. instead of waiting, cancel
* the timer and execute the action immediately */
if (cmd->interval_id) {
g_source_remove(cmd->interval_id);
cmd->interval_id = 0;
recurring_helper(cmd);
}
return cmd;
}
/*!
* \internal
* \brief Execute an action using the (internal) ocf:pacemaker:remote agent
*
* \param[in] lrm_state Executor state object for remote connection
* \param[in] rsc_id Connection resource ID
* \param[in] action Action to execute
* \param[in] userdata String to copy and pass to execution callback
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in] timeout_ms Action timeout (in milliseconds)
* \param[in] start_delay_ms Delay (in milliseconds) before executing action
* \param[in,out] params Connection resource parameters
* \param[out] call_id Where to store call ID on success
*
* \return Standard Pacemaker return code
* \note This takes ownership of \p params, which should not be used or freed
* after calling this function.
*/
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
const char *action, const char *userdata,
guint interval_ms, int timeout_ms,
int start_delay_ms, lrmd_key_value_t *params,
int *call_id)
{
lrm_state_t *connection_rsc = NULL;
remote_ra_cmd_t *cmd = NULL;
remote_ra_data_t *ra_data = NULL;
*call_id = 0;
CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
&& (userdata != NULL) && (call_id != NULL),
lrmd_key_value_freeall(params); return EINVAL);
if (!is_remote_ra_supported_action(action)) {
lrmd_key_value_freeall(params);
return EOPNOTSUPP;
}
connection_rsc = lrm_state_find(rsc_id);
if (connection_rsc == NULL) {
lrmd_key_value_freeall(params);
return ENOTCONN;
}
remote_ra_data_init(connection_rsc);
ra_data = connection_rsc->remote_ra_data;
cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
if (cmd) {
*call_id = cmd->call_id;
lrmd_key_value_freeall(params);
return pcmk_rc_ok;
}
cmd = calloc(1, sizeof(remote_ra_cmd_t));
if (cmd == NULL) {
lrmd_key_value_freeall(params);
return ENOMEM;
}
cmd->owner = strdup(lrm_state->node_name);
cmd->rsc_id = strdup(rsc_id);
cmd->action = strdup(action);
cmd->userdata = strdup(userdata);
if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
|| (cmd->userdata == NULL)) {
free_cmd(cmd);
lrmd_key_value_freeall(params);
return ENOMEM;
}
cmd->interval_ms = interval_ms;
cmd->timeout = timeout_ms;
cmd->start_delay = start_delay_ms;
cmd->params = params;
cmd->start_time = time(NULL);
cmd->call_id = generate_callid();
if (cmd->start_delay) {
cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
}
ra_data->cmds = g_list_append(ra_data->cmds, cmd);
mainloop_set_trigger(ra_data->work);
*call_id = cmd->call_id;
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
* \internal
* \brief Check a pseudo-action for Pacemaker Remote node side effects
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_pseudo(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);
if (numXpathResults(search) == 1) {
xmlNode *result = getXpathResult(search, 0);
/* Normally, we handle the necessary side effects of a guest node stop
* action when reporting the remote agent's result. However, if the stop
* is implied due to fencing, it will be a fencing pseudo-event, and
* there won't be a result to report. Handle that case here.
*
* This will result in a duplicate call to remote_node_down() if the
* guest stop was real instead of implied, but that shouldn't hurt.
*
* There is still one corner case that isn't handled: if a guest node
* isn't running any resources when its host is fenced, it will appear
* to be cleanly stopped, so there will be no pseudo-fence, and our
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
if (result) {
const char *remote = ID(result);
if (remote) {
remote_node_down(remote, DOWN_ERASE_LRM);
}
}
}
freeXpathObject(search);
}
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
xmlNode *update, *state;
int call_opt;
crm_node_t *node;
call_opt = crmd_cib_smart_opt();
node = crm_remote_peer_get(lrm_state->node_name);
CRM_CHECK(node != NULL, return);
update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
state = create_node_state_update(node, node_update_none, update,
__func__);
crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maintenance?"1":"0");
if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt, NULL,
NULL) == pcmk_rc_ok) {
/* TODO: still not 100% sure that async update will succeed ... */
if (maintenance) {
lrm_remote_set_flags(lrm_state, remote_in_maint);
} else {
lrm_remote_clear_flags(lrm_state, remote_in_maint);
}
}
free_xml(update);
}
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" CRM_OP_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
* \internal
* \brief Check a pseudo-action holding updates for maintenance state
*
* \param[in,out] xml XML of pseudo-action to check
*/
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);
if (numXpathResults(search) == 1) {
xmlNode *node;
int cnt = 0, cnt_remote = 0;
for (node =
first_named_child(getXpathResult(search, 0), XML_CIB_TAG_NODE);
node != NULL; node = pcmk__xml_next(node)) {
lrm_state_t *lrm_state = lrm_state_find(ID(node));
cnt++;
if (lrm_state && lrm_state->remote_ra_data &&
pcmk_is_set(((remote_ra_data_t *) lrm_state->remote_ra_data)->status, remote_active)) {
int is_maint;
cnt_remote++;
pcmk__scan_min_int(crm_element_value(node, XML_NODE_IS_MAINTENANCE),
&is_maint, 0);
remote_ra_maintenance(lrm_state, is_maint);
}
}
crm_trace("Action holds %d nodes (%d remotes found) "
"adjusting maintenance-mode", cnt, cnt_remote);
}
freeXpathObject(search);
}
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, remote_in_maint);
}
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
return pcmk_is_set(ra_data->status, controlling_guest);
}
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
index 91d850d194..bedd4bf0f1 100644
--- a/daemons/controld/controld_te_actions.c
+++ b/daemons/controld/controld_te_actions.c
@@ -1,747 +1,748 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/lrmd.h> // lrmd_event_data_t, lrmd_free_event()
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/cluster.h>
#include <pacemaker-internal.h>
#include <pacemaker-controld.h>
static GHashTable *te_targets = NULL;
void send_rsc_command(pcmk__graph_action_t *action);
static void te_update_job_count(pcmk__graph_action_t *action, int offset);
static void
te_start_action_timer(const pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
action->timer = g_timeout_add(action->timeout + graph->network_delay,
action_timer_callback, (void *) action);
CRM_ASSERT(action->timer != 0);
}
/*!
* \internal
* \brief Execute a graph pseudo-action
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] pseudo Pseudo-action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_pseudo_action(pcmk__graph_t *graph, pcmk__graph_action_t *pseudo)
{
const char *task = crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK);
/* send to peers as well? */
if (pcmk__str_eq(task, CRM_OP_MAINTENANCE_NODES, pcmk__str_casei)) {
GHashTableIter iter;
crm_node_t *node = NULL;
g_hash_table_iter_init(&iter, crm_peer_cache);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
xmlNode *cmd = NULL;
if (pcmk__str_eq(controld_globals.our_nodename, node->uname,
pcmk__str_casei)) {
continue;
}
cmd = create_request(task, pseudo->xml, node->uname,
CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
send_cluster_message(node, crm_msg_crmd, cmd, FALSE);
free_xml(cmd);
}
remote_ra_process_maintenance_nodes(pseudo->xml);
} else {
/* Check action for Pacemaker Remote node side effects */
remote_ra_process_pseudo(pseudo->xml);
}
crm_debug("Pseudo-action %d (%s) fired and confirmed", pseudo->id,
crm_element_value(pseudo->xml, XML_LRM_ATTR_TASK_KEY));
te_action_confirmed(pseudo, graph);
return pcmk_rc_ok;
}
static int
get_target_rc(pcmk__graph_action_t *action)
{
int exit_status;
pcmk__scan_min_int(crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC),
&exit_status, 0);
return exit_status;
}
/*!
* \internal
* \brief Execute a cluster action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Cluster action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
char *counter = NULL;
xmlNode *cmd = NULL;
gboolean is_local = FALSE;
const char *id = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
id = ID(action->xml);
CRM_CHECK(!pcmk__str_empty(id), return EPROTO);
task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
CRM_CHECK(!pcmk__str_empty(task), return EPROTO);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
CRM_CHECK(!pcmk__str_empty(on_node), return pcmk_rc_node_unknown);
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router_node == NULL) {
router_node = on_node;
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
const char *mode = crm_element_value(action->xml, PCMK__XA_MODE);
if (pcmk__str_eq(mode, XML_TAG_CIB, pcmk__str_none)) {
router_node = controld_globals.our_nodename;
}
}
}
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_info("Handling controller request '%s' (%s on %s)%s%s",
id, task, on_node, (is_local? " locally" : ""),
(no_wait? " without waiting" : ""));
if (is_local && pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
/* defer until everything else completes */
crm_info("Controller request '%s' is a local shutdown", id);
graph->completion_action = pcmk__graph_shutdown;
graph->abort_reason = "local shutdown";
te_action_confirmed(action, graph);
return pcmk_rc_ok;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
crm_node_t *peer = crm_get_peer(0, router_node);
pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN);
}
cmd = create_request(task, action->xml, router_node, CRM_SYSTEM_CRMD, CRM_SYSTEM_TENGINE, NULL);
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter);
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE);
free(counter);
free_xml(cmd);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
te_action_confirmed(action, graph);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Synthesize an executor event for a resource action timeout
*
* \param[in] action Resource action that timed out
* \param[in] target_rc Expected result of action that timed out
*
* Synthesize an executor event for a resource action timeout. (If the executor
* gets a timeout while waiting for a resource action to complete, that will be
* reported via the usual callback. This timeout means we didn't hear from the
* executor itself or the controller that relayed the action to the executor.)
*
* \return Newly created executor event for result of \p action
* \note The caller is responsible for freeing the return value using
* lrmd_free_event().
*/
static lrmd_event_data_t *
synthesize_timeout_event(const pcmk__graph_action_t *action, int target_rc)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *reason = NULL;
char *dynamic_reason = NULL;
if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
reason = "Local executor did not return result in time";
} else {
const char *router_node = NULL;
router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (router_node == NULL) {
router_node = target;
}
dynamic_reason = crm_strdup_printf("Controller on %s did not return "
"result in time", router_node);
reason = dynamic_reason;
}
op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
PCMK_OCF_UNKNOWN_ERROR, reason);
op->call_id = -1;
op->user_data = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, target_rc,
controld_globals.te_uuid);
free(dynamic_reason);
return op;
}
static void
controld_record_action_event(pcmk__graph_action_t *action,
lrmd_event_data_t *op)
{
cib_t *cib_conn = controld_globals.cib_conn;
xmlNode *state = NULL;
xmlNode *rsc = NULL;
xmlNode *action_rsc = NULL;
int rc = pcmk_ok;
const char *rsc_id = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
const char *target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
int target_rc = get_target_rc(action);
action_rsc = find_xml_node(action->xml, XML_CIB_TAG_RESOURCE, TRUE);
if (action_rsc == NULL) {
return;
}
rsc_id = ID(action_rsc);
CRM_CHECK(rsc_id != NULL,
crm_log_xml_err(action->xml, "Bad:action"); return);
/*
update the CIB
<node_state id="hadev">
<lrm>
<lrm_resources>
<lrm_resource id="rsc2" last_op="start" op_code="0" target="hadev"/>
*/
state = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(state, XML_ATTR_ID, target_uuid);
crm_xml_add(state, XML_ATTR_UNAME, target);
rsc = create_xml_node(state, XML_CIB_TAG_LRM);
crm_xml_add(rsc, XML_ATTR_ID, target_uuid);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCES);
rsc = create_xml_node(rsc, XML_LRM_TAG_RESOURCE);
crm_xml_add(rsc, XML_ATTR_ID, rsc_id);
crm_copy_xml_element(action_rsc, rsc, XML_ATTR_TYPE);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
__func__);
rc = cib_conn->cmds->modify(cib_conn, XML_CIB_TAG_STATUS, state,
cib_scope_local);
fsa_register_cib_callback(rc, NULL, cib_action_updated);
free_xml(state);
crm_trace("Sent CIB update (call ID %d) for synthesized event of action %d (%s on %s)",
rc, action->id, task_uuid, target);
pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update);
}
void
controld_record_action_timeout(pcmk__graph_action_t *action)
{
lrmd_event_data_t *op = NULL;
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
int target_rc = get_target_rc(action);
crm_warn("%s %d: %s on %s timed out",
crm_element_name(action->xml), action->id, task_uuid, target);
op = synthesize_timeout_event(action, target_rc);
controld_record_action_event(action, op);
lrmd_free_event(op);
}
/*!
* \internal
* \brief Execute a resource action from a transition graph
*
* \param[in,out] graph Transition graph being executed
* \param[in,out] action Resource action to execute
*
* \return Standard Pacemaker return code
*/
static int
execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
/* never overwrite stop actions in the CIB with
* anything other than completed results
*
* Writing pending stops makes it look like the
* resource is running again
*/
xmlNode *cmd = NULL;
xmlNode *rsc_op = NULL;
gboolean rc = TRUE;
gboolean no_wait = FALSE;
gboolean is_local = FALSE;
char *counter = NULL;
const char *task = NULL;
const char *value = NULL;
const char *on_node = NULL;
const char *router_node = NULL;
const char *task_uuid = NULL;
CRM_ASSERT(action != NULL);
CRM_ASSERT(action->xml != NULL);
pcmk__clear_graph_action_flags(action, pcmk__graph_action_executed);
on_node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
CRM_CHECK(!pcmk__str_empty(on_node),
crm_err("Corrupted command(id=%s) %s: no node",
ID(action->xml), pcmk__s(task, "without task"));
return pcmk_rc_node_unknown);
rsc_op = action->xml;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
task_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
router_node = crm_element_value(rsc_op, XML_LRM_ATTR_ROUTER_NODE);
if (!router_node) {
router_node = on_node;
}
counter = pcmk__transition_key(controld_globals.transition_graph->id,
action->id, get_target_rc(action),
controld_globals.te_uuid);
crm_xml_add(rsc_op, XML_ATTR_TRANSITION_KEY, counter);
if (pcmk__str_eq(router_node, controld_globals.our_nodename,
pcmk__str_casei)) {
is_local = TRUE;
}
value = crm_meta_value(action->params, XML_ATTR_TE_NOWAIT);
if (crm_is_true(value)) {
no_wait = TRUE;
}
crm_notice("Initiating %s operation %s%s on %s%s "CRM_XS" action %d",
task, task_uuid, (is_local? " locally" : ""), on_node,
(no_wait? " without waiting" : ""), action->id);
cmd = create_request(CRM_OP_INVOKE_LRM, rsc_op, router_node,
CRM_SYSTEM_LRMD, CRM_SYSTEM_TENGINE, NULL);
if (is_local) {
/* shortcut local resource commands */
ha_msg_input_t data = {
.msg = cmd,
.xml = rsc_op,
};
fsa_data_t msg = {
.id = 0,
.data = &data,
.data_type = fsa_dt_ha_msg,
.fsa_input = I_NULL,
.fsa_cause = C_FSA_INTERNAL,
.actions = A_LRM_INVOKE,
.origin = __func__,
};
do_lrm_invoke(A_LRM_INVOKE, C_FSA_INTERNAL, controld_globals.fsa_state,
I_NULL, &msg);
} else {
rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE);
}
free(counter);
free_xml(cmd);
pcmk__set_graph_action_flags(action, pcmk__graph_action_executed);
if (rc == FALSE) {
crm_err("Action %d failed: send", action->id);
return ECOMM;
} else if (no_wait) {
/* Just mark confirmed. Don't bump the job count only to immediately
* decrement it.
*/
crm_info("Action %d confirmed - no wait", action->id);
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
pcmk__update_graph(controld_globals.transition_graph, action);
trigger_graph();
} else if (pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
crm_debug("Action %d: %s %s on %s(timeout %dms) was already confirmed.",
action->id, task, task_uuid, on_node, action->timeout);
} else {
if (action->timeout <= 0) {
crm_err("Action %d: %s %s on %s had an invalid timeout (%dms). Using %ums instead",
action->id, task, task_uuid, on_node, action->timeout, graph->network_delay);
action->timeout = (int) graph->network_delay;
}
te_update_job_count(action, 1);
te_start_action_timer(graph, action);
}
return pcmk_rc_ok;
}
struct te_peer_s
{
char *name;
int jobs;
int migrate_jobs;
};
static void te_peer_free(gpointer p)
{
struct te_peer_s *peer = p;
free(peer->name);
free(peer);
}
void te_reset_job_counts(void)
{
GHashTableIter iter;
struct te_peer_s *peer = NULL;
if(te_targets == NULL) {
te_targets = pcmk__strkey_table(NULL, te_peer_free);
}
g_hash_table_iter_init(&iter, te_targets);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & peer)) {
peer->jobs = 0;
peer->migrate_jobs = 0;
}
}
static void
te_update_job_count_on(const char *target, int offset, bool migrate)
{
struct te_peer_s *r = NULL;
if(target == NULL || te_targets == NULL) {
return;
}
r = g_hash_table_lookup(te_targets, target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
r->jobs += offset;
if(migrate) {
r->migrate_jobs += offset;
}
crm_trace("jobs[%s] = %d", target, r->jobs);
}
static void
te_update_job_count(pcmk__graph_action_t *action, int offset)
{
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
if ((action->type != pcmk__rsc_graph_action) || (target == NULL)) {
/* No limit on these */
return;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
- if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
-
+ if ((target == NULL)
+ && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
const char *t1 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *t2 = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
te_update_job_count_on(t1, offset, TRUE);
te_update_job_count_on(t2, offset, TRUE);
return;
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
te_update_job_count_on(target, offset, FALSE);
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed on a node
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
* \param[in] target Name of node where action should be executed
*
* \return true if action is allowed, otherwise false
*/
static bool
allowed_on_node(const pcmk__graph_t *graph, const pcmk__graph_action_t *action,
const char *target)
{
int limit = 0;
struct te_peer_s *r = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
const char *id = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
if(target == NULL) {
/* No limit on these */
return true;
} else if(te_targets == NULL) {
return false;
}
r = g_hash_table_lookup(te_targets, target);
limit = throttle_get_job_limit(target);
if(r == NULL) {
r = calloc(1, sizeof(struct te_peer_s));
r->name = strdup(target);
g_hash_table_insert(te_targets, r->name, r);
}
if(limit <= r->jobs) {
crm_trace("Peer %s is over their job limit of %d (%d): deferring %s",
target, limit, r->jobs, id);
return false;
} else if(graph->migration_limit > 0 && r->migrate_jobs >= graph->migration_limit) {
if (pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
crm_trace("Peer %s is over their migration job limit of %d (%d): deferring %s",
target, graph->migration_limit, r->migrate_jobs, id);
return false;
}
}
crm_trace("Peer %s has not hit their limit yet. current jobs = %d limit= %d limit", target, r->jobs, limit);
return true;
}
/*!
* \internal
* \brief Check whether a graph action is allowed to be executed
*
* \param[in] graph Transition graph being executed
* \param[in] action Graph action being executed
*
* \return true if action is allowed, otherwise false
*/
static bool
graph_action_allowed(pcmk__graph_t *graph, pcmk__graph_action_t *action)
{
const char *target = NULL;
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
if (action->type != pcmk__rsc_graph_action) {
/* No limit on these */
return true;
}
/* if we have a router node, this means the action is performing
* on a remote node. For now, we count all actions occurring on a
* remote node against the job list on the cluster node hosting
* the connection resources */
target = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
- if ((target == NULL) && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ if ((target == NULL)
+ && pcmk__strcase_any_of(task, PCMK_ACTION_MIGRATE_TO,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_SOURCE);
if (!allowed_on_node(graph, action, target)) {
return false;
}
target = crm_meta_value(action->params, XML_LRM_ATTR_MIGRATE_TARGET);
} else if (target == NULL) {
target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
}
return allowed_on_node(graph, action, target);
}
/*!
* \brief Confirm a graph action (and optionally update graph)
*
* \param[in,out] action Action to confirm
* \param[in,out] graph Update and trigger this graph (if non-NULL)
*/
void
te_action_confirmed(pcmk__graph_action_t *action, pcmk__graph_t *graph)
{
if (!pcmk_is_set(action->flags, pcmk__graph_action_confirmed)) {
if ((action->type == pcmk__rsc_graph_action)
&& (crm_element_value(action->xml, XML_LRM_ATTR_TARGET) != NULL)) {
te_update_job_count(action, -1);
}
pcmk__set_graph_action_flags(action, pcmk__graph_action_confirmed);
}
if (graph) {
pcmk__update_graph(graph, action);
trigger_graph();
}
}
static pcmk__graph_functions_t te_graph_fns = {
execute_pseudo_action,
execute_rsc_action,
execute_cluster_action,
controld_execute_fence_action,
graph_action_allowed,
};
/*
* \internal
* \brief Register the transitioner's graph functions with \p libpacemaker
*/
void
controld_register_graph_functions(void)
{
pcmk__set_graph_functions(&te_graph_fns);
}
void
notify_crmd(pcmk__graph_t *graph)
{
const char *type = "unknown";
enum crmd_fsa_input event = I_NULL;
crm_debug("Processing transition completion in state %s",
fsa_state2string(controld_globals.fsa_state));
CRM_CHECK(graph->complete, graph->complete = true);
switch (graph->completion_action) {
case pcmk__graph_wait:
type = "stop";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_done:
type = "done";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
event = I_TE_SUCCESS;
}
break;
case pcmk__graph_restart:
type = "restart";
if (controld_globals.fsa_state == S_TRANSITION_ENGINE) {
if (controld_get_period_transition_timer() > 0) {
controld_stop_transition_timer();
controld_start_transition_timer();
} else {
event = I_PE_CALC;
}
} else if (controld_globals.fsa_state == S_POLICY_ENGINE) {
controld_set_fsa_action_flags(A_PE_INVOKE);
controld_trigger_fsa();
}
break;
case pcmk__graph_shutdown:
type = "shutdown";
if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
event = I_STOP;
} else {
crm_err("We didn't ask to be shut down, yet the scheduler is telling us to");
event = I_TERMINATE;
}
}
crm_debug("Transition %d status: %s - %s", graph->id, type,
pcmk__s(graph->abort_reason, "unspecified reason"));
graph->abort_reason = NULL;
graph->completion_action = pcmk__graph_done;
if (event != I_NULL) {
register_fsa_input(C_FSA_INTERNAL, event, NULL);
} else {
controld_trigger_fsa();
}
}
diff --git a/include/crm/common/actions.h b/include/crm/common/actions.h
index d669454ffb..370dd18a7f 100644
--- a/include/crm/common/actions.h
+++ b/include/crm/common/actions.h
@@ -1,42 +1,43 @@
/*
* Copyright 2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_COMMON_ACTIONS__H
#define PCMK__CRM_COMMON_ACTIONS__H
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \file
* \brief APIs related to actions
* \ingroup core
*/
// Action names as strings
#define PCMK_ACTION_CANCEL "cancel"
#define PCMK_ACTION_DELETE "delete"
#define PCMK_ACTION_DEMOTE "demote"
#define PCMK_ACTION_META_DATA "meta-data"
+#define PCMK_ACTION_MIGRATE_FROM "migrate_from"
#define PCMK_ACTION_MIGRATE_TO "migrate_to"
#define PCMK_ACTION_MONITOR "monitor"
#define PCMK_ACTION_NOTIFY "notify"
#define PCMK_ACTION_PROMOTE "promote"
#define PCMK_ACTION_RELOAD "reload"
#define PCMK_ACTION_RELOAD_AGENT "reload-agent"
#define PCMK_ACTION_START "start"
#define PCMK_ACTION_STOP "stop"
#define PCMK_ACTION_VALIDATE_ALL "validate-all"
#ifdef __cplusplus
}
#endif
#endif // PCMK__CRM_COMMON_ACTIONS__H
diff --git a/include/crm/crm.h b/include/crm/crm.h
index 9b2a7e5489..40194606df 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -1,214 +1,214 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_CRM__H
# define PCMK__CRM_CRM__H
# include <crm_config.h>
# include <stdlib.h>
# include <glib.h>
# include <stdbool.h>
# include <string.h>
# include <libxml/tree.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* \file
* \brief A dumping ground
* \ingroup core
*/
#ifndef PCMK_ALLOW_DEPRECATED
/*!
* \brief Allow use of deprecated Pacemaker APIs
*
* By default, external code using Pacemaker headers is allowed to use
* deprecated Pacemaker APIs. If PCMK_ALLOW_DEPRECATED is defined to 0 before
* including any Pacemaker headers, deprecated APIs will be unusable. It is
* strongly recommended to leave this unchanged for production and release
* builds, to avoid breakage when users upgrade to new Pacemaker releases that
* deprecate more APIs. This should be defined to 0 only for development and
* testing builds when desiring to check for usage of currently deprecated APIs.
*/
#define PCMK_ALLOW_DEPRECATED 1
#endif
/*!
* The CRM feature set assists with compatibility in mixed-version clusters.
* The major version number increases when nodes with different versions
* would not work (rolling upgrades are not allowed). The minor version
* number increases when mixed-version clusters are allowed only during
* rolling upgrades (a node with the oldest feature set will be elected DC). The
* minor-minor version number is ignored, but allows resource agents to detect
* cluster support for various features.
*
* The feature set also affects the processing of old saved CIBs (such as for
* many scheduler regression tests).
*
* Particular feature points currently tested by Pacemaker code:
*
* >2.1: Operation updates include timing data
* >=3.0.5: XML v2 digests are created
* >=3.0.8: Peers do not need acks for cancellations
* >=3.0.9: DC will send its own shutdown request to all peers
* XML v2 patchsets are created by default
* >=3.0.13: Fail counts include operation name and interval
* >=3.2.0: DC supports PCMK_EXEC_INVALID and PCMK_EXEC_NOT_CONNECTED
*/
# define CRM_FEATURE_SET "3.18.0"
/* Pacemaker's CPG protocols use fixed-width binary fields for the sender and
* recipient of a CPG message. This imposes an arbitrary limit on cluster node
* names.
*/
//! \brief Maximum length of a Corosync cluster node name (in bytes)
#define MAX_NAME 256
# define CRM_META "CRM_meta"
extern char *crm_system_name;
/* *INDENT-OFF* */
// How we represent "infinite" scores
# define CRM_SCORE_INFINITY 1000000
# define CRM_INFINITY_S "INFINITY"
# define CRM_PLUS_INFINITY_S "+" CRM_INFINITY_S
# define CRM_MINUS_INFINITY_S "-" CRM_INFINITY_S
/* @COMPAT API < 2.0.0 Deprecated "infinity" aliases
*
* INFINITY might be defined elsewhere (e.g. math.h), so undefine it first.
* This, of course, complicates any attempt to use the other definition in any
* code that includes this header.
*/
# undef INFINITY
# define INFINITY_S "INFINITY"
# define MINUS_INFINITY_S "-INFINITY"
# define INFINITY 1000000
/* Sub-systems */
# define CRM_SYSTEM_DC "dc"
#define CRM_SYSTEM_DCIB "dcib" // Primary instance of CIB manager
# define CRM_SYSTEM_CIB "cib"
# define CRM_SYSTEM_CRMD "crmd"
# define CRM_SYSTEM_LRMD "lrmd"
# define CRM_SYSTEM_PENGINE "pengine"
# define CRM_SYSTEM_TENGINE "tengine"
# define CRM_SYSTEM_STONITHD "stonithd"
# define CRM_SYSTEM_MCP "pacemakerd"
// Names of internally generated node attributes
# define CRM_ATTR_UNAME "#uname"
# define CRM_ATTR_ID "#id"
# define CRM_ATTR_KIND "#kind"
# define CRM_ATTR_ROLE "#role"
# define CRM_ATTR_IS_DC "#is_dc"
# define CRM_ATTR_CLUSTER_NAME "#cluster-name"
# define CRM_ATTR_SITE_NAME "#site-name"
# define CRM_ATTR_UNFENCED "#node-unfenced"
# define CRM_ATTR_DIGESTS_ALL "#digests-all"
# define CRM_ATTR_DIGESTS_SECURE "#digests-secure"
# define CRM_ATTR_PROTOCOL "#attrd-protocol"
# define CRM_ATTR_FEATURE_SET "#feature-set"
/* Valid operations */
# define CRM_OP_NOOP "noop"
# define CRM_OP_JOIN_ANNOUNCE "join_announce"
# define CRM_OP_JOIN_OFFER "join_offer"
# define CRM_OP_JOIN_REQUEST "join_request"
# define CRM_OP_JOIN_ACKNAK "join_ack_nack"
# define CRM_OP_JOIN_CONFIRM "join_confirm"
# define CRM_OP_PING "ping"
# define CRM_OP_NODE_INFO "node-info"
# define CRM_OP_THROTTLE "throttle"
# define CRM_OP_VOTE "vote"
# define CRM_OP_NOVOTE "no-vote"
# define CRM_OP_HELLO "hello"
# define CRM_OP_PECALC "pe_calc"
# define CRM_OP_QUIT "quit"
# define CRM_OP_LOCAL_SHUTDOWN "start_shutdown"
# define CRM_OP_SHUTDOWN_REQ "req_shutdown"
# define CRM_OP_SHUTDOWN "do_shutdown"
# define CRM_OP_FENCE "stonith"
# define CRM_OP_REGISTER "register"
# define CRM_OP_IPC_FWD "ipc_fwd"
# define CRM_OP_INVOKE_LRM "lrm_invoke"
# define CRM_OP_LRM_REFRESH "lrm_refresh" //!< Deprecated since 1.1.10
# define CRM_OP_LRM_DELETE "lrm_delete"
# define CRM_OP_LRM_FAIL "lrm_fail"
# define CRM_OP_PROBED "probe_complete"
# define CRM_OP_REPROBE "probe_again"
# define CRM_OP_CLEAR_FAILCOUNT "clear_failcount"
# define CRM_OP_REMOTE_STATE "remote_state"
# define CRM_OP_RELAXED_SET "one-or-more"
# define CRM_OP_RELAXED_CLONE "clone-one-or-more"
# define CRM_OP_RM_NODE_CACHE "rm_node_cache"
# define CRM_OP_MAINTENANCE_NODES "maintenance_nodes"
/* Possible cluster membership states */
# define CRMD_JOINSTATE_DOWN "down"
# define CRMD_JOINSTATE_PENDING "pending"
# define CRMD_JOINSTATE_MEMBER "member"
# define CRMD_JOINSTATE_NACK "banned"
-# define CRMD_ACTION_MIGRATED "migrate_from"
+# define CRMD_ACTION_MIGRATED PCMK_ACTION_MIGRATE_FROM
# define CRMD_ACTION_STARTED "running"
# define CRMD_ACTION_STOPPED "stopped"
# define CRMD_ACTION_PROMOTED "promoted"
# define CRMD_ACTION_DEMOTED "demoted"
# define CRMD_ACTION_NOTIFIED "notified"
# define CRMD_METADATA_CALL_TIMEOUT 30000
/* short names */
-# define RSC_MIGRATED CRMD_ACTION_MIGRATED
+# define RSC_MIGRATED PCMK_ACTION_MIGRATE_FROM
# define RSC_STARTED CRMD_ACTION_STARTED
# define RSC_STOPPED CRMD_ACTION_STOPPED
# define RSC_PROMOTED CRMD_ACTION_PROMOTED
# define RSC_DEMOTED CRMD_ACTION_DEMOTED
# define RSC_NOTIFIED CRMD_ACTION_NOTIFIED
/* *INDENT-ON* */
# include <crm/common/actions.h>
# include <crm/common/cib.h>
# include <crm/common/logging.h>
# include <crm/common/util.h>
static inline const char *
crm_action_str(const char *task, guint interval_ms) {
if ((task != NULL) && (interval_ms == 0)
&& (strcasecmp(task, PCMK_ACTION_MONITOR) == 0)) {
return "probe";
}
return task;
}
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/crm_compat.h>
#endif
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/common/operations.c b/lib/common/operations.c
index 85c57d2eea..3c317c6fbd 100644
--- a/lib/common/operations.c
+++ b/lib/common/operations.c
@@ -1,531 +1,531 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <ctype.h>
#include <crm/crm.h>
#include <crm/lrmd.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
/*!
* \brief Generate an operation key (RESOURCE_ACTION_INTERVAL)
*
* \param[in] rsc_id ID of resource being operated on
* \param[in] op_type Operation name
* \param[in] interval_ms Operation interval
*
* \return Newly allocated memory containing operation key as string
*
* \note This function asserts on errors, so it will never return NULL.
* The caller is responsible for freeing the result with free().
*/
char *
pcmk__op_key(const char *rsc_id, const char *op_type, guint interval_ms)
{
CRM_ASSERT(rsc_id != NULL);
CRM_ASSERT(op_type != NULL);
return crm_strdup_printf(PCMK__OP_FMT, rsc_id, op_type, interval_ms);
}
static inline gboolean
convert_interval(const char *s, guint *interval_ms)
{
unsigned long l;
errno = 0;
l = strtoul(s, NULL, 10);
if (errno != 0) {
return FALSE;
}
*interval_ms = (guint) l;
return TRUE;
}
/*!
* \internal
* \brief Check for underbar-separated substring match
*
* \param[in] key Overall string being checked
* \param[in] position Match before underbar at this \p key index
* \param[in] matches Substrings to match (may contain underbars)
*
* \return \p key index of underbar before any matching substring,
* or 0 if none
*/
static size_t
match_before(const char *key, size_t position, const char **matches)
{
for (int i = 0; matches[i] != NULL; ++i) {
const size_t match_len = strlen(matches[i]);
// Must have at least X_MATCH before position
if (position > (match_len + 1)) {
const size_t possible = position - match_len - 1;
if ((key[possible] == '_')
&& (strncmp(key + possible + 1, matches[i], match_len) == 0)) {
return possible;
}
}
}
return 0;
}
gboolean
parse_op_key(const char *key, char **rsc_id, char **op_type, guint *interval_ms)
{
guint local_interval_ms = 0;
const size_t key_len = (key == NULL)? 0 : strlen(key);
// Operation keys must be formatted as RSC_ACTION_INTERVAL
size_t action_underbar = 0; // Index in key of underbar before ACTION
size_t interval_underbar = 0; // Index in key of underbar before INTERVAL
size_t possible = 0;
/* Underbar was a poor choice of separator since both RSC and ACTION can
* contain underbars. Here, list action names and name prefixes that can.
*/
const char *actions_with_underbars[] = {
- CRMD_ACTION_MIGRATED,
+ PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_MIGRATE_TO,
NULL
};
const char *action_prefixes_with_underbars[] = {
"pre_" PCMK_ACTION_NOTIFY,
"post_" PCMK_ACTION_NOTIFY,
"confirmed-pre_" PCMK_ACTION_NOTIFY,
"confirmed-post_" PCMK_ACTION_NOTIFY,
NULL,
};
// Initialize output variables in case of early return
if (rsc_id) {
*rsc_id = NULL;
}
if (op_type) {
*op_type = NULL;
}
if (interval_ms) {
*interval_ms = 0;
}
// RSC_ACTION_INTERVAL implies a minimum of 5 characters
if (key_len < 5) {
return FALSE;
}
// Find, parse, and validate interval
interval_underbar = key_len - 2;
while ((interval_underbar > 2) && (key[interval_underbar] != '_')) {
--interval_underbar;
}
if ((interval_underbar == 2)
|| !convert_interval(key + interval_underbar + 1, &local_interval_ms)) {
return FALSE;
}
// Find the base (OCF) action name, disregarding prefixes
action_underbar = match_before(key, interval_underbar,
actions_with_underbars);
if (action_underbar == 0) {
action_underbar = interval_underbar - 2;
while ((action_underbar > 0) && (key[action_underbar] != '_')) {
--action_underbar;
}
if (action_underbar == 0) {
return FALSE;
}
}
possible = match_before(key, action_underbar,
action_prefixes_with_underbars);
if (possible != 0) {
action_underbar = possible;
}
// Set output variables
if (rsc_id != NULL) {
*rsc_id = strndup(key, action_underbar);
CRM_ASSERT(*rsc_id != NULL);
}
if (op_type != NULL) {
*op_type = strndup(key + action_underbar + 1,
interval_underbar - action_underbar - 1);
CRM_ASSERT(*op_type != NULL);
}
if (interval_ms != NULL) {
*interval_ms = local_interval_ms;
}
return TRUE;
}
char *
pcmk__notify_key(const char *rsc_id, const char *notify_type,
const char *op_type)
{
CRM_CHECK(rsc_id != NULL, return NULL);
CRM_CHECK(op_type != NULL, return NULL);
CRM_CHECK(notify_type != NULL, return NULL);
return crm_strdup_printf("%s_%s_notify_%s_0",
rsc_id, notify_type, op_type);
}
/*!
* \brief Parse a transition magic string into its constituent parts
*
* \param[in] magic Magic string to parse (must be non-NULL)
* \param[out] uuid If non-NULL, where to store copy of parsed UUID
* \param[out] transition_id If non-NULL, where to store parsed transition ID
* \param[out] action_id If non-NULL, where to store parsed action ID
* \param[out] op_status If non-NULL, where to store parsed result status
* \param[out] op_rc If non-NULL, where to store parsed actual rc
* \param[out] target_rc If non-NULL, where to stored parsed target rc
*
* \return TRUE if key was valid, FALSE otherwise
* \note If uuid is supplied and this returns TRUE, the caller is responsible
* for freeing the memory for *uuid using free().
*/
gboolean
decode_transition_magic(const char *magic, char **uuid, int *transition_id, int *action_id,
int *op_status, int *op_rc, int *target_rc)
{
int res = 0;
char *key = NULL;
gboolean result = TRUE;
int local_op_status = -1;
int local_op_rc = -1;
CRM_CHECK(magic != NULL, return FALSE);
#ifdef HAVE_SSCANF_M
res = sscanf(magic, "%d:%d;%ms", &local_op_status, &local_op_rc, &key);
#else
key = calloc(1, strlen(magic) - 3); // magic must have >=4 other characters
CRM_ASSERT(key);
res = sscanf(magic, "%d:%d;%s", &local_op_status, &local_op_rc, key);
#endif
if (res == EOF) {
crm_err("Could not decode transition information '%s': %s",
magic, pcmk_rc_str(errno));
result = FALSE;
} else if (res < 3) {
crm_warn("Transition information '%s' incomplete (%d of 3 expected items)",
magic, res);
result = FALSE;
} else {
if (op_status) {
*op_status = local_op_status;
}
if (op_rc) {
*op_rc = local_op_rc;
}
result = decode_transition_key(key, uuid, transition_id, action_id,
target_rc);
}
free(key);
return result;
}
char *
pcmk__transition_key(int transition_id, int action_id, int target_rc,
const char *node)
{
CRM_CHECK(node != NULL, return NULL);
return crm_strdup_printf("%d:%d:%d:%-*s",
action_id, transition_id, target_rc, 36, node);
}
/*!
* \brief Parse a transition key into its constituent parts
*
* \param[in] key Transition key to parse (must be non-NULL)
* \param[out] uuid If non-NULL, where to store copy of parsed UUID
* \param[out] transition_id If non-NULL, where to store parsed transition ID
* \param[out] action_id If non-NULL, where to store parsed action ID
* \param[out] target_rc If non-NULL, where to stored parsed target rc
*
* \return TRUE if key was valid, FALSE otherwise
* \note If uuid is supplied and this returns TRUE, the caller is responsible
* for freeing the memory for *uuid using free().
*/
gboolean
decode_transition_key(const char *key, char **uuid, int *transition_id, int *action_id,
int *target_rc)
{
int local_transition_id = -1;
int local_action_id = -1;
int local_target_rc = -1;
char local_uuid[37] = { '\0' };
// Initialize any supplied output arguments
if (uuid) {
*uuid = NULL;
}
if (transition_id) {
*transition_id = -1;
}
if (action_id) {
*action_id = -1;
}
if (target_rc) {
*target_rc = -1;
}
CRM_CHECK(key != NULL, return FALSE);
if (sscanf(key, "%d:%d:%d:%36s", &local_action_id, &local_transition_id,
&local_target_rc, local_uuid) != 4) {
crm_err("Invalid transition key '%s'", key);
return FALSE;
}
if (strlen(local_uuid) != 36) {
crm_warn("Invalid UUID '%s' in transition key '%s'", local_uuid, key);
}
if (uuid) {
*uuid = strdup(local_uuid);
CRM_ASSERT(*uuid);
}
if (transition_id) {
*transition_id = local_transition_id;
}
if (action_id) {
*action_id = local_action_id;
}
if (target_rc) {
*target_rc = local_target_rc;
}
return TRUE;
}
// Return true if a is an attribute that should be filtered
static bool
should_filter_for_digest(xmlAttrPtr a, void *user_data)
{
if (strncmp((const char *) a->name, CRM_META "_",
sizeof(CRM_META " ") - 1) == 0) {
return true;
}
return pcmk__str_any_of((const char *) a->name,
XML_ATTR_ID,
XML_ATTR_CRM_VERSION,
XML_LRM_ATTR_OP_DIGEST,
XML_LRM_ATTR_TARGET,
XML_LRM_ATTR_TARGET_UUID,
"pcmk_external_ip",
NULL);
}
/*!
* \internal
* \brief Remove XML attributes not needed for operation digest
*
* \param[in,out] param_set XML with operation parameters
*/
void
pcmk__filter_op_for_digest(xmlNode *param_set)
{
char *key = NULL;
char *timeout = NULL;
guint interval_ms = 0;
if (param_set == NULL) {
return;
}
/* Timeout is useful for recurring operation digests, so grab it before
* removing meta-attributes
*/
key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
if (crm_element_value_ms(param_set, key, &interval_ms) != pcmk_ok) {
interval_ms = 0;
}
free(key);
key = NULL;
if (interval_ms != 0) {
key = crm_meta_name(XML_ATTR_TIMEOUT);
timeout = crm_element_value_copy(param_set, key);
}
// Remove all CRM_meta_* attributes and certain other attributes
pcmk__xe_remove_matching_attrs(param_set, should_filter_for_digest, NULL);
// Add timeout back for recurring operation digests
if (timeout != NULL) {
crm_xml_add(param_set, key, timeout);
}
free(timeout);
free(key);
}
int
rsc_op_expected_rc(const lrmd_event_data_t *op)
{
int rc = 0;
if (op && op->user_data) {
decode_transition_key(op->user_data, NULL, NULL, NULL, &rc);
}
return rc;
}
gboolean
did_rsc_op_fail(lrmd_event_data_t * op, int target_rc)
{
switch (op->op_status) {
case PCMK_EXEC_CANCELLED:
case PCMK_EXEC_PENDING:
return FALSE;
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_ERROR:
case PCMK_EXEC_NOT_CONNECTED:
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
case PCMK_EXEC_INVALID:
return TRUE;
default:
if (target_rc != op->rc) {
return TRUE;
}
}
return FALSE;
}
/*!
* \brief Create a CIB XML element for an operation
*
* \param[in,out] parent If not NULL, make new XML node a child of this
* \param[in] prefix Generate an ID using this prefix
* \param[in] task Operation task to set
* \param[in] interval_spec Operation interval to set
* \param[in] timeout If not NULL, operation timeout to set
*
* \return New XML object on success, NULL otherwise
*/
xmlNode *
crm_create_op_xml(xmlNode *parent, const char *prefix, const char *task,
const char *interval_spec, const char *timeout)
{
xmlNode *xml_op;
CRM_CHECK(prefix && task && interval_spec, return NULL);
xml_op = create_xml_node(parent, XML_ATTR_OP);
crm_xml_set_id(xml_op, "%s-%s-%s", prefix, task, interval_spec);
crm_xml_add(xml_op, XML_LRM_ATTR_INTERVAL, interval_spec);
crm_xml_add(xml_op, "name", task);
if (timeout) {
crm_xml_add(xml_op, XML_ATTR_TIMEOUT, timeout);
}
return xml_op;
}
/*!
* \brief Check whether an operation requires resource agent meta-data
*
* \param[in] rsc_class Resource agent class (or NULL to skip class check)
* \param[in] op Operation action (or NULL to skip op check)
*
* \return true if operation needs meta-data, false otherwise
* \note At least one of rsc_class and op must be specified.
*/
bool
crm_op_needs_metadata(const char *rsc_class, const char *op)
{
/* Agent metadata is used to determine whether an agent reload is possible,
* so if this op is not relevant to that feature, we don't need metadata.
*/
CRM_CHECK((rsc_class != NULL) || (op != NULL), return false);
if ((rsc_class != NULL)
&& !pcmk_is_set(pcmk_get_ra_caps(rsc_class), pcmk_ra_cap_params)) {
// Metadata is needed only for resource classes that use parameters
return false;
}
if (op == NULL) {
return true;
}
// Metadata is needed only for these actions
return pcmk__str_any_of(op, PCMK_ACTION_START, PCMK_ACTION_MONITOR,
PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT,
- PCMK_ACTION_MIGRATE_TO, CRMD_ACTION_MIGRATED,
+ PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_NOTIFY, NULL);
}
/*!
* \internal
* \brief Check whether an action name is for a fencing action
*
* \param[in] action Action name to check
*
* \return true if \p action is "off", "reboot", or "poweroff", otherwise false
*/
bool
pcmk__is_fencing_action(const char *action)
{
return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL);
}
bool
pcmk_is_probe(const char *task, guint interval)
{
if (task == NULL) {
return false;
}
return (interval == 0)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_none);
}
bool
pcmk_xe_is_probe(const xmlNode *xml_op)
{
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *interval_ms_s = crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL_MS);
int interval_ms;
pcmk__scan_min_int(interval_ms_s, &interval_ms, 0);
return pcmk_is_probe(task, interval_ms);
}
bool
pcmk_xe_mask_probe_failure(const xmlNode *xml_op)
{
int status = PCMK_EXEC_UNKNOWN;
int rc = PCMK_OCF_OK;
if (!pcmk_xe_is_probe(xml_op)) {
return false;
}
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
return rc == PCMK_OCF_NOT_INSTALLED || rc == PCMK_OCF_INVALID_PARAM ||
status == PCMK_EXEC_NOT_INSTALLED;
}
diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c
index 1a59fd0f30..d92ef67056 100644
--- a/lib/pacemaker/pcmk_graph_producer.c
+++ b/lib/pacemaker/pcmk_graph_producer.c
@@ -1,1090 +1,1090 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <glib.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Add an XML node tag for a specified ID
*
* \param[in] id Node UUID to add
* \param[in,out] xml Parent XML tag to add to
*/
static xmlNode*
add_node_to_xml_by_id(const char *id, xmlNode *xml)
{
xmlNode *node_xml;
node_xml = create_xml_node(xml, XML_CIB_TAG_NODE);
crm_xml_add(node_xml, XML_ATTR_ID, id);
return node_xml;
}
/*!
* \internal
* \brief Add an XML node tag for a specified node
*
* \param[in] node Node to add
* \param[in,out] xml XML to add node to
*/
static void
add_node_to_xml(const pe_node_t *node, void *xml)
{
add_node_to_xml_by_id(node->details->id, (xmlNode *) xml);
}
/*!
* \internal
* \brief Count (optionally add to XML) nodes needing maintenance state update
*
* \param[in,out] xml Parent XML tag to add to, if any
* \param[in] data_set Working set for cluster
*
* \return Count of nodes added
* \note Only Pacemaker Remote nodes are considered currently
*/
static int
add_maintenance_nodes(xmlNode *xml, const pe_working_set_t *data_set)
{
xmlNode *maintenance = NULL;
int count = 0;
if (xml != NULL) {
maintenance = create_xml_node(xml, XML_GRAPH_TAG_MAINTENANCE);
}
for (const GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
const pe_node_t *node = iter->data;
if (pe__is_guest_or_remote_node(node) &&
(node->details->maintenance != node->details->remote_maintenance)) {
if (maintenance != NULL) {
crm_xml_add(add_node_to_xml_by_id(node->details->id,
maintenance),
XML_NODE_IS_MAINTENANCE,
(node->details->maintenance? "1" : "0"));
}
count++;
}
}
crm_trace("%s %d nodes in need of maintenance mode update in state",
((maintenance == NULL)? "Counted" : "Added"), count);
return count;
}
/*!
* \internal
* \brief Add pseudo action with nodes needing maintenance state update
*
* \param[in,out] data_set Working set for cluster
*/
static void
add_maintenance_update(pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
if (add_maintenance_nodes(NULL, data_set) != 0) {
action = get_pseudo_op(CRM_OP_MAINTENANCE_NODES, data_set);
pe__set_action_flags(action, pe_action_print_always);
}
}
/*!
* \internal
* \brief Add XML with nodes that an action is expected to bring down
*
* If a specified action is expected to bring any nodes down, add an XML block
* with their UUIDs. When a node is lost, this allows the controller to
* determine whether it was expected.
*
* \param[in,out] xml Parent XML tag to add to
* \param[in] action Action to check for downed nodes
*/
static void
add_downed_nodes(xmlNode *xml, const pe_action_t *action)
{
CRM_CHECK((xml != NULL) && (action != NULL) && (action->node != NULL),
return);
if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
/* Shutdown makes the action's node down */
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)) {
/* Fencing makes the action's node and any hosted guest nodes down */
const char *fence = g_hash_table_lookup(action->meta, "stonith_action");
if (pcmk__is_fencing_action(fence)) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->node->details->id, downed);
pe_foreach_guest_node(action->node->details->data_set, action->node,
add_node_to_xml, downed);
}
} else if (action->rsc && action->rsc->is_remote_node
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
/* Stopping a remote connection resource makes connected node down,
* unless it's part of a migration
*/
GList *iter;
pe_action_t *input;
bool migrating = false;
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
input = ((pe_action_wrapper_t *) iter->data)->action;
if ((input->rsc != NULL)
&& pcmk__str_eq(action->rsc->id, input->rsc->id, pcmk__str_none)
- && pcmk__str_eq(input->task, CRMD_ACTION_MIGRATED,
+ && pcmk__str_eq(input->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
migrating = true;
break;
}
}
if (!migrating) {
xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED);
add_node_to_xml_by_id(action->rsc->id, downed);
}
}
}
/*!
* \internal
* \brief Create a transition graph operation key for a clone action
*
* \param[in] action Clone action
* \param[in] interval_ms Action interval in milliseconds
*
* \return Newly allocated string with transition graph operation key
*/
static char *
clone_op_key(const pe_action_t *action, guint interval_ms)
{
if (pcmk__str_eq(action->task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = g_hash_table_lookup(action->meta, "notify_type");
const char *n_task = g_hash_table_lookup(action->meta,
"notify_operation");
CRM_LOG_ASSERT((n_type != NULL) && (n_task != NULL));
return pcmk__notify_key(action->rsc->clone_name, n_type, n_task);
} else if (action->cancel_task != NULL) {
return pcmk__op_key(action->rsc->clone_name, action->cancel_task,
interval_ms);
} else {
return pcmk__op_key(action->rsc->clone_name, action->task, interval_ms);
}
}
/*!
* \internal
* \brief Add node details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] xml Transition graph action XML for \p action
*/
static void
add_node_details(const pe_action_t *action, xmlNode *xml)
{
pe_node_t *router_node = pcmk__connection_host_for_action(action);
crm_xml_add(xml, XML_LRM_ATTR_TARGET, action->node->details->uname);
crm_xml_add(xml, XML_LRM_ATTR_TARGET_UUID, action->node->details->id);
if (router_node != NULL) {
crm_xml_add(xml, XML_LRM_ATTR_ROUTER_NODE, router_node->details->uname);
}
}
/*!
* \internal
* \brief Add resource details to transition graph action XML
*
* \param[in] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_resource_details(const pe_action_t *action, xmlNode *action_xml)
{
xmlNode *rsc_xml = NULL;
const char *attr_list[] = {
XML_AGENT_ATTR_CLASS,
XML_AGENT_ATTR_PROVIDER,
XML_ATTR_TYPE
};
/* If a resource is locked to a node via shutdown-lock, mark its actions
* so the controller can preserve the lock when the action completes.
*/
if (pcmk__action_locks_rsc_to_node(action)) {
crm_xml_add_ll(action_xml, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
(long long) action->rsc->lock_time);
}
// List affected resource
rsc_xml = create_xml_node(action_xml, crm_element_name(action->rsc->xml));
if (pcmk_is_set(action->rsc->flags, pe_rsc_orphan)
&& (action->rsc->clone_name != NULL)) {
/* Use the numbered instance name here, because if there is more
* than one instance on a node, we need to make sure the command
* goes to the right one.
*
* This is important even for anonymous clones, because the clone's
* unique meta-attribute might have just been toggled from on to
* off.
*/
crm_debug("Using orphan clone name %s instead of %s",
action->rsc->id, action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->clone_name);
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
} else if (!pcmk_is_set(action->rsc->flags, pe_rsc_unique)) {
const char *xml_id = ID(action->rsc->xml);
crm_debug("Using anonymous clone name %s for %s (aka %s)",
xml_id, action->rsc->id, action->rsc->clone_name);
/* ID is what we'd like client to use
* ID_LONG is what they might know it as instead
*
* ID_LONG is only strictly needed /here/ during the
* transition period until all nodes in the cluster
* are running the new software /and/ have rebooted
* once (meaning that they've only ever spoken to a DC
* supporting this feature).
*
* If anyone toggles the unique flag to 'on', the
* 'instance free' name will correspond to an orphan
* and fall into the clause above instead
*/
crm_xml_add(rsc_xml, XML_ATTR_ID, xml_id);
if ((action->rsc->clone_name != NULL)
&& !pcmk__str_eq(xml_id, action->rsc->clone_name,
pcmk__str_none)) {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->clone_name);
} else {
crm_xml_add(rsc_xml, XML_ATTR_ID_LONG, action->rsc->id);
}
} else {
CRM_ASSERT(action->rsc->clone_name == NULL);
crm_xml_add(rsc_xml, XML_ATTR_ID, action->rsc->id);
}
for (int lpc = 0; lpc < PCMK__NELEM(attr_list); lpc++) {
crm_xml_add(rsc_xml, attr_list[lpc],
g_hash_table_lookup(action->rsc->meta, attr_list[lpc]));
}
}
/*!
* \internal
* \brief Add action attributes to transition graph action XML
*
* \param[in,out] action Scheduled action
* \param[in,out] action_xml Transition graph action XML for \p action
*/
static void
add_action_attributes(pe_action_t *action, xmlNode *action_xml)
{
xmlNode *args_xml = NULL;
/* We create free-standing XML to start, so we can sort the attributes
* before adding it to action_xml, which keeps the scheduler regression
* test graphs comparable.
*/
args_xml = create_xml_node(NULL, XML_TAG_ATTRS);
crm_xml_add(args_xml, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET);
g_hash_table_foreach(action->extra, hash2field, args_xml);
if ((action->rsc != NULL) && (action->node != NULL)) {
// Get the resource instance attributes, evaluated properly for node
GHashTable *params = pe_rsc_params(action->rsc, action->node,
action->rsc->cluster);
pcmk__substitute_remote_addr(action->rsc, params);
g_hash_table_foreach(params, hash2smartfield, args_xml);
} else if ((action->rsc != NULL) && (action->rsc->variant <= pe_native)) {
GHashTable *params = pe_rsc_params(action->rsc, NULL,
action->rsc->cluster);
g_hash_table_foreach(params, hash2smartfield, args_xml);
}
g_hash_table_foreach(action->meta, hash2metafield, args_xml);
if (action->rsc != NULL) {
pe_resource_t *parent = action->rsc;
while (parent != NULL) {
parent->cmds->add_graph_meta(parent, args_xml);
parent = parent->parent;
}
pcmk__add_bundle_meta_to_xml(args_xml, action);
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)
&& (action->node != NULL)) {
/* Pass the node's attributes as meta-attributes.
*
* @TODO: Determine whether it is still necessary to do this. It was
* added in 33d99707, probably for the libfence-based implementation in
* c9a90bd, which is no longer used.
*/
g_hash_table_foreach(action->node->details->attrs, hash2metafield,
args_xml);
}
sorted_xml(args_xml, action_xml, FALSE);
free_xml(args_xml);
}
/*!
* \internal
* \brief Create the transition graph XML for a scheduled action
*
* \param[in,out] parent Parent XML element to add action to
* \param[in,out] action Scheduled action
* \param[in] skip_details If false, add action details as sub-elements
* \param[in] data_set Cluster working set
*/
static void
create_graph_action(xmlNode *parent, pe_action_t *action, bool skip_details,
const pe_working_set_t *data_set)
{
bool needs_node_info = true;
bool needs_maintenance_info = false;
xmlNode *action_xml = NULL;
if ((action == NULL) || (data_set == NULL)) {
return;
}
// Create the top-level element based on task
if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)) {
/* All fences need node info; guest node fences are pseudo-events */
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
}
} else if (pcmk__str_any_of(action->task,
CRM_OP_SHUTDOWN,
CRM_OP_CLEAR_FAILCOUNT, NULL)) {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_none)) {
// CIB-only clean-up for shutdown locks
action_xml = create_xml_node(parent, XML_GRAPH_TAG_CRM_EVENT);
crm_xml_add(action_xml, PCMK__XA_MODE, XML_TAG_CIB);
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
if (pcmk__str_eq(action->task, CRM_OP_MAINTENANCE_NODES,
pcmk__str_none)) {
needs_maintenance_info = true;
}
action_xml = create_xml_node(parent, XML_GRAPH_TAG_PSEUDO_EVENT);
needs_node_info = false;
} else {
action_xml = create_xml_node(parent, XML_GRAPH_TAG_RSC_OP);
}
crm_xml_add_int(action_xml, XML_ATTR_ID, action->id);
crm_xml_add(action_xml, XML_LRM_ATTR_TASK, action->task);
if ((action->rsc != NULL) && (action->rsc->clone_name != NULL)) {
char *clone_key = NULL;
guint interval_ms;
if (pcmk__guint_from_hash(action->meta, XML_LRM_ATTR_INTERVAL_MS, 0,
&interval_ms) != pcmk_rc_ok) {
interval_ms = 0;
}
clone_key = clone_op_key(action, interval_ms);
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, clone_key);
crm_xml_add(action_xml, "internal_" XML_LRM_ATTR_TASK_KEY,
action->uuid);
free(clone_key);
} else {
crm_xml_add(action_xml, XML_LRM_ATTR_TASK_KEY, action->uuid);
}
if (needs_node_info && (action->node != NULL)) {
add_node_details(action, action_xml);
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET),
strdup(action->node->details->uname));
g_hash_table_insert(action->meta, strdup(XML_LRM_ATTR_TARGET_UUID),
strdup(action->node->details->id));
}
if (skip_details) {
return;
}
if ((action->rsc != NULL)
&& !pcmk_is_set(action->flags, pe_action_pseudo)) {
// This is a real resource action, so add resource details
add_resource_details(action, action_xml);
}
/* List any attributes in effect */
add_action_attributes(action, action_xml);
/* List any nodes this action is expected to make down */
if (needs_node_info && (action->node != NULL)) {
add_downed_nodes(action_xml, action);
}
if (needs_maintenance_info) {
add_maintenance_nodes(action_xml, data_set);
}
}
/*!
* \internal
* \brief Check whether an action should be added to the transition graph
*
* \param[in] action Action to check
*
* \return true if action should be added to graph, otherwise false
*/
static bool
should_add_action_to_graph(const pe_action_t *action)
{
if (!pcmk_is_set(action->flags, pe_action_runnable)) {
crm_trace("Ignoring action %s (%d): unrunnable",
action->uuid, action->id);
return false;
}
if (pcmk_is_set(action->flags, pe_action_optional)
&& !pcmk_is_set(action->flags, pe_action_print_always)) {
crm_trace("Ignoring action %s (%d): optional",
action->uuid, action->id);
return false;
}
/* Actions for unmanaged resources should be excluded from the graph,
* with the exception of monitors and cancellation of recurring monitors.
*/
if ((action->rsc != NULL)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& !pcmk__str_eq(action->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
const char *interval_ms_s;
/* A cancellation of a recurring monitor will get here because the task
* is cancel rather than monitor, but the interval can still be used to
* recognize it. The interval has been normalized to milliseconds by
* this point, so a string comparison is sufficient.
*/
interval_ms_s = g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS);
if (pcmk__str_eq(interval_ms_s, "0", pcmk__str_null_matches)) {
crm_trace("Ignoring action %s (%d): for unmanaged resource (%s)",
action->uuid, action->id, action->rsc->id);
return false;
}
}
/* Always add pseudo-actions, fence actions, and shutdown actions (already
* determined to be required and runnable by this point)
*/
if (pcmk_is_set(action->flags, pe_action_pseudo)
|| pcmk__strcase_any_of(action->task, CRM_OP_FENCE, CRM_OP_SHUTDOWN,
NULL)) {
return true;
}
if (action->node == NULL) {
pe_err("Skipping action %s (%d) "
"because it was not assigned to a node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unassigned", action, false);
return false;
}
if (pcmk_is_set(action->flags, pe_action_dc)) {
crm_trace("Action %s (%d) should be dumped: "
"can run on DC instead of %s",
action->uuid, action->id, pe__node_name(action->node));
} else if (pe__is_guest_node(action->node)
&& !action->node->details->remote_requires_reset) {
crm_trace("Action %s (%d) should be dumped: "
"assuming will be runnable on guest %s",
action->uuid, action->id, pe__node_name(action->node));
} else if (!action->node->details->online) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for offline node (bug?)",
action->uuid, action->id);
pcmk__log_action("Offline node", action, false);
return false;
} else if (action->node->details->unclean) {
pe_err("Skipping action %s (%d) "
"because it was scheduled for unclean node (bug?)",
action->uuid, action->id);
pcmk__log_action("Unclean node", action, false);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether an ordering's flags can change an action
*
* \param[in] ordering Ordering to check
*
* \return true if ordering has flags that can change an action, false otherwise
*/
static bool
ordering_can_change_actions(const pe_action_wrapper_t *ordering)
{
return pcmk_any_flags_set(ordering->type, ~(pe_order_implies_first_printed
|pe_order_implies_then_printed
|pe_order_optional));
}
/*!
* \internal
* \brief Check whether an action input should be in the transition graph
*
* \param[in] action Action to check
* \param[in,out] input Action input to check
*
* \return true if input should be in graph, false otherwise
* \note This function may not only check an input, but disable it under certian
* circumstances (load or anti-colocation orderings that are not needed).
*/
static bool
should_add_input_to_graph(const pe_action_t *action, pe_action_wrapper_t *input)
{
if (input->state == pe_link_dumped) {
return true;
}
if (input->type == pe_order_none) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"ordering disabled",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
&& !ordering_can_change_actions(input)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (!pcmk_is_set(input->action->flags, pe_action_runnable)
&& pcmk_is_set(input->type, pe_order_one_or_more)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"one-or-more and input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pe_order_implies_first_migratable)
&& !pcmk_is_set(input->action->flags, pe_action_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"implies input migratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (pcmk_is_set(input->type, pe_order_apply_first_non_migratable)
&& pcmk_is_set(input->action->flags,
pe_action_migrate_runnable)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"only if input unmigratable but input unrunnable",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if ((input->type == pe_order_optional)
&& pcmk_is_set(input->action->flags, pe_action_migrate_runnable)
&& pcmk__ends_with(input->action->uuid, "_stop_0")) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"optional but stop in migration",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
} else if (input->type == pe_order_load) {
pe_node_t *input_node = input->action->node;
// load orderings are relevant only if actions are for same node
if ((action->rsc != NULL)
&& pcmk__str_eq(action->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
pe_node_t *assigned = action->rsc->allocated_to;
/* For load_stopped -> migrate_to orderings, we care about where it
* has been assigned to, not where it will be executed.
*/
if (!pe__same_node(input_node, assigned)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(assigned? assigned->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
}
} else if (!pe__same_node(input_node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
(action->node? action->node->details->uname : "<none>"),
(input_node? input_node->details->uname : "<none>"));
input->type = pe_order_none;
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"load ordering input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->type == pe_order_anti_colocation) {
if (input->action->node && action->node
&& !pe__same_node(input->action->node, action->node)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation node mismatch %s vs %s",
action->uuid, action->id,
input->action->uuid, input->action->id,
pe__node_name(action->node),
pe__node_name(input->action->node));
input->type = pe_order_none;
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"anti-colocation input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
input->type = pe_order_none;
return false;
}
} else if (input->action->rsc
&& input->action->rsc != action->rsc
&& pcmk_is_set(input->action->rsc->flags, pe_rsc_failed)
&& !pcmk_is_set(input->action->rsc->flags, pe_rsc_managed)
&& pcmk__ends_with(input->action->uuid, "_stop_0")
&& action->rsc && pe_rsc_is_clone(action->rsc)) {
crm_warn("Ignoring requirement that %s complete before %s:"
" unmanaged failed resources cannot prevent clone shutdown",
input->action->uuid, action->uuid);
return false;
} else if (pcmk_is_set(input->action->flags, pe_action_optional)
&& !pcmk_any_flags_set(input->action->flags,
pe_action_print_always|pe_action_dumped)
&& !should_add_action_to_graph(input->action)) {
crm_trace("Ignoring %s (%d) input %s (%d): "
"input optional",
action->uuid, action->id,
input->action->uuid, input->action->id);
return false;
}
crm_trace("%s (%d) input %s %s (%d) on %s should be dumped: %s %s %#.6x",
action->uuid, action->id, action_type_str(input->action->flags),
input->action->uuid, input->action->id,
action_node_str(input->action),
action_runnable_str(input->action->flags),
action_optional_str(input->action->flags), input->type);
return true;
}
/*!
* \internal
* \brief Check whether an ordering creates an ordering loop
*
* \param[in] init_action "First" action in ordering
* \param[in] action Callers should always set this the same as
* \p init_action (this function may use a different
* value for recursive calls)
* \param[in,out] input Action wrapper for "then" action in ordering
*
* \return true if the ordering creates a loop, otherwise false
*/
bool
pcmk__graph_has_loop(const pe_action_t *init_action, const pe_action_t *action,
pe_action_wrapper_t *input)
{
bool has_loop = false;
if (pcmk_is_set(input->action->flags, pe_action_tracking)) {
crm_trace("Breaking tracking loop: %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
return false;
}
// Don't need to check inputs that won't be used
if (!should_add_input_to_graph(action, input)) {
return false;
}
if (input->action == init_action) {
crm_debug("Input loop found in %s@%s ->...-> %s@%s",
action->uuid,
action->node? action->node->details->uname : "",
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
return true;
}
pe__set_action_flags(input->action, pe_action_tracking);
crm_trace("Checking inputs of action %s@%s input %s@%s (%#.6x)"
"for graph loop with %s@%s ",
action->uuid,
action->node? action->node->details->uname : "",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
input->type,
init_action->uuid,
init_action->node? init_action->node->details->uname : "");
// Recursively check input itself for loops
for (GList *iter = input->action->actions_before;
iter != NULL; iter = iter->next) {
if (pcmk__graph_has_loop(init_action, input->action,
(pe_action_wrapper_t *) iter->data)) {
// Recursive call already logged a debug message
has_loop = true;
break;
}
}
pe__clear_action_flags(input->action, pe_action_tracking);
if (!has_loop) {
crm_trace("No input loop found in %s@%s -> %s@%s (%#.6x)",
input->action->uuid,
input->action->node? input->action->node->details->uname : "",
action->uuid,
action->node? action->node->details->uname : "",
input->type);
}
return has_loop;
}
/*!
* \internal
* \brief Create a synapse XML element for a transition graph
*
* \param[in] action Action that synapse is for
* \param[in,out] data_set Cluster working set containing graph
*
* \return Newly added XML element for new graph synapse
*/
static xmlNode *
create_graph_synapse(const pe_action_t *action, pe_working_set_t *data_set)
{
int synapse_priority = 0;
xmlNode *syn = create_xml_node(data_set->graph, "synapse");
crm_xml_add_int(syn, XML_ATTR_ID, data_set->num_synapse);
data_set->num_synapse++;
if (action->rsc != NULL) {
synapse_priority = action->rsc->priority;
}
if (action->priority > synapse_priority) {
synapse_priority = action->priority;
}
if (synapse_priority > 0) {
crm_xml_add_int(syn, XML_CIB_ATTR_PRIORITY, synapse_priority);
}
return syn;
}
/*!
* \internal
* \brief Add an action to the transition graph XML if appropriate
*
* \param[in,out] data Action to possibly add
* \param[in,out] user_data Cluster working set
*
* \note This will de-duplicate the action inputs, meaning that the
* pe_action_wrapper_t:type flags can no longer be relied on to retain
* their original settings. That means this MUST be called after
* pcmk__apply_orderings() is complete, and nothing after this should rely
* on those type flags. (For example, some code looks for type equal to
* some flag rather than whether the flag is set, and some code looks for
* particular combinations of flags -- such code must be done before
* pcmk__create_graph().)
*/
static void
add_action_to_graph(gpointer data, gpointer user_data)
{
pe_action_t *action = (pe_action_t *) data;
pe_working_set_t *data_set = (pe_working_set_t *) user_data;
xmlNode *syn = NULL;
xmlNode *set = NULL;
xmlNode *in = NULL;
/* If we haven't already, de-duplicate inputs (even if we won't be adding
* the action to the graph, so that crm_simulate's dot graphs don't have
* duplicates).
*/
if (!pcmk_is_set(action->flags, pe_action_dedup)) {
pcmk__deduplicate_action_inputs(action);
pe__set_action_flags(action, pe_action_dedup);
}
if (pcmk_is_set(action->flags, pe_action_dumped) // Already added, or
|| !should_add_action_to_graph(action)) { // shouldn't be added
return;
}
pe__set_action_flags(action, pe_action_dumped);
crm_trace("Adding action %d (%s%s%s) to graph",
action->id, action->uuid,
((action->node == NULL)? "" : " on "),
((action->node == NULL)? "" : action->node->details->uname));
syn = create_graph_synapse(action, data_set);
set = create_xml_node(syn, "action_set");
in = create_xml_node(syn, "inputs");
create_graph_action(set, action, false, data_set);
for (GList *lpc = action->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) lpc->data;
if (should_add_input_to_graph(action, input)) {
xmlNode *input_xml = create_xml_node(in, "trigger");
input->state = pe_link_dumped;
create_graph_action(input_xml, input->action, true, data_set);
}
}
}
static int transition_id = -1;
/*!
* \internal
* \brief Log a message after calculating a transition
*
* \param[in] filename Where transition input is stored
*/
void
pcmk__log_transition_summary(const char *filename)
{
if (was_processing_error) {
crm_err("Calculated transition %d (with errors)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else if (was_processing_warning) {
crm_warn("Calculated transition %d (with warnings)%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
} else {
crm_notice("Calculated transition %d%s%s",
transition_id,
(filename == NULL)? "" : ", saving inputs in ",
(filename == NULL)? "" : filename);
}
if (crm_config_error) {
crm_notice("Configuration errors found during scheduler processing,"
" please run \"crm_verify -L\" to identify issues");
}
}
/*!
* \internal
* \brief Add a resource's actions to the transition graph
*
* \param[in,out] rsc Resource whose actions should be added
*/
void
pcmk__add_rsc_actions_to_graph(pe_resource_t *rsc)
{
GList *iter = NULL;
CRM_ASSERT(rsc != NULL);
pe_rsc_trace(rsc, "Adding actions for %s to graph", rsc->id);
// First add the resource's own actions
g_list_foreach(rsc->actions, add_action_to_graph, rsc->cluster);
// Then recursively add its children's actions (appropriate to variant)
for (iter = rsc->children; iter != NULL; iter = iter->next) {
pe_resource_t *child_rsc = (pe_resource_t *) iter->data;
child_rsc->cmds->add_actions_to_graph(child_rsc);
}
}
/*!
* \internal
* \brief Create a transition graph with all cluster actions needed
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__create_graph(pe_working_set_t *data_set)
{
GList *iter = NULL;
const char *value = NULL;
long long limit = 0LL;
transition_id++;
crm_trace("Creating transition graph %d", transition_id);
data_set->graph = create_xml_node(NULL, XML_TAG_GRAPH);
value = pe_pref(data_set->config_hash, "cluster-delay");
crm_xml_add(data_set->graph, "cluster-delay", value);
value = pe_pref(data_set->config_hash, "stonith-timeout");
crm_xml_add(data_set->graph, "stonith-timeout", value);
crm_xml_add(data_set->graph, "failed-stop-offset", "INFINITY");
if (pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)) {
crm_xml_add(data_set->graph, "failed-start-offset", "INFINITY");
} else {
crm_xml_add(data_set->graph, "failed-start-offset", "1");
}
value = pe_pref(data_set->config_hash, "batch-limit");
crm_xml_add(data_set->graph, "batch-limit", value);
crm_xml_add_int(data_set->graph, "transition_id", transition_id);
value = pe_pref(data_set->config_hash, "migration-limit");
if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) {
crm_xml_add(data_set->graph, "migration-limit", value);
}
if (data_set->recheck_by > 0) {
char *recheck_epoch = NULL;
recheck_epoch = crm_strdup_printf("%llu",
(long long) data_set->recheck_by);
crm_xml_add(data_set->graph, "recheck-by", recheck_epoch);
free(recheck_epoch);
}
/* The following code will de-duplicate action inputs, so nothing past this
* should rely on the action input type flags retaining their original
* values.
*/
// Add resource actions to graph
for (iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
pe_rsc_trace(rsc, "Processing actions for %s", rsc->id);
rsc->cmds->add_actions_to_graph(rsc);
}
// Add pseudo-action for list of nodes with maintenance state update
add_maintenance_update(data_set);
// Add non-resource (node) actions
for (iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = (pe_action_t *) iter->data;
if ((action->rsc != NULL)
&& (action->node != NULL)
&& action->node->details->shutdown
&& !pcmk_is_set(action->rsc->flags, pe_rsc_maintenance)
&& !pcmk_any_flags_set(action->flags,
pe_action_optional|pe_action_runnable)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* Eventually we should just ignore the 'fence' case, but for now
* it's the best way to detect (in CTS) when CIB resource updates
* are being lost.
*/
if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)
|| (data_set->no_quorum_policy == no_quorum_ignore)) {
const bool managed = pcmk_is_set(action->rsc->flags,
pe_rsc_managed);
const bool failed = pcmk_is_set(action->rsc->flags,
pe_rsc_failed);
crm_crit("Cannot %s %s because of %s:%s%s (%s)",
action->node->details->unclean? "fence" : "shut down",
pe__node_name(action->node), action->rsc->id,
(managed? " blocked" : " unmanaged"),
(failed? " failed" : ""), action->uuid);
}
}
add_action_to_graph((gpointer) action, (gpointer) data_set);
}
crm_log_xml_trace(data_set->graph, "graph");
}
diff --git a/lib/pacemaker/pcmk_output.c b/lib/pacemaker/pcmk_output.c
index 25e972c33b..d9b774c731 100644
--- a/lib/pacemaker/pcmk_output.c
+++ b/lib/pacemaker/pcmk_output.c
@@ -1,2394 +1,2396 @@
/*
* Copyright 2019-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/common/output.h>
#include <crm/common/results.h>
#include <crm/msg_xml.h>
#include <crm/stonith-ng.h>
#include <crm/fencing/internal.h>
#include <crm/pengine/internal.h>
#include <libxml/tree.h>
#include <pacemaker-internal.h>
#include <inttypes.h>
#include <stdint.h>
static char *
colocations_header(pe_resource_t *rsc, pcmk__colocation_t *cons,
bool dependents) {
char *retval = NULL;
if (cons->primary_role > RSC_ROLE_STARTED) {
retval = crm_strdup_printf("%s (score=%s, %s role=%s, id=%s)",
rsc->id, pcmk_readable_score(cons->score),
(dependents? "needs" : "with"),
role2text(cons->primary_role), cons->id);
} else {
retval = crm_strdup_printf("%s (score=%s, id=%s)",
rsc->id, pcmk_readable_score(cons->score),
cons->id);
}
return retval;
}
static void
colocations_xml_node(pcmk__output_t *out, pe_resource_t *rsc,
pcmk__colocation_t *cons) {
xmlNodePtr node = NULL;
node = pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_DEPEND,
"id", cons->id,
"rsc", cons->dependent->id,
"with-rsc", cons->primary->id,
"score",
pcmk_readable_score(cons->score),
NULL);
if (cons->node_attribute) {
xmlSetProp(node, (pcmkXmlStr) "node-attribute",
(pcmkXmlStr) cons->node_attribute);
}
if (cons->dependent_role != RSC_ROLE_UNKNOWN) {
xmlSetProp(node, (pcmkXmlStr) "rsc-role",
(pcmkXmlStr) role2text(cons->dependent_role));
}
if (cons->primary_role != RSC_ROLE_UNKNOWN) {
xmlSetProp(node, (pcmkXmlStr) "with-rsc-role",
(pcmkXmlStr) role2text(cons->primary_role));
}
}
static int
do_locations_list_xml(pcmk__output_t *out, pe_resource_t *rsc, bool add_header)
{
GList *lpc = NULL;
GList *list = rsc->rsc_location;
int rc = pcmk_rc_no_output;
for (lpc = list; lpc != NULL; lpc = lpc->next) {
pe__location_t *cons = lpc->data;
GList *lpc2 = NULL;
for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) {
pe_node_t *node = (pe_node_t *) lpc2->data;
if (add_header) {
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "locations");
}
pcmk__output_create_xml_node(out, XML_CONS_TAG_RSC_LOCATION,
"node", node->details->uname,
"rsc", rsc->id,
"id", cons->id,
"score",
pcmk_readable_score(node->weight),
NULL);
}
}
if (add_header) {
PCMK__OUTPUT_LIST_FOOTER(out, rc);
}
return rc;
}
PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *",
"pe_node_t *", "pe_node_t *", "pe_action_t *",
"pe_action_t *")
static int
rsc_action_item(pcmk__output_t *out, va_list args)
{
const char *change = va_arg(args, const char *);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
pe_node_t *origin = va_arg(args, pe_node_t *);
pe_node_t *destination = va_arg(args, pe_node_t *);
pe_action_t *action = va_arg(args, pe_action_t *);
pe_action_t *source = va_arg(args, pe_action_t *);
int len = 0;
char *reason = NULL;
char *details = NULL;
bool same_host = false;
bool same_role = false;
bool need_role = false;
static int rsc_width = 5;
static int detail_width = 5;
CRM_ASSERT(action);
CRM_ASSERT(destination != NULL || origin != NULL);
if (source == NULL) {
source = action;
}
len = strlen(rsc->id);
if (len > rsc_width) {
rsc_width = len + 2;
}
if ((rsc->role > RSC_ROLE_STARTED)
|| (rsc->next_role > RSC_ROLE_UNPROMOTED)) {
need_role = true;
}
if (pe__same_node(origin, destination)) {
same_host = true;
}
if (rsc->role == rsc->next_role) {
same_role = true;
}
if (need_role && (origin == NULL)) {
/* Starting and promoting a promotable clone instance */
details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role),
role2text(rsc->next_role),
pe__node_name(destination));
} else if (origin == NULL) {
/* Starting a resource */
details = crm_strdup_printf("%s", pe__node_name(destination));
} else if (need_role && (destination == NULL)) {
/* Stopping a promotable clone instance */
details = crm_strdup_printf("%s %s", role2text(rsc->role),
pe__node_name(origin));
} else if (destination == NULL) {
/* Stopping a resource */
details = crm_strdup_printf("%s", pe__node_name(origin));
} else if (need_role && same_role && same_host) {
/* Recovering, restarting or re-promoting a promotable clone instance */
details = crm_strdup_printf("%s %s", role2text(rsc->role),
pe__node_name(origin));
} else if (same_role && same_host) {
/* Recovering or Restarting a normal resource */
details = crm_strdup_printf("%s", pe__node_name(origin));
} else if (need_role && same_role) {
/* Moving a promotable clone instance */
details = crm_strdup_printf("%s -> %s %s", pe__node_name(origin),
pe__node_name(destination),
role2text(rsc->role));
} else if (same_role) {
/* Moving a normal resource */
details = crm_strdup_printf("%s -> %s", pe__node_name(origin),
pe__node_name(destination));
} else if (same_host) {
/* Promoting or demoting a promotable clone instance */
details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role),
role2text(rsc->next_role),
pe__node_name(origin));
} else {
/* Moving and promoting/demoting */
details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role),
pe__node_name(origin),
role2text(rsc->next_role),
pe__node_name(destination));
}
len = strlen(details);
if (len > detail_width) {
detail_width = len;
}
if ((source->reason != NULL)
&& !pcmk_is_set(action->flags, pe_action_runnable)) {
reason = crm_strdup_printf("due to %s (blocked)", source->reason);
} else if (source->reason) {
reason = crm_strdup_printf("due to %s", source->reason);
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
reason = strdup("blocked");
}
out->list_item(out, NULL, "%-8s %-*s ( %*s )%s%s",
change, rsc_width, rsc->id, detail_width, details,
((reason == NULL)? "" : " "), pcmk__s(reason, ""));
free(details);
free(reason);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("rsc-action-item", "const char *", "pe_resource_t *",
"pe_node_t *", "pe_node_t *", "pe_action_t *",
"pe_action_t *")
static int
rsc_action_item_xml(pcmk__output_t *out, va_list args)
{
const char *change = va_arg(args, const char *);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
pe_node_t *origin = va_arg(args, pe_node_t *);
pe_node_t *destination = va_arg(args, pe_node_t *);
pe_action_t *action = va_arg(args, pe_action_t *);
pe_action_t *source = va_arg(args, pe_action_t *);
char *change_str = NULL;
bool same_host = false;
bool same_role = false;
bool need_role = false;
xmlNode *xml = NULL;
CRM_ASSERT(action);
CRM_ASSERT(destination != NULL || origin != NULL);
if (source == NULL) {
source = action;
}
if ((rsc->role > RSC_ROLE_STARTED)
|| (rsc->next_role > RSC_ROLE_UNPROMOTED)) {
need_role = true;
}
if (pe__same_node(origin, destination)) {
same_host = true;
}
if (rsc->role == rsc->next_role) {
same_role = true;
}
change_str = g_ascii_strdown(change, -1);
xml = pcmk__output_create_xml_node(out, "rsc_action",
"action", change_str,
"resource", rsc->id,
NULL);
g_free(change_str);
if (need_role && (origin == NULL)) {
/* Starting and promoting a promotable clone instance */
pcmk__xe_set_props(xml,
"role", role2text(rsc->role),
"next-role", role2text(rsc->next_role),
"dest", destination->details->uname,
NULL);
} else if (origin == NULL) {
/* Starting a resource */
crm_xml_add(xml, "node", destination->details->uname);
} else if (need_role && (destination == NULL)) {
/* Stopping a promotable clone instance */
pcmk__xe_set_props(xml,
"role", role2text(rsc->role),
"node", origin->details->uname,
NULL);
} else if (destination == NULL) {
/* Stopping a resource */
crm_xml_add(xml, "node", origin->details->uname);
} else if (need_role && same_role && same_host) {
/* Recovering, restarting or re-promoting a promotable clone instance */
pcmk__xe_set_props(xml,
"role", role2text(rsc->role),
"source", origin->details->uname,
NULL);
} else if (same_role && same_host) {
/* Recovering or Restarting a normal resource */
crm_xml_add(xml, "source", origin->details->uname);
} else if (need_role && same_role) {
/* Moving a promotable clone instance */
pcmk__xe_set_props(xml,
"source", origin->details->uname,
"dest", destination->details->uname,
"role", role2text(rsc->role),
NULL);
} else if (same_role) {
/* Moving a normal resource */
pcmk__xe_set_props(xml,
"source", origin->details->uname,
"dest", destination->details->uname,
NULL);
} else if (same_host) {
/* Promoting or demoting a promotable clone instance */
pcmk__xe_set_props(xml,
"role", role2text(rsc->role),
"next-role", role2text(rsc->next_role),
"source", origin->details->uname,
NULL);
} else {
/* Moving and promoting/demoting */
pcmk__xe_set_props(xml,
"role", role2text(rsc->role),
"source", origin->details->uname,
"next-role", role2text(rsc->next_role),
"dest", destination->details->uname,
NULL);
}
if (source->reason && !pcmk_is_set(action->flags, pe_action_runnable)) {
pcmk__xe_set_props(xml,
"reason", source->reason,
"blocked", "true",
NULL);
} else if (source->reason != NULL) {
crm_xml_add(xml, "reason", source->reason);
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
pcmk__xe_set_bool_attr(xml, "blocked", true);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool")
static int
rsc_is_colocated_with_list(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
int rc = pcmk_rc_no_output;
if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
return rc;
}
/* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons
* directly rather than rsc->cmds->this_with_colocations().
*/
pe__set_resource_flags(rsc, pe_rsc_detect_loop);
for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
char *hdr = NULL;
PCMK__OUTPUT_LIST_HEADER(out, false, rc,
"Resources %s is colocated with", rsc->id);
if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) {
out->list_item(out, NULL, "%s (id=%s - loop)",
cons->primary->id, cons->id);
continue;
}
hdr = colocations_header(cons->primary, cons, false);
out->list_item(out, NULL, "%s", hdr);
free(hdr);
// Empty list header for indentation of information about this resource
out->begin_list(out, NULL, NULL, NULL);
out->message(out, "locations-list", cons->primary);
if (recursive) {
out->message(out, "rsc-is-colocated-with-list",
cons->primary, recursive);
}
out->end_list(out);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("rsc-is-colocated-with-list", "pe_resource_t *", "bool")
static int
rsc_is_colocated_with_list_xml(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
int rc = pcmk_rc_no_output;
if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
return rc;
}
/* We're listing constraints explicitly involving rsc, so use rsc->rsc_cons
* directly rather than rsc->cmds->this_with_colocations().
*/
pe__set_resource_flags(rsc, pe_rsc_detect_loop);
for (GList *lpc = rsc->rsc_cons; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
if (pcmk_is_set(cons->primary->flags, pe_rsc_detect_loop)) {
colocations_xml_node(out, cons->primary, cons);
continue;
}
colocations_xml_node(out, cons->primary, cons);
do_locations_list_xml(out, cons->primary, false);
if (recursive) {
out->message(out, "rsc-is-colocated-with-list",
cons->primary, recursive);
}
}
return rc;
}
PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool")
static int
rscs_colocated_with_list(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
int rc = pcmk_rc_no_output;
if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
return rc;
}
/* We're listing constraints explicitly involving rsc, so use
* rsc->rsc_cons_lhs directly rather than
* rsc->cmds->with_this_colocations().
*/
pe__set_resource_flags(rsc, pe_rsc_detect_loop);
for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
char *hdr = NULL;
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Resources colocated with %s",
rsc->id);
if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) {
out->list_item(out, NULL, "%s (id=%s - loop)",
cons->dependent->id, cons->id);
continue;
}
hdr = colocations_header(cons->dependent, cons, true);
out->list_item(out, NULL, "%s", hdr);
free(hdr);
// Empty list header for indentation of information about this resource
out->begin_list(out, NULL, NULL, NULL);
out->message(out, "locations-list", cons->dependent);
if (recursive) {
out->message(out, "rscs-colocated-with-list",
cons->dependent, recursive);
}
out->end_list(out);
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("rscs-colocated-with-list", "pe_resource_t *", "bool")
static int
rscs_colocated_with_list_xml(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
int rc = pcmk_rc_no_output;
if (pcmk_is_set(rsc->flags, pe_rsc_detect_loop)) {
return rc;
}
/* We're listing constraints explicitly involving rsc, so use
* rsc->rsc_cons_lhs directly rather than
* rsc->cmds->with_this_colocations().
*/
pe__set_resource_flags(rsc, pe_rsc_detect_loop);
for (GList *lpc = rsc->rsc_cons_lhs; lpc != NULL; lpc = lpc->next) {
pcmk__colocation_t *cons = (pcmk__colocation_t *) lpc->data;
if (pcmk_is_set(cons->dependent->flags, pe_rsc_detect_loop)) {
colocations_xml_node(out, cons->dependent, cons);
continue;
}
colocations_xml_node(out, cons->dependent, cons);
do_locations_list_xml(out, cons->dependent, false);
if (recursive) {
out->message(out, "rscs-colocated-with-list",
cons->dependent, recursive);
}
}
return rc;
}
PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *")
static int
locations_list(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *lpc = NULL;
GList *list = rsc->rsc_location;
int rc = pcmk_rc_no_output;
for (lpc = list; lpc != NULL; lpc = lpc->next) {
pe__location_t *cons = lpc->data;
GList *lpc2 = NULL;
for (lpc2 = cons->node_list_rh; lpc2 != NULL; lpc2 = lpc2->next) {
pe_node_t *node = (pe_node_t *) lpc2->data;
PCMK__OUTPUT_LIST_HEADER(out, false, rc, "Locations");
out->list_item(out, NULL, "Node %s (score=%s, id=%s, rsc=%s)",
pe__node_name(node),
pcmk_readable_score(node->weight), cons->id,
rsc->id);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
PCMK__OUTPUT_ARGS("locations-list", "pe_resource_t *")
static int
locations_list_xml(pcmk__output_t *out, va_list args) {
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
return do_locations_list_xml(out, rsc, true);
}
PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *",
"bool", "bool")
static int
locations_and_colocations(pcmk__output_t *out, va_list args)
{
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
bool force = va_arg(args, int);
pcmk__unpack_constraints(rsc->cluster);
// Constraints apply to group/clone, not member/instance
if (!force) {
rsc = uber_parent(rsc);
}
out->message(out, "locations-list", rsc);
pe__clear_resource_flags_on_all(rsc->cluster, pe_rsc_detect_loop);
out->message(out, "rscs-colocated-with-list", rsc, recursive);
pe__clear_resource_flags_on_all(rsc->cluster, pe_rsc_detect_loop);
out->message(out, "rsc-is-colocated-with-list", rsc, recursive);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("locations-and-colocations", "pe_resource_t *",
"bool", "bool")
static int
locations_and_colocations_xml(pcmk__output_t *out, va_list args)
{
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
bool recursive = va_arg(args, int);
bool force = va_arg(args, int);
pcmk__unpack_constraints(rsc->cluster);
// Constraints apply to group/clone, not member/instance
if (!force) {
rsc = uber_parent(rsc);
}
pcmk__output_xml_create_parent(out, "constraints", NULL);
do_locations_list_xml(out, rsc, false);
pe__clear_resource_flags_on_all(rsc->cluster, pe_rsc_detect_loop);
out->message(out, "rscs-colocated-with-list", rsc, recursive);
pe__clear_resource_flags_on_all(rsc->cluster, pe_rsc_detect_loop);
out->message(out, "rsc-is-colocated-with-list", rsc, recursive);
pcmk__output_xml_pop_parent(out);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *",
"const char *")
static int
health(pcmk__output_t *out, va_list args)
{
const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
const char *host_from = va_arg(args, const char *);
const char *fsa_state = va_arg(args, const char *);
const char *result = va_arg(args, const char *);
return out->info(out, "Controller on %s in state %s: %s",
pcmk__s(host_from, "unknown node"),
pcmk__s(fsa_state, "unknown"),
pcmk__s(result, "unknown result"));
}
PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *",
"const char *")
static int
health_text(pcmk__output_t *out, va_list args)
{
if (!out->is_quiet(out)) {
return health(out, args);
} else {
const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
const char *host_from G_GNUC_UNUSED = va_arg(args, const char *);
const char *fsa_state = va_arg(args, const char *);
const char *result G_GNUC_UNUSED = va_arg(args, const char *);
if (fsa_state != NULL) {
pcmk__formatted_printf(out, "%s\n", fsa_state);
return pcmk_rc_ok;
}
}
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("health", "const char *", "const char *", "const char *",
"const char *")
static int
health_xml(pcmk__output_t *out, va_list args)
{
const char *sys_from = va_arg(args, const char *);
const char *host_from = va_arg(args, const char *);
const char *fsa_state = va_arg(args, const char *);
const char *result = va_arg(args, const char *);
pcmk__output_create_xml_node(out, pcmk__s(sys_from, ""),
"node_name", pcmk__s(host_from, ""),
"state", pcmk__s(fsa_state, ""),
"result", pcmk__s(result, ""),
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
"enum pcmk_pacemakerd_state", "const char *", "time_t")
static int
pacemakerd_health(pcmk__output_t *out, va_list args)
{
const char *sys_from = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = va_arg(args, const char *);
time_t last_updated = va_arg(args, time_t);
char *last_updated_s = NULL;
int rc = pcmk_rc_ok;
if (sys_from == NULL) {
if (state == pcmk_pacemakerd_state_remote) {
sys_from = "pacemaker-remoted";
} else {
sys_from = CRM_SYSTEM_MCP;
}
}
if (state_s == NULL) {
state_s = pcmk__pcmkd_state_enum2friendly(state);
}
if (last_updated != 0) {
last_updated_s = pcmk__epoch2str(&last_updated,
crm_time_log_date
|crm_time_log_timeofday
|crm_time_log_with_timezone);
}
rc = out->info(out, "Status of %s: '%s' (last updated %s)",
sys_from, state_s,
pcmk__s(last_updated_s, "at unknown time"));
free(last_updated_s);
return rc;
}
PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
"enum pcmk_pacemakerd_state", "const char *", "time_t")
static int
pacemakerd_health_html(pcmk__output_t *out, va_list args)
{
const char *sys_from = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = va_arg(args, const char *);
time_t last_updated = va_arg(args, time_t);
char *last_updated_s = NULL;
char *msg = NULL;
if (sys_from == NULL) {
if (state == pcmk_pacemakerd_state_remote) {
sys_from = "pacemaker-remoted";
} else {
sys_from = CRM_SYSTEM_MCP;
}
}
if (state_s == NULL) {
state_s = pcmk__pcmkd_state_enum2friendly(state);
}
if (last_updated != 0) {
last_updated_s = pcmk__epoch2str(&last_updated,
crm_time_log_date
|crm_time_log_timeofday
|crm_time_log_with_timezone);
}
msg = crm_strdup_printf("Status of %s: '%s' (last updated %s)",
sys_from, state_s,
pcmk__s(last_updated_s, "at unknown time"));
pcmk__output_create_html_node(out, "li", NULL, NULL, msg);
free(msg);
free(last_updated_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
"enum pcmk_pacemakerd_state", "const char *", "time_t")
static int
pacemakerd_health_text(pcmk__output_t *out, va_list args)
{
if (!out->is_quiet(out)) {
return pacemakerd_health(out, args);
} else {
const char *sys_from G_GNUC_UNUSED = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = va_arg(args, const char *);
time_t last_updated G_GNUC_UNUSED = va_arg(args, time_t);
if (state_s == NULL) {
state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
}
pcmk__formatted_printf(out, "%s\n", state_s);
return pcmk_rc_ok;
}
}
PCMK__OUTPUT_ARGS("pacemakerd-health", "const char *",
"enum pcmk_pacemakerd_state", "const char *", "time_t")
static int
pacemakerd_health_xml(pcmk__output_t *out, va_list args)
{
const char *sys_from = va_arg(args, const char *);
enum pcmk_pacemakerd_state state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
const char *state_s = va_arg(args, const char *);
time_t last_updated = va_arg(args, time_t);
char *last_updated_s = NULL;
if (sys_from == NULL) {
if (state == pcmk_pacemakerd_state_remote) {
sys_from = "pacemaker-remoted";
} else {
sys_from = CRM_SYSTEM_MCP;
}
}
if (state_s == NULL) {
state_s = pcmk_pacemakerd_api_daemon_state_enum2text(state);
}
if (last_updated != 0) {
last_updated_s = pcmk__epoch2str(&last_updated,
crm_time_log_date
|crm_time_log_timeofday
|crm_time_log_with_timezone);
}
pcmk__output_create_xml_node(out, "pacemakerd",
"sys_from", sys_from,
"state", state_s,
"last_updated", last_updated_s,
NULL);
free(last_updated_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t")
static int
profile_default(pcmk__output_t *out, va_list args) {
const char *xml_file = va_arg(args, const char *);
clock_t start = va_arg(args, clock_t);
clock_t end = va_arg(args, clock_t);
out->list_item(out, NULL, "Testing %s ... %.2f secs", xml_file,
(end - start) / (float) CLOCKS_PER_SEC);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("profile", "const char *", "clock_t", "clock_t")
static int
profile_xml(pcmk__output_t *out, va_list args) {
const char *xml_file = va_arg(args, const char *);
clock_t start = va_arg(args, clock_t);
clock_t end = va_arg(args, clock_t);
char *duration = pcmk__ftoa((end - start) / (float) CLOCKS_PER_SEC);
pcmk__output_create_xml_node(out, "timing",
"file", xml_file,
"duration", duration,
NULL);
free(duration);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("dc", "const char *")
static int
dc(pcmk__output_t *out, va_list args)
{
const char *dc = va_arg(args, const char *);
return out->info(out, "Designated Controller is: %s",
pcmk__s(dc, "not yet elected"));
}
PCMK__OUTPUT_ARGS("dc", "const char *")
static int
dc_text(pcmk__output_t *out, va_list args)
{
if (!out->is_quiet(out)) {
return dc(out, args);
} else {
const char *dc = va_arg(args, const char *);
if (dc != NULL) {
pcmk__formatted_printf(out, "%s\n", pcmk__s(dc, ""));
return pcmk_rc_ok;
}
}
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("dc", "const char *")
static int
dc_xml(pcmk__output_t *out, va_list args)
{
const char *dc = va_arg(args, const char *);
pcmk__output_create_xml_node(out, "dc",
"node_name", pcmk__s(dc, ""),
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *",
"const char *", "bool")
static int
crmadmin_node(pcmk__output_t *out, va_list args)
{
const char *type = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *id = va_arg(args, const char *);
bool bash_export = va_arg(args, int);
if (bash_export) {
return out->info(out, "export %s=%s",
pcmk__s(name, "<null>"), pcmk__s(id, ""));
} else {
return out->info(out, "%s node: %s (%s)", type ? type : "cluster",
pcmk__s(name, "<null>"), pcmk__s(id, "<null>"));
}
}
PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *",
"const char *", "bool")
static int
crmadmin_node_text(pcmk__output_t *out, va_list args)
{
if (!out->is_quiet(out)) {
return crmadmin_node(out, args);
} else {
const char *type G_GNUC_UNUSED = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *id G_GNUC_UNUSED = va_arg(args, const char *);
bool bash_export G_GNUC_UNUSED = va_arg(args, int);
pcmk__formatted_printf(out, "%s\n", pcmk__s(name, "<null>"));
return pcmk_rc_ok;
}
}
PCMK__OUTPUT_ARGS("crmadmin-node", "const char *", "const char *",
"const char *", "bool")
static int
crmadmin_node_xml(pcmk__output_t *out, va_list args)
{
const char *type = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *id = va_arg(args, const char *);
bool bash_export G_GNUC_UNUSED = va_arg(args, int);
pcmk__output_create_xml_node(out, "node",
"type", type ? type : "cluster",
"name", pcmk__s(name, ""),
"id", pcmk__s(id, ""),
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *",
"const char *", "guint", "const op_digest_cache_t *")
static int
digests_text(pcmk__output_t *out, va_list args)
{
const pe_resource_t *rsc = va_arg(args, const pe_resource_t *);
const pe_node_t *node = va_arg(args, const pe_node_t *);
const char *task = va_arg(args, const char *);
guint interval_ms = va_arg(args, guint);
const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *);
char *action_desc = NULL;
const char *rsc_desc = "unknown resource";
const char *node_desc = "unknown node";
if (interval_ms != 0) {
action_desc = crm_strdup_printf("%ums-interval %s action", interval_ms,
((task == NULL)? "unknown" : task));
} else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
action_desc = strdup("probe action");
} else {
action_desc = crm_strdup_printf("%s action",
((task == NULL)? "unknown" : task));
}
if ((rsc != NULL) && (rsc->id != NULL)) {
rsc_desc = rsc->id;
}
if ((node != NULL) && (node->details->uname != NULL)) {
node_desc = node->details->uname;
}
out->begin_list(out, NULL, NULL, "Digests for %s %s on %s",
rsc_desc, action_desc, node_desc);
free(action_desc);
if (digests == NULL) {
out->list_item(out, NULL, "none");
out->end_list(out);
return pcmk_rc_ok;
}
if (digests->digest_all_calc != NULL) {
out->list_item(out, NULL, "%s (all parameters)",
digests->digest_all_calc);
}
if (digests->digest_secure_calc != NULL) {
out->list_item(out, NULL, "%s (non-private parameters)",
digests->digest_secure_calc);
}
if (digests->digest_restart_calc != NULL) {
out->list_item(out, NULL, "%s (non-reloadable parameters)",
digests->digest_restart_calc);
}
out->end_list(out);
return pcmk_rc_ok;
}
static void
add_digest_xml(xmlNode *parent, const char *type, const char *digest,
xmlNode *digest_source)
{
if (digest != NULL) {
xmlNodePtr digest_xml = create_xml_node(parent, "digest");
crm_xml_add(digest_xml, "type", ((type == NULL)? "unspecified" : type));
crm_xml_add(digest_xml, "hash", digest);
if (digest_source != NULL) {
add_node_copy(digest_xml, digest_source);
}
}
}
PCMK__OUTPUT_ARGS("digests", "const pe_resource_t *", "const pe_node_t *",
"const char *", "guint", "const op_digest_cache_t *")
static int
digests_xml(pcmk__output_t *out, va_list args)
{
const pe_resource_t *rsc = va_arg(args, const pe_resource_t *);
const pe_node_t *node = va_arg(args, const pe_node_t *);
const char *task = va_arg(args, const char *);
guint interval_ms = va_arg(args, guint);
const op_digest_cache_t *digests = va_arg(args, const op_digest_cache_t *);
char *interval_s = crm_strdup_printf("%ums", interval_ms);
xmlNode *xml = NULL;
xml = pcmk__output_create_xml_node(out, "digests",
"resource", pcmk__s(rsc->id, ""),
"node",
pcmk__s(node->details->uname, ""),
"task", pcmk__s(task, ""),
"interval", interval_s,
NULL);
free(interval_s);
if (digests != NULL) {
add_digest_xml(xml, "all", digests->digest_all_calc,
digests->params_all);
add_digest_xml(xml, "nonprivate", digests->digest_secure_calc,
digests->params_secure);
add_digest_xml(xml, "nonreloadable", digests->digest_restart_calc,
digests->params_restart);
}
return pcmk_rc_ok;
}
#define STOP_SANITY_ASSERT(lineno) do { \
if ((current != NULL) && current->details->unclean) { \
/* It will be a pseudo op */ \
} else if (stop == NULL) { \
crm_err("%s:%d: No stop action exists for %s", \
__func__, lineno, rsc->id); \
CRM_ASSERT(stop != NULL); \
} else if (pcmk_is_set(stop->flags, pe_action_optional)) { \
crm_err("%s:%d: Action %s is still optional", \
__func__, lineno, stop->uuid); \
CRM_ASSERT(!pcmk_is_set(stop->flags, pe_action_optional)); \
} \
} while (0)
PCMK__OUTPUT_ARGS("rsc-action", "pe_resource_t *", "pe_node_t *", "pe_node_t *")
static int
rsc_action_default(pcmk__output_t *out, va_list args)
{
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
pe_node_t *current = va_arg(args, pe_node_t *);
pe_node_t *next = va_arg(args, pe_node_t *);
GList *possible_matches = NULL;
char *key = NULL;
int rc = pcmk_rc_no_output;
bool moving = false;
pe_node_t *start_node = NULL;
pe_action_t *start = NULL;
pe_action_t *stop = NULL;
pe_action_t *promote = NULL;
pe_action_t *demote = NULL;
pe_action_t *reason_op = NULL;
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)
|| (current == NULL && next == NULL)) {
const bool managed = pcmk_is_set(rsc->flags, pe_rsc_managed);
pe_rsc_info(rsc, "Leave %s\t(%s%s)",
rsc->id, role2text(rsc->role),
(managed? "" : " unmanaged"));
return rc;
}
moving = (current != NULL) && (next != NULL)
&& !pe__same_node(current, next);
possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_START,
false);
if (possible_matches) {
start = possible_matches->data;
g_list_free(possible_matches);
}
if ((start == NULL) || !pcmk_is_set(start->flags, pe_action_runnable)) {
start_node = NULL;
} else {
start_node = current;
}
possible_matches = pe__resource_actions(rsc, start_node, PCMK_ACTION_STOP,
false);
if (possible_matches) {
stop = possible_matches->data;
g_list_free(possible_matches);
} else if (pcmk_is_set(rsc->flags, pe_rsc_stop_unexpected)) {
/* The resource is multiply active with multiple-active set to
* stop_unexpected, and not stopping on its current node, but it should
* be stopping elsewhere.
*/
possible_matches = pe__resource_actions(rsc, NULL, PCMK_ACTION_STOP,
false);
if (possible_matches != NULL) {
stop = possible_matches->data;
g_list_free(possible_matches);
}
}
possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_PROMOTE,
false);
if (possible_matches) {
promote = possible_matches->data;
g_list_free(possible_matches);
}
possible_matches = pe__resource_actions(rsc, next, PCMK_ACTION_DEMOTE,
false);
if (possible_matches) {
demote = possible_matches->data;
g_list_free(possible_matches);
}
if (rsc->role == rsc->next_role) {
pe_action_t *migrate_op = NULL;
CRM_CHECK(next != NULL, return rc);
- possible_matches = pe__resource_actions(rsc, next, RSC_MIGRATED, false);
+ possible_matches = pe__resource_actions(rsc, next,
+ PCMK_ACTION_MIGRATE_FROM,
+ false);
if (possible_matches) {
migrate_op = possible_matches->data;
}
if ((migrate_op != NULL) && (current != NULL)
&& pcmk_is_set(migrate_op->flags, pe_action_runnable)) {
rc = out->message(out, "rsc-action-item", "Migrate", rsc, current,
next, start, NULL);
} else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) {
rc = out->message(out, "rsc-action-item", "Reload", rsc, current,
next, start, NULL);
} else if ((start == NULL)
|| pcmk_is_set(start->flags, pe_action_optional)) {
if ((demote != NULL) && (promote != NULL)
&& !pcmk_is_set(demote->flags, pe_action_optional)
&& !pcmk_is_set(promote->flags, pe_action_optional)) {
rc = out->message(out, "rsc-action-item", "Re-promote", rsc,
current, next, promote, demote);
} else {
pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id,
role2text(rsc->role), pe__node_name(next));
}
} else if (!pcmk_is_set(start->flags, pe_action_runnable)) {
if ((stop == NULL) || (stop->reason == NULL)) {
reason_op = start;
} else {
reason_op = stop;
}
rc = out->message(out, "rsc-action-item", "Stop", rsc, current,
NULL, stop, reason_op);
STOP_SANITY_ASSERT(__LINE__);
} else if (moving && current) {
const bool failed = pcmk_is_set(rsc->flags, pe_rsc_failed);
rc = out->message(out, "rsc-action-item",
(failed? "Recover" : "Move"), rsc, current, next,
stop, NULL);
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
rc = out->message(out, "rsc-action-item", "Recover", rsc, current,
NULL, stop, NULL);
STOP_SANITY_ASSERT(__LINE__);
} else {
rc = out->message(out, "rsc-action-item", "Restart", rsc, current,
next, start, NULL);
#if 0
/* @TODO This can be reached in situations that should really be
* "Start" (see for example the migrate-fail-7 regression test)
*/
STOP_SANITY_ASSERT(__LINE__);
#endif
}
g_list_free(possible_matches);
return rc;
}
if ((stop != NULL)
&& ((rsc->next_role == RSC_ROLE_STOPPED)
|| ((start != NULL)
&& !pcmk_is_set(start->flags, pe_action_runnable)))) {
key = stop_key(rsc);
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *node = iter->data;
pe_action_t *stop_op = NULL;
reason_op = start;
possible_matches = find_actions(rsc->actions, key, node);
if (possible_matches) {
stop_op = possible_matches->data;
g_list_free(possible_matches);
}
if (stop_op != NULL) {
if (pcmk_is_set(stop_op->flags, pe_action_runnable)) {
STOP_SANITY_ASSERT(__LINE__);
}
if (stop_op->reason != NULL) {
reason_op = stop_op;
}
}
if (out->message(out, "rsc-action-item", "Stop", rsc, node, NULL,
stop_op, reason_op) == pcmk_rc_ok) {
rc = pcmk_rc_ok;
}
}
free(key);
} else if ((stop != NULL)
&& pcmk_all_flags_set(rsc->flags, pe_rsc_failed|pe_rsc_stop)) {
/* 'stop' may be NULL if the failure was ignored */
rc = out->message(out, "rsc-action-item", "Recover", rsc, current,
next, stop, start);
STOP_SANITY_ASSERT(__LINE__);
} else if (moving) {
rc = out->message(out, "rsc-action-item", "Move", rsc, current, next,
stop, NULL);
STOP_SANITY_ASSERT(__LINE__);
} else if (pcmk_is_set(rsc->flags, pe_rsc_reload)) {
rc = out->message(out, "rsc-action-item", "Reload", rsc, current, next,
start, NULL);
} else if (stop != NULL && !pcmk_is_set(stop->flags, pe_action_optional)) {
rc = out->message(out, "rsc-action-item", "Restart", rsc, current,
next, start, NULL);
STOP_SANITY_ASSERT(__LINE__);
} else if (rsc->role == RSC_ROLE_PROMOTED) {
CRM_LOG_ASSERT(current != NULL);
rc = out->message(out, "rsc-action-item", "Demote", rsc, current,
next, demote, NULL);
} else if (rsc->next_role == RSC_ROLE_PROMOTED) {
CRM_LOG_ASSERT(next);
rc = out->message(out, "rsc-action-item", "Promote", rsc, current,
next, promote, NULL);
} else if ((rsc->role == RSC_ROLE_STOPPED)
&& (rsc->next_role > RSC_ROLE_STOPPED)) {
rc = out->message(out, "rsc-action-item", "Start", rsc, current, next,
start, NULL);
}
return rc;
}
PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *")
static int
node_action(pcmk__output_t *out, va_list args)
{
const char *task = va_arg(args, const char *);
const char *node_name = va_arg(args, const char *);
const char *reason = va_arg(args, const char *);
if (task == NULL) {
return pcmk_rc_no_output;
} else if (reason) {
out->list_item(out, NULL, "%s %s '%s'", task, node_name, reason);
} else {
crm_notice(" * %s %s", task, node_name);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-action", "const char *", "const char *", "const char *")
static int
node_action_xml(pcmk__output_t *out, va_list args)
{
const char *task = va_arg(args, const char *);
const char *node_name = va_arg(args, const char *);
const char *reason = va_arg(args, const char *);
if (task == NULL) {
return pcmk_rc_no_output;
} else if (reason) {
pcmk__output_create_xml_node(out, "node_action",
"task", task,
"node", node_name,
"reason", reason,
NULL);
} else {
crm_notice(" * %s %s", task, node_name);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *",
"const char *", "bool", "bool")
static int
node_info_default(pcmk__output_t *out, va_list args)
{
int node_id = va_arg(args, int);
const char *node_name = va_arg(args, const char *);
const char *uuid = va_arg(args, const char *);
const char *state = va_arg(args, const char *);
bool have_quorum = (bool) va_arg(args, int);
bool is_remote = (bool) va_arg(args, int);
return out->info(out,
"Node %d: %s "
"(uuid=%s, state=%s, have_quorum=%s, is_remote=%s)",
node_id, pcmk__s(node_name, "unknown"),
pcmk__s(uuid, "unknown"), pcmk__s(state, "unknown"),
pcmk__btoa(have_quorum), pcmk__btoa(is_remote));
}
PCMK__OUTPUT_ARGS("node-info", "int", "const char *", "const char *",
"const char *", "bool", "bool")
static int
node_info_xml(pcmk__output_t *out, va_list args)
{
int node_id = va_arg(args, int);
const char *node_name = va_arg(args, const char *);
const char *uuid = va_arg(args, const char *);
const char *state = va_arg(args, const char *);
bool have_quorum = (bool) va_arg(args, int);
bool is_remote = (bool) va_arg(args, int);
char *id_s = crm_strdup_printf("%d", node_id);
pcmk__output_create_xml_node(out, "node-info",
"nodeid", id_s,
XML_ATTR_UNAME, node_name,
XML_ATTR_ID, uuid,
XML_NODE_IS_PEER, state,
XML_ATTR_HAVE_QUORUM, pcmk__btoa(have_quorum),
XML_NODE_IS_REMOTE, pcmk__btoa(is_remote),
NULL);
free(id_s);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *",
"xmlNodePtr")
static int
inject_cluster_action(pcmk__output_t *out, va_list args)
{
const char *node = va_arg(args, const char *);
const char *task = va_arg(args, const char *);
xmlNodePtr rsc = va_arg(args, xmlNodePtr);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
if (rsc != NULL) {
out->list_item(out, NULL, "Cluster action: %s for %s on %s",
task, ID(rsc), node);
} else {
out->list_item(out, NULL, "Cluster action: %s on %s", task, node);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-cluster-action", "const char *", "const char *",
"xmlNodePtr")
static int
inject_cluster_action_xml(pcmk__output_t *out, va_list args)
{
const char *node = va_arg(args, const char *);
const char *task = va_arg(args, const char *);
xmlNodePtr rsc = va_arg(args, xmlNodePtr);
xmlNodePtr xml_node = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
xml_node = pcmk__output_create_xml_node(out, "cluster_action",
"task", task,
"node", node,
NULL);
if (rsc) {
crm_xml_add(xml_node, "id", ID(rsc));
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *")
static int
inject_fencing_action(pcmk__output_t *out, va_list args)
{
const char *target = va_arg(args, const char *);
const char *op = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
out->list_item(out, NULL, "Fencing %s (%s)", target, op);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-fencing-action", "const char *", "const char *")
static int
inject_fencing_action_xml(pcmk__output_t *out, va_list args)
{
const char *target = va_arg(args, const char *);
const char *op = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
pcmk__output_create_xml_node(out, "fencing_action",
"target", target,
"op", op,
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr")
static int
inject_attr(pcmk__output_t *out, va_list args)
{
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
xmlNodePtr cib_node = va_arg(args, xmlNodePtr);
xmlChar *node_path = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
node_path = xmlGetNodePath(cib_node);
out->list_item(out, NULL, "Injecting attribute %s=%s into %s '%s'",
name, value, node_path, ID(cib_node));
free(node_path);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-attr", "const char *", "const char *", "xmlNodePtr")
static int
inject_attr_xml(pcmk__output_t *out, va_list args)
{
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
xmlNodePtr cib_node = va_arg(args, xmlNodePtr);
xmlChar *node_path = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
node_path = xmlGetNodePath(cib_node);
pcmk__output_create_xml_node(out, "inject_attr",
"name", name,
"value", value,
"node_path", node_path,
"cib_node", ID(cib_node),
NULL);
free(node_path);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-spec", "const char *")
static int
inject_spec(pcmk__output_t *out, va_list args)
{
const char *spec = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
out->list_item(out, NULL, "Injecting %s into the configuration", spec);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-spec", "const char *")
static int
inject_spec_xml(pcmk__output_t *out, va_list args)
{
const char *spec = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
pcmk__output_create_xml_node(out, "inject_spec",
"spec", spec,
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *")
static int
inject_modify_config(pcmk__output_t *out, va_list args)
{
const char *quorum = va_arg(args, const char *);
const char *watchdog = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
out->begin_list(out, NULL, NULL, "Performing Requested Modifications");
if (quorum) {
out->list_item(out, NULL, "Setting quorum: %s", quorum);
}
if (watchdog) {
out->list_item(out, NULL, "Setting watchdog: %s", watchdog);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-modify-config", "const char *", "const char *")
static int
inject_modify_config_xml(pcmk__output_t *out, va_list args)
{
const char *quorum = va_arg(args, const char *);
const char *watchdog = va_arg(args, const char *);
xmlNodePtr node = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
node = pcmk__output_xml_create_parent(out, "modifications", NULL);
if (quorum) {
crm_xml_add(node, "quorum", quorum);
}
if (watchdog) {
crm_xml_add(node, "watchdog", watchdog);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *")
static int
inject_modify_node(pcmk__output_t *out, va_list args)
{
const char *action = va_arg(args, const char *);
const char *node = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
if (pcmk__str_eq(action, "Online", pcmk__str_none)) {
out->list_item(out, NULL, "Bringing node %s online", node);
return pcmk_rc_ok;
} else if (pcmk__str_eq(action, "Offline", pcmk__str_none)) {
out->list_item(out, NULL, "Taking node %s offline", node);
return pcmk_rc_ok;
} else if (pcmk__str_eq(action, "Failing", pcmk__str_none)) {
out->list_item(out, NULL, "Failing node %s", node);
return pcmk_rc_ok;
}
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("inject-modify-node", "const char *", "const char *")
static int
inject_modify_node_xml(pcmk__output_t *out, va_list args)
{
const char *action = va_arg(args, const char *);
const char *node = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
pcmk__output_create_xml_node(out, "modify_node",
"action", action,
"node", node,
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *")
static int
inject_modify_ticket(pcmk__output_t *out, va_list args)
{
const char *action = va_arg(args, const char *);
const char *ticket = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
if (pcmk__str_eq(action, "Standby", pcmk__str_none)) {
out->list_item(out, NULL, "Making ticket %s standby", ticket);
} else {
out->list_item(out, NULL, "%s ticket %s", action, ticket);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-modify-ticket", "const char *", "const char *")
static int
inject_modify_ticket_xml(pcmk__output_t *out, va_list args)
{
const char *action = va_arg(args, const char *);
const char *ticket = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
pcmk__output_create_xml_node(out, "modify_ticket",
"action", action,
"ticket", ticket,
NULL);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
static int
inject_pseudo_action(pcmk__output_t *out, va_list args)
{
const char *node = va_arg(args, const char *);
const char *task = va_arg(args, const char *);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
out->list_item(out, NULL, "Pseudo action: %s%s%s",
task, ((node == NULL)? "" : " on "), pcmk__s(node, ""));
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-pseudo-action", "const char *", "const char *")
static int
inject_pseudo_action_xml(pcmk__output_t *out, va_list args)
{
const char *node = va_arg(args, const char *);
const char *task = va_arg(args, const char *);
xmlNodePtr xml_node = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
xml_node = pcmk__output_create_xml_node(out, "pseudo_action",
"task", task,
NULL);
if (node) {
crm_xml_add(xml_node, "node", node);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
"const char *", "guint")
static int
inject_rsc_action(pcmk__output_t *out, va_list args)
{
const char *rsc = va_arg(args, const char *);
const char *operation = va_arg(args, const char *);
const char *node = va_arg(args, const char *);
guint interval_ms = va_arg(args, guint);
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
if (interval_ms) {
out->list_item(out, NULL, "Resource action: %-15s %s=%u on %s",
rsc, operation, interval_ms, node);
} else {
out->list_item(out, NULL, "Resource action: %-15s %s on %s",
rsc, operation, node);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("inject-rsc-action", "const char *", "const char *",
"const char *", "guint")
static int
inject_rsc_action_xml(pcmk__output_t *out, va_list args)
{
const char *rsc = va_arg(args, const char *);
const char *operation = va_arg(args, const char *);
const char *node = va_arg(args, const char *);
guint interval_ms = va_arg(args, guint);
xmlNodePtr xml_node = NULL;
if (out->is_quiet(out)) {
return pcmk_rc_no_output;
}
xml_node = pcmk__output_create_xml_node(out, "rsc_action",
"resource", rsc,
"op", operation,
"node", node,
NULL);
if (interval_ms) {
char *interval_s = pcmk__itoa(interval_ms);
crm_xml_add(xml_node, "interval", interval_s);
free(interval_s);
}
return pcmk_rc_ok;
}
#define CHECK_RC(retcode, retval) \
if (retval == pcmk_rc_ok) { \
retcode = pcmk_rc_ok; \
}
PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
"enum pcmk_pacemakerd_state", "crm_exit_t",
"stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
"uint32_t", "const char *", "GList *", "GList *")
int
pcmk__cluster_status_text(pcmk__output_t *out, va_list args)
{
pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
crm_exit_t history_rc = va_arg(args, crm_exit_t);
stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
enum pcmk__fence_history fence_history = va_arg(args, int);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
const char *prefix = va_arg(args, const char *);
GList *unames = va_arg(args, GList *);
GList *resources = va_arg(args, GList *);
int rc = pcmk_rc_no_output;
bool already_printed_failure = false;
CHECK_RC(rc, out->message(out, "cluster-summary", data_set, pcmkd_state,
section_opts, show_opts));
if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) {
CHECK_RC(rc, out->message(out, "node-list", data_set->nodes, unames,
resources, show_opts, rc == pcmk_rc_ok));
}
/* Print resources section, if needed */
if (pcmk_is_set(section_opts, pcmk_section_resources)) {
CHECK_RC(rc, out->message(out, "resource-list", data_set, show_opts,
true, unames, resources, rc == pcmk_rc_ok));
}
/* print Node Attributes section if requested */
if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
CHECK_RC(rc, out->message(out, "node-attribute-list", data_set,
show_opts, (rc == pcmk_rc_ok), unames,
resources));
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (pcmk_any_flags_set(section_opts,
pcmk_section_operations|pcmk_section_failcounts)) {
CHECK_RC(rc, out->message(out, "node-summary", data_set, unames,
resources, section_opts, show_opts,
(rc == pcmk_rc_ok)));
}
/* If there were any failed actions, print them */
if (pcmk_is_set(section_opts, pcmk_section_failures)
&& xml_has_children(data_set->failed)) {
CHECK_RC(rc, out->message(out, "failed-action-list", data_set, unames,
resources, show_opts, rc == pcmk_rc_ok));
}
/* Print failed stonith actions */
if (pcmk_is_set(section_opts, pcmk_section_fence_failed) &&
fence_history != pcmk__fence_history_none) {
if (history_rc == 0) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_eq,
GINT_TO_POINTER(st_failed));
if (hp) {
CHECK_RC(rc, out->message(out, "failed-fencing-list",
stonith_history, unames, section_opts,
show_opts, rc == pcmk_rc_ok));
}
} else {
PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
out->list_item(out, NULL, "Failed to get fencing history: %s",
crm_exit_str(history_rc));
out->end_list(out);
already_printed_failure = true;
}
}
/* Print tickets if requested */
if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
CHECK_RC(rc, out->message(out, "ticket-list", data_set,
(rc == pcmk_rc_ok)));
}
/* Print negative location constraints if requested */
if (pcmk_is_set(section_opts, pcmk_section_bans)) {
CHECK_RC(rc, out->message(out, "ban-list", data_set, prefix, resources,
show_opts, rc == pcmk_rc_ok));
}
/* Print stonith history */
if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) &&
fence_history != pcmk__fence_history_none) {
if (history_rc != 0) {
if (!already_printed_failure) {
PCMK__OUTPUT_SPACER_IF(out, rc == pcmk_rc_ok);
out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
out->list_item(out, NULL, "Failed to get fencing history: %s",
crm_exit_str(history_rc));
out->end_list(out);
}
} else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_neq,
GINT_TO_POINTER(st_failed));
if (hp) {
CHECK_RC(rc, out->message(out, "fencing-list", hp, unames,
section_opts, show_opts,
rc == pcmk_rc_ok));
}
} else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_pending,
NULL);
if (hp) {
CHECK_RC(rc, out->message(out, "pending-fencing-list", hp,
unames, section_opts, show_opts,
rc == pcmk_rc_ok));
}
}
}
return rc;
}
PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
"enum pcmk_pacemakerd_state", "crm_exit_t",
"stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
"uint32_t", "const char *", "GList *", "GList *")
static int
cluster_status_xml(pcmk__output_t *out, va_list args)
{
pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
crm_exit_t history_rc = va_arg(args, crm_exit_t);
stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
enum pcmk__fence_history fence_history = va_arg(args, int);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
const char *prefix = va_arg(args, const char *);
GList *unames = va_arg(args, GList *);
GList *resources = va_arg(args, GList *);
out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts,
show_opts);
/*** NODES ***/
if (pcmk_is_set(section_opts, pcmk_section_nodes)) {
out->message(out, "node-list", data_set->nodes, unames, resources,
show_opts, false);
}
/* Print resources section, if needed */
if (pcmk_is_set(section_opts, pcmk_section_resources)) {
/* XML output always displays full details. */
uint32_t full_show_opts = show_opts & ~pcmk_show_brief;
out->message(out, "resource-list", data_set, full_show_opts,
false, unames, resources, false);
}
/* print Node Attributes section if requested */
if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
out->message(out, "node-attribute-list", data_set, show_opts, false,
unames, resources);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (pcmk_any_flags_set(section_opts,
pcmk_section_operations|pcmk_section_failcounts)) {
out->message(out, "node-summary", data_set, unames,
resources, section_opts, show_opts, false);
}
/* If there were any failed actions, print them */
if (pcmk_is_set(section_opts, pcmk_section_failures)
&& xml_has_children(data_set->failed)) {
out->message(out, "failed-action-list", data_set, unames, resources,
show_opts, false);
}
/* Print stonith history */
if (pcmk_is_set(section_opts, pcmk_section_fencing_all) &&
fence_history != pcmk__fence_history_none) {
out->message(out, "full-fencing-list", history_rc, stonith_history,
unames, section_opts, show_opts, false);
}
/* Print tickets if requested */
if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
out->message(out, "ticket-list", data_set, false);
}
/* Print negative location constraints if requested */
if (pcmk_is_set(section_opts, pcmk_section_bans)) {
out->message(out, "ban-list", data_set, prefix, resources, show_opts,
false);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("cluster-status", "pe_working_set_t *",
"enum pcmk_pacemakerd_state", "crm_exit_t",
"stonith_history_t *", "enum pcmk__fence_history", "uint32_t",
"uint32_t", "const char *", "GList *", "GList *")
static int
cluster_status_html(pcmk__output_t *out, va_list args)
{
pe_working_set_t *data_set = va_arg(args, pe_working_set_t *);
enum pcmk_pacemakerd_state pcmkd_state =
(enum pcmk_pacemakerd_state) va_arg(args, int);
crm_exit_t history_rc = va_arg(args, crm_exit_t);
stonith_history_t *stonith_history = va_arg(args, stonith_history_t *);
enum pcmk__fence_history fence_history = va_arg(args, int);
uint32_t section_opts = va_arg(args, uint32_t);
uint32_t show_opts = va_arg(args, uint32_t);
const char *prefix = va_arg(args, const char *);
GList *unames = va_arg(args, GList *);
GList *resources = va_arg(args, GList *);
bool already_printed_failure = false;
out->message(out, "cluster-summary", data_set, pcmkd_state, section_opts,
show_opts);
/*** NODE LIST ***/
if (pcmk_is_set(section_opts, pcmk_section_nodes) && unames) {
out->message(out, "node-list", data_set->nodes, unames, resources,
show_opts, false);
}
/* Print resources section, if needed */
if (pcmk_is_set(section_opts, pcmk_section_resources)) {
out->message(out, "resource-list", data_set, show_opts, true, unames,
resources, false);
}
/* print Node Attributes section if requested */
if (pcmk_is_set(section_opts, pcmk_section_attributes)) {
out->message(out, "node-attribute-list", data_set, show_opts, false,
unames, resources);
}
/* If requested, print resource operations (which includes failcounts)
* or just failcounts
*/
if (pcmk_any_flags_set(section_opts,
pcmk_section_operations|pcmk_section_failcounts)) {
out->message(out, "node-summary", data_set, unames,
resources, section_opts, show_opts, false);
}
/* If there were any failed actions, print them */
if (pcmk_is_set(section_opts, pcmk_section_failures)
&& xml_has_children(data_set->failed)) {
out->message(out, "failed-action-list", data_set, unames, resources,
show_opts, false);
}
/* Print failed stonith actions */
if (pcmk_is_set(section_opts, pcmk_section_fence_failed) &&
fence_history != pcmk__fence_history_none) {
if (history_rc == 0) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_eq,
GINT_TO_POINTER(st_failed));
if (hp) {
out->message(out, "failed-fencing-list", stonith_history,
unames, section_opts, show_opts, false);
}
} else {
out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
out->list_item(out, NULL, "Failed to get fencing history: %s",
crm_exit_str(history_rc));
out->end_list(out);
}
}
/* Print stonith history */
if (pcmk_any_flags_set(section_opts, pcmk_section_fencing_all) &&
fence_history != pcmk__fence_history_none) {
if (history_rc != 0) {
if (!already_printed_failure) {
out->begin_list(out, NULL, NULL, "Failed Fencing Actions");
out->list_item(out, NULL, "Failed to get fencing history: %s",
crm_exit_str(history_rc));
out->end_list(out);
}
} else if (pcmk_is_set(section_opts, pcmk_section_fence_worked)) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_neq,
GINT_TO_POINTER(st_failed));
if (hp) {
out->message(out, "fencing-list", hp, unames, section_opts,
show_opts, false);
}
} else if (pcmk_is_set(section_opts, pcmk_section_fence_pending)) {
stonith_history_t *hp = NULL;
hp = stonith__first_matching_event(stonith_history,
stonith__event_state_pending,
NULL);
if (hp) {
out->message(out, "pending-fencing-list", hp, unames,
section_opts, show_opts, false);
}
}
}
/* Print tickets if requested */
if (pcmk_is_set(section_opts, pcmk_section_tickets)) {
out->message(out, "ticket-list", data_set, false);
}
/* Print negative location constraints if requested */
if (pcmk_is_set(section_opts, pcmk_section_bans)) {
out->message(out, "ban-list", data_set, prefix, resources, show_opts,
false);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *",
"const char *", "const char *")
static int
attribute_default(pcmk__output_t *out, va_list args)
{
const char *scope = va_arg(args, const char *);
const char *instance = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
const char *host = va_arg(args, const char *);
GString *s = g_string_sized_new(50);
if (!pcmk__str_empty(scope)) {
pcmk__g_strcat(s, "scope=\"", scope, "\" ", NULL);
}
if (!pcmk__str_empty(instance)) {
pcmk__g_strcat(s, "id=\"", instance, "\" ", NULL);
}
pcmk__g_strcat(s, "name=\"", pcmk__s(name, ""), "\" ", NULL);
if (!pcmk__str_empty(host)) {
pcmk__g_strcat(s, "host=\"", host, "\" ", NULL);
}
pcmk__g_strcat(s, "value=\"", pcmk__s(value, ""), "\"", NULL);
out->info(out, "%s", s->str);
g_string_free(s, TRUE);
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("attribute", "const char *", "const char *", "const char *",
"const char *", "const char *")
static int
attribute_xml(pcmk__output_t *out, va_list args)
{
const char *scope = va_arg(args, const char *);
const char *instance = va_arg(args, const char *);
const char *name = va_arg(args, const char *);
const char *value = va_arg(args, const char *);
const char *host = va_arg(args, const char *);
xmlNodePtr node = NULL;
node = pcmk__output_create_xml_node(out, "attribute",
"name", name,
"value", value ? value : "",
NULL);
if (!pcmk__str_empty(scope)) {
crm_xml_add(node, "scope", scope);
}
if (!pcmk__str_empty(instance)) {
crm_xml_add(node, "id", instance);
}
if (!pcmk__str_empty(host)) {
crm_xml_add(node, "host", host);
}
return pcmk_rc_ok;
}
PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *")
static int
rule_check_default(pcmk__output_t *out, va_list args)
{
const char *rule_id = va_arg(args, const char *);
int result = va_arg(args, int);
const char *error = va_arg(args, const char *);
switch (result) {
case pcmk_rc_within_range:
return out->info(out, "Rule %s is still in effect", rule_id);
case pcmk_rc_ok:
return out->info(out, "Rule %s satisfies conditions", rule_id);
case pcmk_rc_after_range:
return out->info(out, "Rule %s is expired", rule_id);
case pcmk_rc_before_range:
return out->info(out, "Rule %s has not yet taken effect", rule_id);
case pcmk_rc_op_unsatisfied:
return out->info(out, "Rule %s does not satisfy conditions",
rule_id);
default:
out->err(out,
"Could not determine whether rule %s is in effect: %s",
rule_id, ((error != NULL)? error : "unexpected error"));
return pcmk_rc_ok;
}
}
PCMK__OUTPUT_ARGS("rule-check", "const char *", "int", "const char *")
static int
rule_check_xml(pcmk__output_t *out, va_list args)
{
const char *rule_id = va_arg(args, const char *);
int result = va_arg(args, int);
const char *error = va_arg(args, const char *);
char *rc_str = pcmk__itoa(pcmk_rc2exitc(result));
pcmk__output_create_xml_node(out, "rule-check",
"rule-id", rule_id,
"rc", rc_str,
NULL);
free(rc_str);
switch (result) {
case pcmk_rc_within_range:
case pcmk_rc_ok:
case pcmk_rc_after_range:
case pcmk_rc_before_range:
case pcmk_rc_op_unsatisfied:
return pcmk_rc_ok;
default:
out->err(out,
"Could not determine whether rule %s is in effect: %s",
rule_id, ((error != NULL)? error : "unexpected error"));
return pcmk_rc_ok;
}
}
PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
static int
result_code_none(pcmk__output_t *out, va_list args)
{
return pcmk_rc_no_output;
}
PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
static int
result_code_text(pcmk__output_t *out, va_list args)
{
int code = va_arg(args, int);
const char *name = va_arg(args, const char *);
const char *desc = va_arg(args, const char *);
static int code_width = 0;
if (out->is_quiet(out)) {
/* If out->is_quiet(), don't print the code. Print name and/or desc in a
* compact format for text output, or print nothing at all for none-type
* output.
*/
if ((name != NULL) && (desc != NULL)) {
pcmk__formatted_printf(out, "%s - %s\n", name, desc);
} else if ((name != NULL) || (desc != NULL)) {
pcmk__formatted_printf(out, "%s\n", ((name != NULL)? name : desc));
}
return pcmk_rc_ok;
}
/* Get length of longest (most negative) standard Pacemaker return code
* This should be longer than all the values of any other type of return
* code.
*/
if (code_width == 0) {
long long most_negative = pcmk_rc_error - (long long) pcmk__n_rc + 1;
code_width = (int) snprintf(NULL, 0, "%lld", most_negative);
}
if ((name != NULL) && (desc != NULL)) {
static int name_width = 0;
if (name_width == 0) {
// Get length of longest standard Pacemaker return code name
for (int lpc = 0; lpc < pcmk__n_rc; lpc++) {
int len = (int) strlen(pcmk_rc_name(pcmk_rc_error - lpc));
name_width = QB_MAX(name_width, len);
}
}
return out->info(out, "% *d: %-*s %s", code_width, code, name_width,
name, desc);
}
if ((name != NULL) || (desc != NULL)) {
return out->info(out, "% *d: %s", code_width, code,
((name != NULL)? name : desc));
}
return out->info(out, "% *d", code_width, code);
}
PCMK__OUTPUT_ARGS("result-code", "int", "const char *", "const char *")
static int
result_code_xml(pcmk__output_t *out, va_list args)
{
int code = va_arg(args, int);
const char *name = va_arg(args, const char *);
const char *desc = va_arg(args, const char *);
char *code_str = pcmk__itoa(code);
pcmk__output_create_xml_node(out, "result-code",
"code", code_str,
XML_ATTR_NAME, name,
XML_ATTR_DESC, desc,
NULL);
free(code_str);
return pcmk_rc_ok;
}
static pcmk__message_entry_t fmt_functions[] = {
{ "attribute", "default", attribute_default },
{ "attribute", "xml", attribute_xml },
{ "cluster-status", "default", pcmk__cluster_status_text },
{ "cluster-status", "html", cluster_status_html },
{ "cluster-status", "xml", cluster_status_xml },
{ "crmadmin-node", "default", crmadmin_node },
{ "crmadmin-node", "text", crmadmin_node_text },
{ "crmadmin-node", "xml", crmadmin_node_xml },
{ "dc", "default", dc },
{ "dc", "text", dc_text },
{ "dc", "xml", dc_xml },
{ "digests", "default", digests_text },
{ "digests", "xml", digests_xml },
{ "health", "default", health },
{ "health", "text", health_text },
{ "health", "xml", health_xml },
{ "inject-attr", "default", inject_attr },
{ "inject-attr", "xml", inject_attr_xml },
{ "inject-cluster-action", "default", inject_cluster_action },
{ "inject-cluster-action", "xml", inject_cluster_action_xml },
{ "inject-fencing-action", "default", inject_fencing_action },
{ "inject-fencing-action", "xml", inject_fencing_action_xml },
{ "inject-modify-config", "default", inject_modify_config },
{ "inject-modify-config", "xml", inject_modify_config_xml },
{ "inject-modify-node", "default", inject_modify_node },
{ "inject-modify-node", "xml", inject_modify_node_xml },
{ "inject-modify-ticket", "default", inject_modify_ticket },
{ "inject-modify-ticket", "xml", inject_modify_ticket_xml },
{ "inject-pseudo-action", "default", inject_pseudo_action },
{ "inject-pseudo-action", "xml", inject_pseudo_action_xml },
{ "inject-rsc-action", "default", inject_rsc_action },
{ "inject-rsc-action", "xml", inject_rsc_action_xml },
{ "inject-spec", "default", inject_spec },
{ "inject-spec", "xml", inject_spec_xml },
{ "locations-list", "default", locations_list },
{ "locations-list", "xml", locations_list_xml },
{ "node-action", "default", node_action },
{ "node-action", "xml", node_action_xml },
{ "node-info", "default", node_info_default },
{ "node-info", "xml", node_info_xml },
{ "pacemakerd-health", "default", pacemakerd_health },
{ "pacemakerd-health", "html", pacemakerd_health_html },
{ "pacemakerd-health", "text", pacemakerd_health_text },
{ "pacemakerd-health", "xml", pacemakerd_health_xml },
{ "profile", "default", profile_default, },
{ "profile", "xml", profile_xml },
{ "result-code", "none", result_code_none },
{ "result-code", "text", result_code_text },
{ "result-code", "xml", result_code_xml },
{ "rsc-action", "default", rsc_action_default },
{ "rsc-action-item", "default", rsc_action_item },
{ "rsc-action-item", "xml", rsc_action_item_xml },
{ "rsc-is-colocated-with-list", "default", rsc_is_colocated_with_list },
{ "rsc-is-colocated-with-list", "xml", rsc_is_colocated_with_list_xml },
{ "rscs-colocated-with-list", "default", rscs_colocated_with_list },
{ "rscs-colocated-with-list", "xml", rscs_colocated_with_list_xml },
{ "rule-check", "default", rule_check_default },
{ "rule-check", "xml", rule_check_xml },
{ "locations-and-colocations", "default", locations_and_colocations },
{ "locations-and-colocations", "xml", locations_and_colocations_xml },
{ NULL, NULL, NULL }
};
void
pcmk__register_lib_messages(pcmk__output_t *out) {
pcmk__register_messages(out, fmt_functions);
}
diff --git a/lib/pacemaker/pcmk_resource.c b/lib/pacemaker/pcmk_resource.c
index 35687389f0..e65f5a61bb 100644
--- a/lib/pacemaker/pcmk_resource.c
+++ b/lib/pacemaker/pcmk_resource.c
@@ -1,173 +1,173 @@
/*
* Copyright 2021-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <errno.h>
#include <glib.h>
#include <libxml/tree.h>
#include <crm/common/mainloop.h>
#include <crm/common/results.h>
#include <crm/common/output_internal.h>
#include <crm/pengine/internal.h>
#include <pacemaker.h>
#include <pacemaker-internal.h>
// Search path for resource operation history (takes node name and resource ID)
#define XPATH_OP_HISTORY "//" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE "[@" XML_ATTR_ID "='%s']"
static xmlNode *
best_op(const pe_resource_t *rsc, const pe_node_t *node)
{
char *xpath = NULL;
xmlNode *history = NULL;
xmlNode *best = NULL;
bool best_effective_op = false;
guint best_interval = 0;
bool best_failure = false;
const char *best_digest = NULL;
// Find node's resource history
xpath = crm_strdup_printf(XPATH_OP_HISTORY, node->details->uname, rsc->id);
history = get_xpath_object(xpath, rsc->cluster->input, LOG_NEVER);
free(xpath);
// Examine each history entry
for (xmlNode *lrm_rsc_op = first_named_child(history, XML_LRM_TAG_RSC_OP);
lrm_rsc_op != NULL; lrm_rsc_op = crm_next_same_xml(lrm_rsc_op)) {
const char *digest = crm_element_value(lrm_rsc_op,
XML_LRM_ATTR_RESTART_DIGEST);
guint interval_ms = 0;
const char *task = crm_element_value(lrm_rsc_op, XML_LRM_ATTR_TASK);
bool effective_op = false;
bool failure = pcmk__ends_with(ID(lrm_rsc_op), "_last_failure_0");
crm_element_value_ms(lrm_rsc_op, XML_LRM_ATTR_INTERVAL, &interval_ms);
effective_op = interval_ms == 0
&& pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START,
PCMK_ACTION_PROMOTE,
- RSC_MIGRATED, NULL);
+ PCMK_ACTION_MIGRATE_FROM, NULL);
if (best == NULL) {
goto is_best;
}
if (best_effective_op) {
// Do not use an ineffective op if there's an effective one.
if (!effective_op) {
continue;
}
// Do not use an ineffective non-recurring op if there's a recurring one
} else if (best_interval != 0
&& !effective_op
&& interval_ms == 0) {
continue;
}
// Do not use last failure if there's a successful one.
if (!best_failure && failure) {
continue;
}
// Do not use an op without a restart digest if there's one with.
if (best_digest != NULL && digest == NULL) {
continue;
}
// Do not use an older op if there's a newer one.
if (pe__is_newer_op(best, lrm_rsc_op, true) > 0) {
continue;
}
is_best:
best = lrm_rsc_op;
best_effective_op = effective_op;
best_interval = interval_ms;
best_failure = failure;
best_digest = digest;
}
return best;
}
/*!
* \internal
* \brief Calculate and output resource operation digests
*
* \param[in,out] out Output object
* \param[in,out] rsc Resource to calculate digests for
* \param[in] node Node whose operation history should be used
* \param[in] overrides Hash table of configuration parameters to override
*
* \return Standard Pacemaker return code
*/
int
pcmk__resource_digests(pcmk__output_t *out, pe_resource_t *rsc,
const pe_node_t *node, GHashTable *overrides)
{
const char *task = NULL;
xmlNode *xml_op = NULL;
op_digest_cache_t *digests = NULL;
guint interval_ms = 0;
int rc = pcmk_rc_ok;
if ((out == NULL) || (rsc == NULL) || (node == NULL)) {
return EINVAL;
}
if (rsc->variant != pe_native) {
// Only primitives get operation digests
return EOPNOTSUPP;
}
// Find XML of operation history to use
xml_op = best_op(rsc, node);
// Generate an operation key
if (xml_op != NULL) {
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
}
if (task == NULL) { // Assume start if no history is available
task = PCMK_ACTION_START;
interval_ms = 0;
}
// Calculate and show digests
digests = pe__calculate_digests(rsc, task, &interval_ms, node, xml_op,
overrides, true, rsc->cluster);
rc = out->message(out, "digests", rsc, node, task, interval_ms, digests);
pe__free_digests(digests);
return rc;
}
int
pcmk_resource_digests(xmlNodePtr *xml, pe_resource_t *rsc,
const pe_node_t *node, GHashTable *overrides,
pe_working_set_t *data_set)
{
pcmk__output_t *out = NULL;
int rc = pcmk_rc_ok;
rc = pcmk__xml_output_new(&out, xml);
if (rc != pcmk_rc_ok) {
return rc;
}
pcmk__register_lib_messages(out);
rc = pcmk__resource_digests(out, rsc, node, overrides);
pcmk__xml_output_finish(out, xml);
return rc;
}
diff --git a/lib/pacemaker/pcmk_sched_actions.c b/lib/pacemaker/pcmk_sched_actions.c
index 14e77b39f2..a8ba8a01b2 100644
--- a/lib/pacemaker/pcmk_sched_actions.c
+++ b/lib/pacemaker/pcmk_sched_actions.c
@@ -1,1929 +1,1929 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <sys/param.h>
#include <glib.h>
#include <crm/lrmd_internal.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Get the action flags relevant to ordering constraints
*
* \param[in,out] action Action to check
* \param[in] node Node that *other* action in the ordering is on
* (used only for clone resource actions)
*
* \return Action flags that should be used for orderings
*/
static uint32_t
action_flags_for_ordering(pe_action_t *action, const pe_node_t *node)
{
bool runnable = false;
uint32_t flags;
// For non-resource actions, return the action flags
if (action->rsc == NULL) {
return action->flags;
}
/* For non-clone resources, or a clone action not assigned to a node,
* return the flags as determined by the resource method without a node
* specified.
*/
flags = action->rsc->cmds->action_flags(action, NULL);
if ((node == NULL) || !pe_rsc_is_clone(action->rsc)) {
return flags;
}
/* Otherwise (i.e., for clone resource actions on a specific node), first
* remember whether the non-node-specific action is runnable.
*/
runnable = pcmk_is_set(flags, pe_action_runnable);
// Then recheck the resource method with the node
flags = action->rsc->cmds->action_flags(action, node);
/* For clones in ordering constraints, the node-specific "runnable" doesn't
* matter, just the non-node-specific setting (i.e., is the action runnable
* anywhere).
*
* This applies only to runnable, and only for ordering constraints. This
* function shouldn't be used for other types of constraints without
* changes. Not very satisfying, but it's logical and appears to work well.
*/
if (runnable && !pcmk_is_set(flags, pe_action_runnable)) {
pe__set_raw_action_flags(flags, action->rsc->id,
pe_action_runnable);
}
return flags;
}
/*!
* \internal
* \brief Get action UUID that should be used with a resource ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the UUID and resource of the first action in an
* ordering, this returns the UUID of the action that should actually be used
* for ordering (for example, "CLONE_started_0" instead of "CLONE_start_0").
*
* \param[in] first_uuid UUID of first action in ordering
* \param[in] first_rsc Resource of first action in ordering
*
* \return Newly allocated copy of UUID to use with ordering
* \note It is the caller's responsibility to free the return value.
*/
static char *
action_uuid_for_ordering(const char *first_uuid, const pe_resource_t *first_rsc)
{
guint interval_ms = 0;
char *uuid = NULL;
char *rid = NULL;
char *first_task_str = NULL;
enum action_tasks first_task = no_action;
enum action_tasks remapped_task = no_action;
// Only non-notify actions for collective resources need remapping
if ((strstr(first_uuid, PCMK_ACTION_NOTIFY) != NULL)
|| (first_rsc->variant < pe_group)) {
goto done;
}
// Only non-recurring actions need remapping
CRM_ASSERT(parse_op_key(first_uuid, &rid, &first_task_str, &interval_ms));
if (interval_ms > 0) {
goto done;
}
first_task = text2task(first_task_str);
switch (first_task) {
case stop_rsc:
case start_rsc:
case action_notify:
case action_promote:
case action_demote:
remapped_task = first_task + 1;
break;
case stopped_rsc:
case started_rsc:
case action_notified:
case action_promoted:
case action_demoted:
remapped_task = first_task;
break;
case monitor_rsc:
case shutdown_crm:
case stonith_node:
break;
default:
crm_err("Unknown action '%s' in ordering", first_task_str);
break;
}
if (remapped_task != no_action) {
/* If a (clone) resource has notifications enabled, we want to order
* relative to when all notifications have been sent for the remapped
* task. Only outermost resources or those in bundles have
* notifications.
*/
if (pcmk_is_set(first_rsc->flags, pe_rsc_notify)
&& ((first_rsc->parent == NULL)
|| (pe_rsc_is_clone(first_rsc)
&& (first_rsc->parent->variant == pe_container)))) {
uuid = pcmk__notify_key(rid, "confirmed-post",
task2text(remapped_task));
} else {
uuid = pcmk__op_key(rid, task2text(remapped_task), 0);
}
pe_rsc_trace(first_rsc,
"Remapped action UUID %s to %s for ordering purposes",
first_uuid, uuid);
}
done:
if (uuid == NULL) {
uuid = strdup(first_uuid);
CRM_ASSERT(uuid != NULL);
}
free(first_task_str);
free(rid);
return uuid;
}
/*!
* \internal
* \brief Get actual action that should be used with an ordering
*
* When an action is ordered relative to an action for a collective resource
* (clone, group, or bundle), it actually needs to be ordered after all
* instances of the collective have completed the relevant action (for example,
* given "start CLONE then start RSC", RSC must wait until all instances of
* CLONE have started). Given the first action in an ordering, this returns the
* the action that should actually be used for ordering (for example, the
* started action instead of the start action).
*
* \param[in] action First action in an ordering
*
* \return Actual action that should be used for the ordering
*/
static pe_action_t *
action_for_ordering(pe_action_t *action)
{
pe_action_t *result = action;
pe_resource_t *rsc = action->rsc;
if ((rsc != NULL) && (rsc->variant >= pe_group) && (action->uuid != NULL)) {
char *uuid = action_uuid_for_ordering(action->uuid, rsc);
result = find_first_action(rsc->actions, uuid, NULL, NULL);
if (result == NULL) {
crm_warn("Not remapping %s to %s because %s does not have "
"remapped action", action->uuid, uuid, rsc->id);
result = action;
}
free(uuid);
}
return result;
}
/*!
* \internal
* \brief Wrapper for update_ordered_actions() method for readability
*
* \param[in,out] rsc Resource to call method for
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this
* node (only used when interleaving instances)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates
* (may include pe_action_optional to affect only
* mandatory actions, and pe_action_runnable to
* affect only runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
static inline uint32_t
update(pe_resource_t *rsc, pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags, uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
return rsc->cmds->update_ordered_actions(first, then, node, flags, filter,
type, data_set);
}
/*!
* \internal
* \brief Update flags for ordering's actions appropriately for ordering's flags
*
* \param[in,out] first First action in an ordering
* \param[in,out] then Then action in an ordering
* \param[in] first_flags Action flags for \p first for ordering purposes
* \param[in] then_flags Action flags for \p then for ordering purposes
* \param[in,out] order Action wrapper for \p first in ordering
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags
*/
static uint32_t
update_action_for_ordering_flags(pe_action_t *first, pe_action_t *then,
uint32_t first_flags, uint32_t then_flags,
pe_action_wrapper_t *order,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
/* The node will only be used for clones. If interleaved, node will be NULL,
* otherwise the ordering scope will be limited to the node. Normally, the
* whole 'then' clone should restart if 'first' is restarted, so then->node
* is needed.
*/
pe_node_t *node = then->node;
if (pcmk_is_set(order->type, pe_order_implies_then_on_node)) {
/* For unfencing, only instances of 'then' on the same node as 'first'
* (the unfencing operation) should restart, so reset node to
* first->node, at which point this case is handled like a normal
* pe_order_implies_then.
*/
pe__clear_order_flags(order->type, pe_order_implies_then_on_node);
pe__set_order_flags(order->type, pe_order_implies_then);
node = first->node;
pe_rsc_trace(then->rsc,
"%s then %s: mapped pe_order_implies_then_on_node to "
"pe_order_implies_then on %s",
first->uuid, then->uuid, pe__node_name(node));
}
if (pcmk_is_set(order->type, pe_order_implies_then)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node,
first_flags & pe_action_optional,
pe_action_optional, pe_order_implies_then,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)) {
pe__clear_action_flags(then, pe_action_optional);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_then",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_restart) && (then->rsc != NULL)) {
enum pe_action_flags restart = pe_action_optional|pe_action_runnable;
changed |= update(then->rsc, first, then, node, first_flags, restart,
pe_order_restart, data_set);
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_restart",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first)) {
if (first->rsc != NULL) {
changed |= update(first->rsc, first, then, node, first_flags,
pe_action_optional, pe_order_implies_first,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_optional)
&& pcmk_is_set(first->flags, pe_action_runnable)) {
pe__clear_action_flags(first, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_promoted_implies_first)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node,
first_flags & pe_action_optional,
pe_action_optional,
pe_order_promoted_implies_first, data_set);
}
pe_rsc_trace(then->rsc,
"%s then %s: %s after pe_order_promoted_implies_first",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_one_or_more)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_runnable, pe_order_one_or_more,
data_set);
} else if (pcmk_is_set(first_flags, pe_action_runnable)) {
// We have another runnable instance of "first"
then->runnable_before++;
/* Mark "then" as runnable if it requires a certain number of
* "before" instances to be runnable, and they now are.
*/
if ((then->runnable_before >= then->required_runnable_before)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pe__set_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_one_or_more",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_probe) && (then->rsc != NULL)) {
if (!pcmk_is_set(first_flags, pe_action_runnable)
&& (first->rsc->running_on != NULL)) {
pe_rsc_trace(then->rsc,
"%s then %s: ignoring because first is stopping",
first->uuid, then->uuid);
order->type = pe_order_none;
} else {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_runnable, pe_order_runnable_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_probe",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_runnable_left)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_runnable, pe_order_runnable_left,
data_set);
} else if (!pcmk_is_set(first_flags, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_runnable_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_implies_first_migratable)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_optional,
pe_order_implies_first_migratable, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after "
"pe_order_implies_first_migratable",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_pseudo_left)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_optional, pe_order_pseudo_left,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_pseudo_left",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_optional)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_runnable, pe_order_optional, data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_optional",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(order->type, pe_order_asymmetrical)) {
if (then->rsc != NULL) {
changed |= update(then->rsc, first, then, node, first_flags,
pe_action_runnable, pe_order_asymmetrical,
data_set);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after pe_order_asymmetrical",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
if (pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk_is_set(order->type, pe_order_implies_then_printed)
&& !pcmk_is_set(first_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
then->uuid, first->uuid);
pe__set_action_flags(then, pe_action_print_always);
// Don't bother marking 'then' as changed just for this
}
if (pcmk_is_set(order->type, pe_order_implies_first_printed)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
pe_rsc_trace(then->rsc, "%s will be in graph because %s is required",
first->uuid, then->uuid);
pe__set_action_flags(first, pe_action_print_always);
// Don't bother marking 'first' as changed just for this
}
if (pcmk_any_flags_set(order->type, pe_order_implies_then
|pe_order_implies_first
|pe_order_restart)
&& (first->rsc != NULL)
&& !pcmk_is_set(first->rsc->flags, pe_rsc_managed)
&& pcmk_is_set(first->rsc->flags, pe_rsc_block)
&& !pcmk_is_set(first->flags, pe_action_runnable)
&& pcmk__str_eq(first->task, PCMK_ACTION_STOP, pcmk__str_none)) {
if (pcmk_is_set(then->flags, pe_action_runnable)) {
pe__clear_action_flags(then, pe_action_runnable);
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
}
pe_rsc_trace(then->rsc, "%s then %s: %s after checking whether first "
"is blocked, unmanaged, unrunnable stop",
first->uuid, then->uuid,
(changed? "changed" : "unchanged"));
}
return changed;
}
// Convenience macros for logging action properties
#define action_type_str(flags) \
(pcmk_is_set((flags), pe_action_pseudo)? "pseudo-action" : "action")
#define action_optional_str(flags) \
(pcmk_is_set((flags), pe_action_optional)? "optional" : "required")
#define action_runnable_str(flags) \
(pcmk_is_set((flags), pe_action_runnable)? "runnable" : "unrunnable")
#define action_node_str(a) \
(((a)->node == NULL)? "no node" : (a)->node->details->uname)
/*!
* \internal
* \brief Update an action's flags for all orderings where it is "then"
*
* \param[in,out] then Action to update
* \param[in,out] data_set Cluster working set
*/
void
pcmk__update_action_for_orderings(pe_action_t *then, pe_working_set_t *data_set)
{
GList *lpc = NULL;
uint32_t changed = pcmk__updated_none;
int last_flags = then->flags;
pe_rsc_trace(then->rsc, "Updating %s %s (%s %s) on %s",
action_type_str(then->flags), then->uuid,
action_optional_str(then->flags),
action_runnable_str(then->flags), action_node_str(then));
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
/* Initialize current known "runnable before" actions. As
* update_action_for_ordering_flags() is called for each of then's
* before actions, this number will increment as runnable 'first'
* actions are encountered.
*/
then->runnable_before = 0;
if (then->required_runnable_before == 0) {
/* @COMPAT This ordering constraint uses the deprecated
* "require-all=false" attribute. Treat it like "clone-min=1".
*/
then->required_runnable_before = 1;
}
/* The pe_order_one_or_more clause of update_action_for_ordering_flags()
* (called below) will reset runnable if appropriate.
*/
pe__clear_action_flags(then, pe_action_runnable);
}
for (lpc = then->actions_before; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pe_action_t *first = other->action;
pe_node_t *then_node = then->node;
pe_node_t *first_node = first->node;
if ((first->rsc != NULL)
&& (first->rsc->variant == pe_group)
&& pcmk__str_eq(first->task, PCMK_ACTION_START, pcmk__str_none)) {
first_node = first->rsc->fns->location(first->rsc, NULL, FALSE);
if (first_node != NULL) {
pe_rsc_trace(first->rsc, "Found %s for 'first' %s",
pe__node_name(first_node), first->uuid);
}
}
if ((then->rsc != NULL)
&& (then->rsc->variant == pe_group)
&& pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)) {
then_node = then->rsc->fns->location(then->rsc, NULL, FALSE);
if (then_node != NULL) {
pe_rsc_trace(then->rsc, "Found %s for 'then' %s",
pe__node_name(then_node), then->uuid);
}
}
// Disable constraint if it only applies when on same node, but isn't
if (pcmk_is_set(other->type, pe_order_same_node)
&& (first_node != NULL) && (then_node != NULL)
&& !pe__same_node(first_node, then_node)) {
pe_rsc_trace(then->rsc,
"Disabled ordering %s on %s then %s on %s: "
"not same node",
other->action->uuid, pe__node_name(first_node),
then->uuid, pe__node_name(then_node));
other->type = pe_order_none;
continue;
}
pcmk__clear_updated_flags(changed, then, pcmk__updated_first);
if ((first->rsc != NULL)
&& pcmk_is_set(other->type, pe_order_then_cancels_first)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
/* 'then' is required, so we must abandon 'first'
* (e.g. a required stop cancels any agent reload).
*/
pe__set_action_flags(other->action, pe_action_optional);
if (!strcmp(first->task, PCMK_ACTION_RELOAD_AGENT)) {
pe__clear_resource_flags(first->rsc, pe_rsc_reload);
}
}
if ((first->rsc != NULL) && (then->rsc != NULL)
&& (first->rsc != then->rsc) && !is_parent(then->rsc, first->rsc)) {
first = action_for_ordering(first);
}
if (first != other->action) {
pe_rsc_trace(then->rsc, "Ordering %s after %s instead of %s",
then->uuid, first->uuid, other->action->uuid);
}
pe_rsc_trace(then->rsc,
"%s (%#.6x) then %s (%#.6x): type=%#.6x node=%s",
first->uuid, first->flags, then->uuid, then->flags,
other->type, action_node_str(first));
if (first == other->action) {
/* 'first' was not remapped (e.g. from 'start' to 'running'), which
* could mean it is a non-resource action, a primitive resource
* action, or already expanded.
*/
uint32_t first_flags, then_flags;
first_flags = action_flags_for_ordering(first, then_node);
then_flags = action_flags_for_ordering(then, first_node);
changed |= update_action_for_ordering_flags(first, then,
first_flags, then_flags,
other, data_set);
/* 'first' was for a complex resource (clone, group, etc),
* create a new dependency if necessary
*/
} else if (order_actions(first, then, other->type)) {
/* This was the first time 'first' and 'then' were associated,
* start again to get the new actions_before list
*/
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"Disabled ordering %s then %s in favor of %s then %s",
other->action->uuid, then->uuid, first->uuid,
then->uuid);
other->type = pe_order_none;
}
if (pcmk_is_set(changed, pcmk__updated_first)) {
crm_trace("Re-processing %s and its 'after' actions "
"because it changed", first->uuid);
for (GList *lpc2 = first->actions_after; lpc2 != NULL;
lpc2 = lpc2->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc2->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
pcmk__update_action_for_orderings(first, data_set);
}
}
if (pcmk_is_set(then->flags, pe_action_requires_any)) {
if (last_flags == then->flags) {
pcmk__clear_updated_flags(changed, then, pcmk__updated_then);
} else {
pcmk__set_updated_flags(changed, then, pcmk__updated_then);
}
}
if (pcmk_is_set(changed, pcmk__updated_then)) {
crm_trace("Re-processing %s and its 'after' actions because it changed",
then->uuid);
if (pcmk_is_set(last_flags, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)) {
pcmk__block_colocation_dependents(then);
}
pcmk__update_action_for_orderings(then, data_set);
for (lpc = then->actions_after; lpc != NULL; lpc = lpc->next) {
pe_action_wrapper_t *other = (pe_action_wrapper_t *) lpc->data;
pcmk__update_action_for_orderings(other->action, data_set);
}
}
}
static inline bool
is_primitive_action(const pe_action_t *action)
{
return action && action->rsc && (action->rsc->variant == pe_native);
}
/*!
* \internal
* \brief Clear a single action flag and set reason text
*
* \param[in,out] action Action whose flag should be cleared
* \param[in] flag Action flag that should be cleared
* \param[in] reason Action that is the reason why flag is being cleared
*/
#define clear_action_flag_because(action, flag, reason) do { \
if (pcmk_is_set((action)->flags, (flag))) { \
pe__clear_action_flags(action, flag); \
if ((action)->rsc != (reason)->rsc) { \
char *reason_text = pe__action2reason((reason), (flag)); \
pe_action_set_reason((action), reason_text, false); \
free(reason_text); \
} \
} \
} while (0)
/*!
* \internal
* \brief Update actions in an asymmetric ordering
*
* If the "first" action in an asymmetric ordering is unrunnable, make the
* "second" action unrunnable as well, if appropriate.
*
* \param[in] first 'First' action in an asymmetric ordering
* \param[in,out] then 'Then' action in an asymmetric ordering
*/
static void
handle_asymmetric_ordering(const pe_action_t *first, pe_action_t *then)
{
/* Only resource actions after an unrunnable 'first' action need updates for
* asymmetric ordering.
*/
if ((then->rsc == NULL) || pcmk_is_set(first->flags, pe_action_runnable)) {
return;
}
// Certain optional 'then' actions are unaffected by unrunnable 'first'
if (pcmk_is_set(then->flags, pe_action_optional)) {
enum rsc_role_e then_rsc_role = then->rsc->fns->state(then->rsc, TRUE);
if ((then_rsc_role == RSC_ROLE_STOPPED)
&& pcmk__str_eq(then->task, PCMK_ACTION_STOP, pcmk__str_none)) {
/* If 'then' should stop after 'first' but is already stopped, the
* ordering is irrelevant.
*/
return;
} else if ((then_rsc_role >= RSC_ROLE_STARTED)
&& pcmk__str_eq(then->task, PCMK_ACTION_START, pcmk__str_none)
&& pe__rsc_running_on_only(then->rsc, then->node)) {
/* Similarly if 'then' should start after 'first' but is already
* started on a single node.
*/
return;
}
}
// 'First' can't run, so 'then' can't either
clear_action_flag_because(then, pe_action_optional, first);
clear_action_flag_because(then, pe_action_runnable, first);
}
/*!
* \internal
* \brief Set action bits appropriately when pe_restart_order is used
*
* \param[in,out] first 'First' action in an ordering with pe_restart_order
* \param[in,out] then 'Then' action in an ordering with pe_restart_order
* \param[in] filter What action flags to care about
*
* \note pe_restart_order is set for "stop resource before starting it" and
* "stop later group member before stopping earlier group member"
*/
static void
handle_restart_ordering(pe_action_t *first, pe_action_t *then, uint32_t filter)
{
const char *reason = NULL;
CRM_ASSERT(is_primitive_action(first));
CRM_ASSERT(is_primitive_action(then));
// We need to update the action in two cases:
// ... if 'then' is required
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
reason = "restart";
}
/* ... if 'then' is unrunnable action on same resource (if a resource
* should restart but can't start, we still want to stop)
*/
if (pcmk_is_set(filter, pe_action_runnable)
&& !pcmk_is_set(then->flags, pe_action_runnable)
&& pcmk_is_set(then->rsc->flags, pe_rsc_managed)
&& (first->rsc == then->rsc)) {
reason = "stop";
}
if (reason == NULL) {
return;
}
pe_rsc_trace(first->rsc, "Handling %s -> %s for %s",
first->uuid, then->uuid, reason);
// Make 'first' required if it is runnable
if (pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' required if 'then' is required
if (!pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
// Make 'first' unmigratable if 'then' is unmigratable
if (!pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
// Make 'then' unrunnable if 'first' is required but unrunnable
if (!pcmk_is_set(first->flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
}
}
/*!
* \internal
* \brief Update two actions according to an ordering between them
*
* Given information about an ordering of two actions, update the actions' flags
* (and runnable_before members if appropriate) as appropriate for the ordering.
* Effects may cascade to other orderings involving the actions as well.
*
* \param[in,out] first 'First' action in an ordering
* \param[in,out] then 'Then' action in an ordering
* \param[in] node If not NULL, limit scope of ordering to this node
* (ignored)
* \param[in] flags Action flags for \p first for ordering purposes
* \param[in] filter Action flags to limit scope of certain updates (may
* include pe_action_optional to affect only mandatory
* actions, and pe_action_runnable to affect only
* runnable actions)
* \param[in] type Group of enum pe_ordering flags to apply
* \param[in,out] data_set Cluster working set
*
* \return Group of enum pcmk__updated flags indicating what was updated
*/
uint32_t
pcmk__update_ordered_actions(pe_action_t *first, pe_action_t *then,
const pe_node_t *node, uint32_t flags,
uint32_t filter, uint32_t type,
pe_working_set_t *data_set)
{
uint32_t changed = pcmk__updated_none;
uint32_t then_flags = 0U;
uint32_t first_flags = 0U;
CRM_ASSERT((first != NULL) && (then != NULL) && (data_set != NULL));
then_flags = then->flags;
first_flags = first->flags;
if (pcmk_is_set(type, pe_order_asymmetrical)) {
handle_asymmetric_ordering(first, then);
}
if (pcmk_is_set(type, pe_order_implies_first)
&& !pcmk_is_set(then_flags, pe_action_optional)) {
// Then is required, and implies first should be, too
if (pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& pcmk_is_set(first_flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
if (pcmk_is_set(flags, pe_action_migrate_runnable)
&& !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable, then);
}
}
if (pcmk_is_set(type, pe_order_promoted_implies_first)
&& (then->rsc != NULL) && (then->rsc->role == RSC_ROLE_PROMOTED)
&& pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
if (pcmk_is_set(first->flags, pe_action_migrate_runnable)
&& !pcmk_is_set(then->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(first, pe_action_migrate_runnable,
then);
}
}
if (pcmk_is_set(type, pe_order_implies_first_migratable)
&& pcmk_is_set(filter, pe_action_optional)) {
if (!pcmk_all_flags_set(then->flags, pe_action_migrate_runnable
|pe_action_runnable)) {
clear_action_flag_because(first, pe_action_runnable, then);
}
if (!pcmk_is_set(then->flags, pe_action_optional)) {
clear_action_flag_because(first, pe_action_optional, then);
}
}
if (pcmk_is_set(type, pe_order_pseudo_left)
&& pcmk_is_set(filter, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_migrate_runnable, first);
pe__clear_action_flags(then, pe_action_pseudo);
}
if (pcmk_is_set(type, pe_order_runnable_left)
&& pcmk_is_set(filter, pe_action_runnable)
&& pcmk_is_set(then->flags, pe_action_runnable)
&& !pcmk_is_set(flags, pe_action_runnable)) {
clear_action_flag_because(then, pe_action_runnable, first);
clear_action_flag_because(then, pe_action_migrate_runnable, first);
}
if (pcmk_is_set(type, pe_order_implies_then)
&& pcmk_is_set(filter, pe_action_optional)
&& pcmk_is_set(then->flags, pe_action_optional)
&& !pcmk_is_set(flags, pe_action_optional)
&& !pcmk_is_set(first->flags, pe_action_migrate_runnable)) {
clear_action_flag_because(then, pe_action_optional, first);
}
if (pcmk_is_set(type, pe_order_restart)) {
handle_restart_ordering(first, then, filter);
}
if (then_flags != then->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_then);
pe_rsc_trace(then->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'first' %s (%#.6x)",
then->uuid, pe__node_name(then->node),
then->flags, then_flags, first->uuid, first->flags);
if ((then->rsc != NULL) && (then->rsc->parent != NULL)) {
// Required to handle "X_stop then X_start" for cloned groups
pcmk__update_action_for_orderings(then, data_set);
}
}
if (first_flags != first->flags) {
pcmk__set_updated_flags(changed, first, pcmk__updated_first);
pe_rsc_trace(first->rsc,
"%s on %s: flags are now %#.6x (was %#.6x) "
"because of 'then' %s (%#.6x)",
first->uuid, pe__node_name(first->node),
first->flags, first_flags, then->uuid, then->flags);
}
return changed;
}
/*!
* \internal
* \brief Trace-log an action (optionally with its dependent actions)
*
* \param[in] pre_text If not NULL, prefix the log with this plus ": "
* \param[in] action Action to log
* \param[in] details If true, recursively log dependent actions
*/
void
pcmk__log_action(const char *pre_text, const pe_action_t *action, bool details)
{
const char *node_uname = NULL;
const char *node_uuid = NULL;
const char *desc = NULL;
CRM_CHECK(action != NULL, return);
if (!pcmk_is_set(action->flags, pe_action_pseudo)) {
if (action->node != NULL) {
node_uname = action->node->details->uname;
node_uuid = action->node->details->id;
} else {
node_uname = "<none>";
}
}
switch (text2task(action->task)) {
case stonith_node:
case shutdown_crm:
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
default:
if (pcmk_is_set(action->flags, pe_action_optional)) {
desc = "Optional ";
} else if (pcmk_is_set(action->flags, pe_action_pseudo)) {
desc = "Pseudo ";
} else if (!pcmk_is_set(action->flags, pe_action_runnable)) {
desc = "!!Non-Startable!! ";
} else if (pcmk_is_set(action->flags, pe_action_processed)) {
desc = "";
} else {
desc = "(Provisional) ";
}
crm_trace("%s%s%sAction %d: %s %s%s%s%s%s%s",
((pre_text == NULL)? "" : pre_text),
((pre_text == NULL)? "" : ": "),
desc, action->id, action->uuid,
(action->rsc? action->rsc->id : "<none>"),
(node_uname? "\ton " : ""), (node_uname? node_uname : ""),
(node_uuid? "\t\t(" : ""), (node_uuid? node_uuid : ""),
(node_uuid? ")" : ""));
break;
}
if (details) {
const GList *iter = NULL;
const pe_action_wrapper_t *other = NULL;
crm_trace("\t\t====== Preceding Actions");
for (iter = action->actions_before; iter != NULL; iter = iter->next) {
other = (const pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== Subsequent Actions");
for (iter = action->actions_after; iter != NULL; iter = iter->next) {
other = (const pe_action_wrapper_t *) iter->data;
pcmk__log_action("\t\t", other->action, false);
}
crm_trace("\t\t====== End");
} else {
crm_trace("\t\t(before=%d, after=%d)",
g_list_length(action->actions_before),
g_list_length(action->actions_after));
}
}
/*!
* \internal
* \brief Create a new shutdown action for a node
*
* \param[in,out] node Node being shut down
*
* \return Newly created shutdown action for \p node
*/
pe_action_t *
pcmk__new_shutdown_action(pe_node_t *node)
{
char *shutdown_id = NULL;
pe_action_t *shutdown_op = NULL;
CRM_ASSERT(node != NULL);
shutdown_id = crm_strdup_printf("%s-%s", CRM_OP_SHUTDOWN,
node->details->uname);
shutdown_op = custom_action(NULL, shutdown_id, CRM_OP_SHUTDOWN, node, FALSE,
TRUE, node->details->data_set);
pcmk__order_stops_before_shutdown(node, shutdown_op);
add_hash_param(shutdown_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
return shutdown_op;
}
/*!
* \internal
* \brief Calculate and add an operation digest to XML
*
* Calculate an operation digest, which enables us to later determine when a
* restart is needed due to the resource's parameters being changed, and add it
* to given XML.
*
* \param[in] op Operation result from executor
* \param[in,out] update XML to add digest to
*/
static void
add_op_digest_to_xml(const lrmd_event_data_t *op, xmlNode *update)
{
char *digest = NULL;
xmlNode *args_xml = NULL;
if (op->params == NULL) {
return;
}
args_xml = create_xml_node(NULL, XML_TAG_PARAMS);
g_hash_table_foreach(op->params, hash2field, args_xml);
pcmk__filter_op_for_digest(args_xml);
digest = calculate_operation_digest(args_xml, NULL);
crm_xml_add(update, XML_LRM_ATTR_OP_DIGEST, digest);
free_xml(args_xml);
free(digest);
}
#define FAKE_TE_ID "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
/*!
* \internal
* \brief Create XML for resource operation history update
*
* \param[in,out] parent Parent XML node to add to
* \param[in,out] op Operation event data
* \param[in] caller_version DC feature set
* \param[in] target_rc Expected result of operation
* \param[in] node Name of node on which operation was performed
* \param[in] origin Arbitrary description of update source
*
* \return Newly created XML node for history update
*/
xmlNode *
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
const char *caller_version, int target_rc,
const char *node, const char *origin)
{
char *key = NULL;
char *magic = NULL;
char *op_id = NULL;
char *op_id_additional = NULL;
char *local_user_data = NULL;
const char *exit_reason = NULL;
xmlNode *xml_op = NULL;
const char *task = NULL;
CRM_CHECK(op != NULL, return NULL);
crm_trace("Creating history XML for %s-interval %s action for %s on %s "
"(DC version: %s, origin: %s)",
pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
((node == NULL)? "no node" : node), caller_version, origin);
task = op->op_type;
/* Record a successful agent reload as a start, and a failed one as a
* monitor, to make life easier for the scheduler when determining the
* current state.
*
* @COMPAT We should check "reload" here only if the operation was for a
* pre-OCF-1.1 resource agent, but we don't know that here, and we should
* only ever get results for actions scheduled by us, so we can reasonably
* assume any "reload" is actually a pre-1.1 agent reload.
*/
if (pcmk__str_any_of(task, PCMK_ACTION_RELOAD, PCMK_ACTION_RELOAD_AGENT,
NULL)) {
if (op->op_status == PCMK_EXEC_DONE) {
task = PCMK_ACTION_START;
} else {
task = PCMK_ACTION_MONITOR;
}
}
key = pcmk__op_key(op->rsc_id, task, op->interval_ms);
if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
const char *n_type = crm_meta_value(op->params, "notify_type");
const char *n_task = crm_meta_value(op->params, "notify_operation");
CRM_LOG_ASSERT(n_type != NULL);
CRM_LOG_ASSERT(n_task != NULL);
op_id = pcmk__notify_key(op->rsc_id, n_type, n_task);
if (op->op_status != PCMK_EXEC_PENDING) {
/* Ignore notify errors.
*
* @TODO It might be better to keep the correct result here, and
* ignore it in process_graph_event().
*/
lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
/* Migration history is preserved separately, which usually matters for
* multiple nodes and is important for future cluster transitions.
*/
} else if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
op_id = strdup(key);
} else if (did_rsc_op_fail(op, target_rc)) {
op_id = pcmk__op_key(op->rsc_id, "last_failure", 0);
if (op->interval_ms == 0) {
// Ensure 'last' gets updated, in case record-pending is true
op_id_additional = pcmk__op_key(op->rsc_id, "last", 0);
}
exit_reason = op->exit_reason;
} else if (op->interval_ms > 0) {
op_id = strdup(key);
} else {
op_id = pcmk__op_key(op->rsc_id, "last", 0);
}
again:
xml_op = pcmk__xe_match(parent, XML_LRM_TAG_RSC_OP, XML_ATTR_ID, op_id);
if (xml_op == NULL) {
xml_op = create_xml_node(parent, XML_LRM_TAG_RSC_OP);
}
if (op->user_data == NULL) {
crm_debug("Generating fake transition key for: " PCMK__OP_FMT
" %d from %s", op->rsc_id, op->op_type, op->interval_ms,
op->call_id, origin);
local_user_data = pcmk__transition_key(-1, op->call_id, target_rc,
FAKE_TE_ID);
op->user_data = local_user_data;
}
if (magic == NULL) {
magic = crm_strdup_printf("%d:%d;%s", op->op_status, op->rc,
(const char *) op->user_data);
}
crm_xml_add(xml_op, XML_ATTR_ID, op_id);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK_KEY, key);
crm_xml_add(xml_op, XML_LRM_ATTR_TASK, task);
crm_xml_add(xml_op, XML_ATTR_ORIGIN, origin);
crm_xml_add(xml_op, XML_ATTR_CRM_VERSION, caller_version);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_KEY, op->user_data);
crm_xml_add(xml_op, XML_ATTR_TRANSITION_MAGIC, magic);
crm_xml_add(xml_op, XML_LRM_ATTR_EXIT_REASON, pcmk__s(exit_reason, ""));
crm_xml_add(xml_op, XML_LRM_ATTR_TARGET, node); // For context during triage
crm_xml_add_int(xml_op, XML_LRM_ATTR_CALLID, op->call_id);
crm_xml_add_int(xml_op, XML_LRM_ATTR_RC, op->rc);
crm_xml_add_int(xml_op, XML_LRM_ATTR_OPSTATUS, op->op_status);
crm_xml_add_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, op->interval_ms);
if (compare_version("2.1", caller_version) <= 0) {
if (op->t_run || op->t_rcchange || op->exec_time || op->queue_time) {
crm_trace("Timing data (" PCMK__OP_FMT
"): last=%u change=%u exec=%u queue=%u",
op->rsc_id, op->op_type, op->interval_ms,
op->t_run, op->t_rcchange, op->exec_time, op->queue_time);
if ((op->interval_ms != 0) && (op->t_rcchange != 0)) {
// Recurring ops may have changed rc after initial run
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_rcchange);
} else {
crm_xml_add_ll(xml_op, XML_RSC_OP_LAST_CHANGE,
(long long) op->t_run);
}
crm_xml_add_int(xml_op, XML_RSC_OP_T_EXEC, op->exec_time);
crm_xml_add_int(xml_op, XML_RSC_OP_T_QUEUE, op->queue_time);
}
}
if (pcmk__str_any_of(op->op_type, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
/*
* Record migrate_source and migrate_target always for migrate ops.
*/
const char *name = XML_LRM_ATTR_MIGRATE_SOURCE;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
name = XML_LRM_ATTR_MIGRATE_TARGET;
crm_xml_add(xml_op, name, crm_meta_value(op->params, name));
}
add_op_digest_to_xml(op, xml_op);
if (op_id_additional) {
free(op_id);
op_id = op_id_additional;
op_id_additional = NULL;
goto again;
}
if (local_user_data) {
free(local_user_data);
op->user_data = NULL;
}
free(magic);
free(op_id);
free(key);
return xml_op;
}
/*!
* \internal
* \brief Check whether an action shutdown-locks a resource to a node
*
* If the shutdown-lock cluster property is set, resources will not be recovered
* on a different node if cleanly stopped, and may start only on that same node.
* This function checks whether that applies to a given action, so that the
* transition graph can be marked appropriately.
*
* \param[in] action Action to check
*
* \return true if \p action locks its resource to the action's node,
* otherwise false
*/
bool
pcmk__action_locks_rsc_to_node(const pe_action_t *action)
{
// Only resource actions taking place on resource's lock node are locked
if ((action == NULL) || (action->rsc == NULL)
|| !pe__same_node(action->node, action->rsc->lock_node)) {
return false;
}
/* During shutdown, only stops are locked (otherwise, another action such as
* a demote would cause the controller to clear the lock)
*/
if (action->node->details->shutdown && (action->task != NULL)
&& (strcmp(action->task, PCMK_ACTION_STOP) != 0)) {
return false;
}
return true;
}
/* lowest to highest */
static gint
sort_action_id(gconstpointer a, gconstpointer b)
{
const pe_action_wrapper_t *action_wrapper2 = (const pe_action_wrapper_t *)a;
const pe_action_wrapper_t *action_wrapper1 = (const pe_action_wrapper_t *)b;
if (a == NULL) {
return 1;
}
if (b == NULL) {
return -1;
}
if (action_wrapper1->action->id < action_wrapper2->action->id) {
return 1;
}
if (action_wrapper1->action->id > action_wrapper2->action->id) {
return -1;
}
return 0;
}
/*!
* \internal
* \brief Remove any duplicate action inputs, merging action flags
*
* \param[in,out] action Action whose inputs should be checked
*/
void
pcmk__deduplicate_action_inputs(pe_action_t *action)
{
GList *item = NULL;
GList *next = NULL;
pe_action_wrapper_t *last_input = NULL;
action->actions_before = g_list_sort(action->actions_before,
sort_action_id);
for (item = action->actions_before; item != NULL; item = next) {
pe_action_wrapper_t *input = (pe_action_wrapper_t *) item->data;
next = item->next;
if ((last_input != NULL)
&& (input->action->id == last_input->action->id)) {
crm_trace("Input %s (%d) duplicate skipped for action %s (%d)",
input->action->uuid, input->action->id,
action->uuid, action->id);
/* For the purposes of scheduling, the ordering flags no longer
* matter, but crm_simulate looks at certain ones when creating a
* dot graph. Combining the flags is sufficient for that purpose.
*/
last_input->type |= input->type;
if (input->state == pe_link_dumped) {
last_input->state = pe_link_dumped;
}
free(item->data);
action->actions_before = g_list_delete_link(action->actions_before,
item);
} else {
last_input = input;
input->state = pe_link_not_dumped;
}
}
}
/*!
* \internal
* \brief Output all scheduled actions
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__output_actions(pe_working_set_t *data_set)
{
pcmk__output_t *out = data_set->priv;
// Output node (non-resource) actions
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
char *node_name = NULL;
char *task = NULL;
pe_action_t *action = (pe_action_t *) iter->data;
if (action->rsc != NULL) {
continue; // Resource actions will be output later
} else if (pcmk_is_set(action->flags, pe_action_optional)) {
continue; // This action was not scheduled
}
if (pcmk__str_eq(action->task, CRM_OP_SHUTDOWN, pcmk__str_none)) {
task = strdup("Shutdown");
} else if (pcmk__str_eq(action->task, CRM_OP_FENCE, pcmk__str_none)) {
const char *op = g_hash_table_lookup(action->meta,
"stonith_action");
task = crm_strdup_printf("Fence (%s)", op);
} else {
continue; // Don't display other node action types
}
if (pe__is_guest_node(action->node)) {
const pe_resource_t *remote = action->node->details->remote_rsc;
node_name = crm_strdup_printf("%s (resource: %s)",
pe__node_name(action->node),
remote->container->id);
} else if (action->node != NULL) {
node_name = crm_strdup_printf("%s", pe__node_name(action->node));
}
out->message(out, "node-action", task, node_name, action->reason);
free(node_name);
free(task);
}
// Output resource actions
for (GList *iter = data_set->resources; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
rsc->cmds->output_actions(rsc);
}
}
/*!
* \internal
* \brief Check whether action from resource history is still in configuration
*
* \param[in] rsc Resource that action is for
* \param[in] task Action's name
* \param[in] interval_ms Action's interval (in milliseconds)
*
* \return true if action is still in resource configuration, otherwise false
*/
static bool
action_in_config(const pe_resource_t *rsc, const char *task, guint interval_ms)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
bool config = (find_rsc_op_entry(rsc, key) != NULL);
free(key);
return config;
}
/*!
* \internal
* \brief Get action name needed to compare digest for configuration changes
*
* \param[in] task Action name from history
* \param[in] interval_ms Action interval (in milliseconds)
*
* \return Action name whose digest should be compared
*/
static const char *
task_for_digest(const char *task, guint interval_ms)
{
/* Certain actions need to be compared against the parameters used to start
* the resource.
*/
if ((interval_ms == 0)
- && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, RSC_MIGRATED,
+ && pcmk__str_any_of(task, PCMK_ACTION_MONITOR, PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_PROMOTE, NULL)) {
task = PCMK_ACTION_START;
}
return task;
}
/*!
* \internal
* \brief Check whether only sanitized parameters to an action changed
*
* When collecting CIB files for troubleshooting, crm_report will mask
* sensitive resource parameters. If simulations were run using that, affected
* resources would appear to need a restart, which would complicate
* troubleshooting. To avoid that, we save a "secure digest" of non-sensitive
* parameters. This function used that digest to check whether only masked
* parameters are different.
*
* \param[in] xml_op Resource history entry with secure digest
* \param[in] digest_data Operation digest information being compared
* \param[in] data_set Cluster working set
*
* \return true if only sanitized parameters changed, otherwise false
*/
static bool
only_sanitized_changed(const xmlNode *xml_op,
const op_digest_cache_t *digest_data,
const pe_working_set_t *data_set)
{
const char *digest_secure = NULL;
if (!pcmk_is_set(data_set->flags, pe_flag_sanitized)) {
// The scheduler is not being run as a simulation
return false;
}
digest_secure = crm_element_value(xml_op, XML_LRM_ATTR_SECURE_DIGEST);
return (digest_data->rc != RSC_DIGEST_MATCH) && (digest_secure != NULL)
&& (digest_data->digest_secure_calc != NULL)
&& (strcmp(digest_data->digest_secure_calc, digest_secure) == 0);
}
/*!
* \internal
* \brief Force a restart due to a configuration change
*
* \param[in,out] rsc Resource that action is for
* \param[in] task Name of action whose configuration changed
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in,out] node Node where resource should be restarted
*/
static void
force_restart(pe_resource_t *rsc, const char *task, guint interval_ms,
pe_node_t *node)
{
char *key = pcmk__op_key(rsc->id, task, interval_ms);
pe_action_t *required = custom_action(rsc, key, task, NULL, FALSE, TRUE,
rsc->cluster);
pe_action_set_reason(required, "resource definition change", true);
trigger_unfencing(rsc, node, "Device parameters changed", NULL,
rsc->cluster);
}
/*!
* \internal
* \brief Schedule a reload of a resource on a node
*
* \param[in,out] data Resource to reload
* \param[in] user_data Where resource should be reloaded
*/
static void
schedule_reload(gpointer data, gpointer user_data)
{
pe_resource_t *rsc = data;
const pe_node_t *node = user_data;
pe_action_t *reload = NULL;
// For collective resources, just call recursively for children
if (rsc->variant > pe_native) {
g_list_foreach(rsc->children, schedule_reload, user_data);
return;
}
// Skip the reload in certain situations
if ((node == NULL)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)
|| pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Skip reload of %s:%s%s %s",
rsc->id,
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : " unmanaged",
pcmk_is_set(rsc->flags, pe_rsc_failed)? " failed" : "",
(node == NULL)? "inactive" : node->details->uname);
return;
}
/* If a resource's configuration changed while a start was pending,
* force a full restart instead of a reload.
*/
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s: preventing agent reload because start pending",
rsc->id);
custom_action(rsc, stop_key(rsc), PCMK_ACTION_STOP, node, FALSE, TRUE,
rsc->cluster);
return;
}
// Schedule the reload
pe__set_resource_flags(rsc, pe_rsc_reload);
reload = custom_action(rsc, reload_key(rsc), PCMK_ACTION_RELOAD_AGENT, node,
FALSE, TRUE, rsc->cluster);
pe_action_set_reason(reload, "resource definition change", FALSE);
// Set orderings so that a required stop or demote cancels the reload
pcmk__new_ordering(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
pcmk__new_ordering(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
rsc->cluster);
}
/*!
* \internal
* \brief Handle any configuration change for an action
*
* Given an action from resource history, if the resource's configuration
* changed since the action was done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, etc.).
*
* \param[in,out] rsc Resource that action is for
* \param[in,out] node Node that action was on
* \param[in] xml_op Action XML from resource history
*
* \return true if action configuration changed, otherwise false
*/
bool
pcmk__check_action_config(pe_resource_t *rsc, pe_node_t *node,
const xmlNode *xml_op)
{
guint interval_ms = 0;
const char *task = NULL;
const op_digest_cache_t *digest_data = NULL;
CRM_CHECK((rsc != NULL) && (node != NULL) && (xml_op != NULL),
return false);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_CHECK(task != NULL, return false);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
// If this is a recurring action, check whether it has been orphaned
if (interval_ms > 0) {
if (action_in_config(rsc, task, interval_ms)) {
pe_rsc_trace(rsc, "%s-interval %s for %s on %s is in configuration",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
} else if (pcmk_is_set(rsc->cluster->flags,
pe_flag_stop_action_orphans)) {
pcmk__schedule_cancel(rsc,
crm_element_value(xml_op,
XML_LRM_ATTR_CALLID),
task, interval_ms, node, "orphan");
return true;
} else {
pe_rsc_debug(rsc, "%s-interval %s for %s on %s is orphaned",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
return true;
}
}
crm_trace("Checking %s-interval %s for %s on %s for configuration changes",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node));
task = task_for_digest(task, interval_ms);
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, rsc->cluster);
if (only_sanitized_changed(xml_op, digest_data, rsc->cluster)) {
if (!pcmk__is_daemon && (rsc->cluster->priv != NULL)) {
pcmk__output_t *out = rsc->cluster->priv;
out->info(out,
"Only 'private' parameters to %s-interval %s for %s "
"on %s changed: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node),
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
}
return false;
}
switch (digest_data->rc) {
case RSC_DIGEST_RESTART:
crm_log_xml_debug(digest_data->params_restart, "params:restart");
force_restart(rsc, task, interval_ms, node);
return true;
case RSC_DIGEST_ALL:
case RSC_DIGEST_UNKNOWN:
// Changes that can potentially be handled by an agent reload
if (interval_ms > 0) {
/* Recurring actions aren't reloaded per se, they are just
* re-scheduled so the next run uses the new parameters.
* The old instance will be cancelled automatically.
*/
crm_log_xml_debug(digest_data->params_all, "params:reschedule");
pcmk__reschedule_recurring(rsc, task, interval_ms, node);
} else if (crm_element_value(xml_op,
XML_LRM_ATTR_RESTART_DIGEST) != NULL) {
// Agent supports reload, so use it
trigger_unfencing(rsc, node,
"Device parameters changed (reload)", NULL,
rsc->cluster);
crm_log_xml_debug(digest_data->params_all, "params:reload");
schedule_reload((gpointer) rsc, (gpointer) node);
} else {
pe_rsc_trace(rsc,
"Restarting %s "
"because agent doesn't support reload", rsc->id);
crm_log_xml_debug(digest_data->params_restart,
"params:restart");
force_restart(rsc, task, interval_ms, node);
}
return true;
default:
break;
}
return false;
}
/*!
* \internal
* \brief Create a list of resource's action history entries, sorted by call ID
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[out] start_index Where to store index of start-like action, if any
* \param[out] stop_index Where to store index of stop action, if any
*/
static GList *
rsc_history_as_list(const xmlNode *rsc_entry, int *start_index, int *stop_index)
{
GList *ops = NULL;
for (xmlNode *rsc_op = first_named_child(rsc_entry, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
ops = g_list_prepend(ops, rsc_op);
}
ops = g_list_sort(ops, sort_op_by_callid);
calculate_active_ops(ops, start_index, stop_index);
return ops;
}
/*!
* \internal
* \brief Process a resource's action history from the CIB status
*
* Given a resource's action history, if the resource's configuration
* changed since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in] rsc_entry Resource's <lrm_rsc_op> status XML
* \param[in,out] rsc Resource whose history is being processed
* \param[in,out] node Node whose history is being processed
*/
static void
process_rsc_history(const xmlNode *rsc_entry, pe_resource_t *rsc,
pe_node_t *node)
{
int offset = -1;
int stop_index = 0;
int start_index = 0;
GList *sorted_op_list = NULL;
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
if (pe_rsc_is_anon_clone(pe__const_top_resource(rsc, false))) {
pe_rsc_trace(rsc,
"Skipping configuration check "
"for orphaned clone instance %s",
rsc->id);
} else {
pe_rsc_trace(rsc,
"Skipping configuration check and scheduling clean-up "
"for orphaned resource %s", rsc->id);
pcmk__schedule_cleanup(rsc, node, false);
}
return;
}
if (pe_find_node_id(rsc->running_on, node->details->id) == NULL) {
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, false)) {
pcmk__schedule_cleanup(rsc, node, false);
}
pe_rsc_trace(rsc,
"Skipping configuration check for %s "
"because no longer active on %s",
rsc->id, pe__node_name(node));
return;
}
pe_rsc_trace(rsc, "Checking for configuration changes for %s on %s",
rsc->id, pe__node_name(node));
if (pcmk__rsc_agent_changed(rsc, node, rsc_entry, true)) {
pcmk__schedule_cleanup(rsc, node, false);
}
sorted_op_list = rsc_history_as_list(rsc_entry, &start_index, &stop_index);
if (start_index < stop_index) {
return; // Resource is stopped
}
for (GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
xmlNode *rsc_op = (xmlNode *) iter->data;
const char *task = NULL;
guint interval_ms = 0;
if (++offset < start_index) {
// Skip actions that happened before a start
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms > 0)
&& (pcmk_is_set(rsc->flags, pe_rsc_maintenance)
|| node->details->maintenance)) {
// Maintenance mode cancels recurring operations
pcmk__schedule_cancel(rsc,
crm_element_value(rsc_op,
XML_LRM_ATTR_CALLID),
task, interval_ms, node, "maintenance mode");
} else if ((interval_ms > 0)
|| pcmk__strcase_any_of(task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START,
PCMK_ACTION_PROMOTE,
- RSC_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
/* If a resource operation failed, and the operation's definition
* has changed, clear any fail count so they can be retried fresh.
*/
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't assigned resources to nodes yet, so if the
* REMOTE_CONTAINER_HACK is used, we may calculate the digest
* based on the literal "#uname" value rather than the properly
* substituted value. That would mistakenly make the action
* definition appear to have been changed. Defer the check until
* later in this case.
*/
pe__add_param_check(rsc_op, rsc, node, pe_check_active,
rsc->cluster);
} else if (pcmk__check_action_config(rsc, node, rsc_op)
&& (pe_get_failcount(node, rsc, NULL, pe_fc_effective,
NULL) != 0)) {
pe__clear_failcount(rsc, node, "action definition changed",
rsc->cluster);
}
}
}
g_list_free(sorted_op_list);
}
/*!
* \internal
* \brief Process a node's action history from the CIB status
*
* Given a node's resource history, if the resource's configuration changed
* since the actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in,out] node Node whose history is being processed
* \param[in] lrm_rscs Node's <lrm_resources> from CIB status XML
*/
static void
process_node_history(pe_node_t *node, const xmlNode *lrm_rscs)
{
crm_trace("Processing node history for %s", pe__node_name(node));
for (const xmlNode *rsc_entry = first_named_child(lrm_rscs,
XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
if (xml_has_children(rsc_entry)) {
GList *result = pcmk__rscs_matching_id(ID(rsc_entry),
node->details->data_set);
for (GList *iter = result; iter != NULL; iter = iter->next) {
pe_resource_t *rsc = (pe_resource_t *) iter->data;
if (rsc->variant == pe_native) {
process_rsc_history(rsc_entry, rsc, node);
}
}
g_list_free(result);
}
}
}
// XPath to find a node's resource history
#define XPATH_NODE_HISTORY "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE "[@" XML_ATTR_UNAME "='%s']" \
"/" XML_CIB_TAG_LRM "/" XML_LRM_TAG_RESOURCES
/*!
* \internal
* \brief Process any resource configuration changes in the CIB status
*
* Go through all nodes' resource history, and if a resource's configuration
* changed since its actions were done, schedule any actions needed (restart,
* reload, unfencing, rescheduling recurring actions, clean-up, etc.).
* (This also cancels recurring actions for maintenance mode, which is not
* entirely related but convenient to do here.)
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__handle_rsc_config_changes(pe_working_set_t *data_set)
{
crm_trace("Check resource and action configuration for changes");
/* Rather than iterate through the status section, iterate through the nodes
* and search for the appropriate status subsection for each. This skips
* orphaned nodes and lets us eliminate some cases before searching the XML.
*/
for (GList *iter = data_set->nodes; iter != NULL; iter = iter->next) {
pe_node_t *node = (pe_node_t *) iter->data;
/* Don't bother checking actions for a node that can't run actions ...
* unless it's in maintenance mode, in which case we still need to
* cancel any existing recurring monitors.
*/
if (node->details->maintenance
|| pcmk__node_available(node, false, false)) {
char *xpath = NULL;
xmlNode *history = NULL;
xpath = crm_strdup_printf(XPATH_NODE_HISTORY, node->details->uname);
history = get_xpath_object(xpath, data_set->input, LOG_NEVER);
free(xpath);
process_node_history(node, history);
}
}
}
diff --git a/lib/pacemaker/pcmk_sched_migration.c b/lib/pacemaker/pcmk_sched_migration.c
index e11ee3da73..3ccdf8f6ef 100644
--- a/lib/pacemaker/pcmk_sched_migration.c
+++ b/lib/pacemaker/pcmk_sched_migration.c
@@ -1,410 +1,419 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
/*!
* \internal
* \brief Add migration source and target meta-attributes to an action
*
* \param[in,out] action Action to add meta-attributes to
* \param[in] source Node to add as migration source
* \param[in] target Node to add as migration target
*/
static void
add_migration_meta(pe_action_t *action, const pe_node_t *source,
const pe_node_t *target)
{
add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_SOURCE,
source->details->uname);
add_hash_param(action->meta, XML_LRM_ATTR_MIGRATE_TARGET,
target->details->uname);
}
/*!
* \internal
* \brief Create internal migration actions for a migrateable resource
*
* \param[in,out] rsc Resource to create migration actions for
* \param[in] current Node that resource is originally active on
*/
void
pcmk__create_migration_actions(pe_resource_t *rsc, const pe_node_t *current)
{
pe_action_t *migrate_to = NULL;
pe_action_t *migrate_from = NULL;
pe_action_t *start = NULL;
pe_action_t *stop = NULL;
pe_rsc_trace(rsc, "Creating actions to %smigrate %s from %s to %s",
((rsc->partial_migration_target == NULL)? "" : "partially "),
rsc->id, pe__node_name(current),
pe__node_name(rsc->allocated_to));
start = start_action(rsc, rsc->allocated_to, TRUE);
stop = stop_action(rsc, current, TRUE);
if (rsc->partial_migration_target == NULL) {
migrate_to = custom_action(rsc, pcmk__op_key(rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
PCMK_ACTION_MIGRATE_TO, current, TRUE, TRUE,
rsc->cluster);
}
- migrate_from = custom_action(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
- RSC_MIGRATED, rsc->allocated_to, TRUE, TRUE,
+ migrate_from = custom_action(rsc, pcmk__op_key(rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, 0),
+ PCMK_ACTION_MIGRATE_FROM, rsc->allocated_to,
+ TRUE, TRUE,
rsc->cluster);
if ((migrate_from != NULL)
&& ((migrate_to != NULL) || (rsc->partial_migration_target != NULL))) {
pe__set_action_flags(start, pe_action_migrate_runnable);
pe__set_action_flags(stop, pe_action_migrate_runnable);
// This is easier than trying to delete it from the graph
pe__set_action_flags(start, pe_action_pseudo);
if (rsc->partial_migration_target == NULL) {
pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
if (migrate_to != NULL) {
pe__set_action_flags(migrate_to, pe_action_migrate_runnable);
migrate_to->needs = start->needs;
}
// Probe -> migrate_to -> migrate_from
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
NULL, rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MIGRATE_TO, 0),
NULL, pe_order_optional, rsc->cluster);
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MIGRATE_TO, 0),
- NULL,
- rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
+ NULL, rsc,
+ pcmk__op_key(rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, 0),
NULL,
pe_order_optional
|pe_order_implies_first_migratable,
rsc->cluster);
} else {
pe__set_action_flags(migrate_from, pe_action_migrate_runnable);
migrate_from->needs = start->needs;
// Probe -> migrate_from (migrate_to already completed)
pcmk__new_ordering(rsc,
pcmk__op_key(rsc->id, PCMK_ACTION_MONITOR, 0),
- NULL,
- rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0),
+ NULL, rsc,
+ pcmk__op_key(rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, 0),
NULL, pe_order_optional, rsc->cluster);
}
// migrate_from before stop or start
- pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_MIGRATE_FROM,
+ 0),
+ NULL,
rsc, pcmk__op_key(rsc->id, PCMK_ACTION_STOP, 0),
NULL,
pe_order_optional|pe_order_implies_first_migratable,
rsc->cluster);
- pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, RSC_MIGRATED, 0), NULL,
- rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
+ pcmk__new_ordering(rsc, pcmk__op_key(rsc->id, PCMK_ACTION_MIGRATE_FROM,
+ 0),
NULL,
- pe_order_optional
- |pe_order_implies_first_migratable
- |pe_order_pseudo_left,
+ rsc, pcmk__op_key(rsc->id, PCMK_ACTION_START, 0),
+ NULL, pe_order_optional
+ |pe_order_implies_first_migratable
+ |pe_order_pseudo_left,
rsc->cluster);
}
if (migrate_to != NULL) {
add_migration_meta(migrate_to, current, rsc->allocated_to);
if (!rsc->is_remote_node) {
/* migrate_to takes place on the source node, but can affect the
* target node depending on how the agent is written. Because of
* this, pending migrate_to actions must be recorded in the CIB,
* in case the source node loses membership while the migrate_to
* action is still in flight.
*
* However we know Pacemaker Remote connection resources don't
* require this, so we skip this for them. (Although it wouldn't
* hurt, and now that record-pending defaults to true, skipping it
* matters even less.)
*/
add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
}
}
if (migrate_from != NULL) {
add_migration_meta(migrate_from, current, rsc->allocated_to);
}
}
/*!
* \internal
* \brief Abort a dangling migration by scheduling a stop (and possibly cleanup)
*
* \param[in] data Source node of dangling migration
* \param[in,out] user_data Resource involved in dangling migration
*/
void
pcmk__abort_dangling_migration(void *data, void *user_data)
{
const pe_node_t *dangling_source = (const pe_node_t *) data;
pe_resource_t *rsc = (pe_resource_t *) user_data;
pe_action_t *stop = NULL;
bool cleanup = pcmk_is_set(rsc->cluster->flags, pe_flag_remove_after_stop);
pe_rsc_trace(rsc,
"Scheduling stop%s for %s on %s due to dangling migration",
(cleanup? " and cleanup" : ""), rsc->id,
pe__node_name(dangling_source));
stop = stop_action(rsc, dangling_source, FALSE);
pe__set_action_flags(stop, pe_action_dangle);
if (cleanup) {
pcmk__schedule_cleanup(rsc, dangling_source, false);
}
}
/*!
* \internal
* \brief Check whether a resource can migrate
*
* \param[in] rsc Resource to check
* \param[in] node Resource's current node
*
* \return true if \p rsc can migrate, otherwise false
*/
bool
pcmk__rsc_can_migrate(const pe_resource_t *rsc, const pe_node_t *current)
{
CRM_CHECK(rsc != NULL, return false);
if (!pcmk_is_set(rsc->flags, pe_rsc_allow_migrate)) {
pe_rsc_trace(rsc, "%s cannot migrate because "
"the configuration does not allow it",
rsc->id);
return false;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "%s cannot migrate because it is not managed",
rsc->id);
return false;
}
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "%s cannot migrate because it is failed",
rsc->id);
return false;
}
if (pcmk_is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s cannot migrate because it has a start pending",
rsc->id);
return false;
}
if ((current == NULL) || current->details->unclean) {
pe_rsc_trace(rsc, "%s cannot migrate because "
"its current node (%s) is unclean",
rsc->id, pe__node_name(current));
return false;
}
if ((rsc->allocated_to == NULL) || rsc->allocated_to->details->unclean) {
pe_rsc_trace(rsc, "%s cannot migrate because "
"its next node (%s) is unclean",
rsc->id, pe__node_name(rsc->allocated_to));
return false;
}
return true;
}
/*!
* \internal
* \brief Get an action name from an action or operation key
*
* \param[in] action If not NULL, get action name from here
* \param[in] key If not NULL, get action name from here
*
* \return Newly allocated copy of action name (or NULL if none available)
*/
static char *
task_from_action_or_key(const pe_action_t *action, const char *key)
{
char *res = NULL;
if (action != NULL) {
res = strdup(action->task);
CRM_ASSERT(res != NULL);
} else if (key != NULL) {
parse_op_key(key, NULL, &res, NULL);
}
return res;
}
/*!
* \internal
* \brief Order migration actions equivalent to a given ordering
*
* Orderings involving start, stop, demote, and promote actions must be honored
* during a migration as well, so duplicate any such ordering for the
* corresponding migration actions.
*
* \param[in,out] order Ordering constraint to check
*/
void
pcmk__order_migration_equivalents(pe__ordering_t *order)
{
char *first_task = NULL;
char *then_task = NULL;
bool then_migratable;
bool first_migratable;
// Only orderings between unrelated resources are relevant
if ((order->lh_rsc == NULL) || (order->rh_rsc == NULL)
|| (order->lh_rsc == order->rh_rsc)
|| is_parent(order->lh_rsc, order->rh_rsc)
|| is_parent(order->rh_rsc, order->lh_rsc)) {
return;
}
// Only orderings involving at least one migratable resource are relevant
first_migratable = pcmk_is_set(order->lh_rsc->flags, pe_rsc_allow_migrate);
then_migratable = pcmk_is_set(order->rh_rsc->flags, pe_rsc_allow_migrate);
if (!first_migratable && !then_migratable) {
return;
}
// Check which actions are involved
first_task = task_from_action_or_key(order->lh_action,
order->lh_action_task);
then_task = task_from_action_or_key(order->rh_action,
order->rh_action_task);
if (pcmk__str_eq(first_task, PCMK_ACTION_START, pcmk__str_none)
&& pcmk__str_eq(then_task, PCMK_ACTION_START, pcmk__str_none)) {
uint32_t flags = pe_order_optional;
if (first_migratable && then_migratable) {
/* A start then B start
* -> A migrate_from then B migrate_to */
pcmk__new_ordering(order->lh_rsc,
- pcmk__op_key(order->lh_rsc->id, RSC_MIGRATED, 0),
+ pcmk__op_key(order->lh_rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, 0),
NULL, order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
NULL, flags, order->lh_rsc->cluster);
}
if (then_migratable) {
if (first_migratable) {
pe__set_order_flags(flags, pe_order_apply_first_non_migratable);
}
/* A start then B start
* -> A start then B migrate_to (if start is not part of a
* migration)
*/
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id,
PCMK_ACTION_START, 0),
NULL, order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
NULL, flags, order->lh_rsc->cluster);
}
} else if (then_migratable
&& pcmk__str_eq(first_task, PCMK_ACTION_STOP, pcmk__str_none)
&& pcmk__str_eq(then_task, PCMK_ACTION_STOP, pcmk__str_none)) {
uint32_t flags = pe_order_optional;
if (first_migratable) {
pe__set_order_flags(flags, pe_order_apply_first_non_migratable);
}
/* For an ordering "stop A then stop B", if A is moving via restart, and
* B is migrating, enforce that B's migrate_to occurs after A's stop.
*/
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id, PCMK_ACTION_STOP, 0),
NULL,
order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
NULL, flags, order->lh_rsc->cluster);
// Also order B's migrate_from after A's stop during partial migrations
if (order->rh_rsc->partial_migration_target) {
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id, PCMK_ACTION_STOP,
0),
NULL, order->rh_rsc,
- pcmk__op_key(order->rh_rsc->id, RSC_MIGRATED, 0),
+ pcmk__op_key(order->rh_rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, 0),
NULL, flags, order->lh_rsc->cluster);
}
} else if (pcmk__str_eq(first_task, PCMK_ACTION_PROMOTE, pcmk__str_none)
&& pcmk__str_eq(then_task, PCMK_ACTION_START, pcmk__str_none)) {
uint32_t flags = pe_order_optional;
if (then_migratable) {
/* A promote then B start
* -> A promote then B migrate_to */
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id,
PCMK_ACTION_PROMOTE, 0),
NULL, order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
NULL, flags, order->lh_rsc->cluster);
}
} else if (pcmk__str_eq(first_task, PCMK_ACTION_DEMOTE, pcmk__str_none)
&& pcmk__str_eq(then_task, PCMK_ACTION_STOP, pcmk__str_none)) {
uint32_t flags = pe_order_optional;
if (then_migratable) {
/* A demote then B stop
* -> A demote then B migrate_to */
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id,
PCMK_ACTION_DEMOTE, 0),
NULL, order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
PCMK_ACTION_MIGRATE_TO, 0),
NULL, flags, order->lh_rsc->cluster);
// Order B migrate_from after A demote during partial migrations
if (order->rh_rsc->partial_migration_target) {
pcmk__new_ordering(order->lh_rsc,
pcmk__op_key(order->lh_rsc->id,
PCMK_ACTION_DEMOTE, 0),
NULL, order->rh_rsc,
pcmk__op_key(order->rh_rsc->id,
- RSC_MIGRATED, 0),
+ PCMK_ACTION_MIGRATE_FROM, 0),
NULL, flags, order->lh_rsc->cluster);
}
}
}
free(first_task);
free(then_task);
}
diff --git a/lib/pacemaker/pcmk_sched_recurring.c b/lib/pacemaker/pcmk_sched_recurring.c
index bfef8567de..44d1797cf0 100644
--- a/lib/pacemaker/pcmk_sched_recurring.c
+++ b/lib/pacemaker/pcmk_sched_recurring.c
@@ -1,717 +1,717 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Information parsed from an operation history entry in the CIB
struct op_history {
// XML attributes
const char *id; // ID of history entry
const char *name; // Action name
// Parsed information
char *key; // Operation key for action
enum rsc_role_e role; // Action role (or RSC_ROLE_UNKNOWN for default)
guint interval_ms; // Action interval
};
/*!
* \internal
* \brief Parse an interval from XML
*
* \param[in] xml XML containing an interval attribute
*
* \return Interval parsed from XML (or 0 as default)
*/
static guint
xe_interval(const xmlNode *xml)
{
return crm_parse_interval_spec(crm_element_value(xml,
XML_LRM_ATTR_INTERVAL));
}
/*!
* \internal
* \brief Check whether an operation exists multiple times in resource history
*
* \param[in] rsc Resource with history to search
* \param[in] name Name of action to search for
* \param[in] interval_ms Interval (in milliseconds) of action to search for
*
* \return true if an operation with \p name and \p interval_ms exists more than
* once in the operation history of \p rsc, otherwise false
*/
static bool
is_op_dup(const pe_resource_t *rsc, const char *name, guint interval_ms)
{
const char *id = NULL;
for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
op != NULL; op = crm_next_same_xml(op)) {
// Check whether action name and interval match
if (!pcmk__str_eq(crm_element_value(op, "name"), name, pcmk__str_none)
|| (xe_interval(op) != interval_ms)) {
continue;
}
if (ID(op) == NULL) {
continue; // Shouldn't be possible
}
if (id == NULL) {
id = ID(op); // First matching op
} else {
pcmk__config_err("Operation %s is duplicate of %s (do not use "
"same name and interval combination more "
"than once per resource)", ID(op), id);
return true;
}
}
return false;
}
/*!
* \internal
* \brief Check whether an action name is one that can be recurring
*
* \param[in] name Action name to check
*
* \return true if \p name is an action known to be unsuitable as a recurring
* operation, otherwise false
*
* \note Pacemaker's current philosophy is to allow users to configure recurring
* operations except for a short list of actions known not to be suitable
* for that (as opposed to allowing only actions known to be suitable,
* which includes only monitor). Among other things, this approach allows
* users to define their own custom operations and make them recurring,
* though that use case is not well tested.
*/
static bool
op_cannot_recur(const char *name)
{
return pcmk__str_any_of(name, PCMK_ACTION_STOP, PCMK_ACTION_START,
PCMK_ACTION_DEMOTE, PCMK_ACTION_PROMOTE,
PCMK_ACTION_RELOAD_AGENT,
- PCMK_ACTION_MIGRATE_TO, CRMD_ACTION_MIGRATED,
+ PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
NULL);
}
/*!
* \internal
* \brief Check whether a resource history entry is for a recurring action
*
* \param[in] rsc Resource that history entry is for
* \param[in] xml XML of resource history entry to check
* \param[out] op Where to store parsed info if recurring
*
* \return true if \p xml is for a recurring action, otherwise false
*/
static bool
is_recurring_history(const pe_resource_t *rsc, const xmlNode *xml,
struct op_history *op)
{
const char *role = NULL;
op->interval_ms = xe_interval(xml);
if (op->interval_ms == 0) {
return false; // Not recurring
}
op->id = ID(xml);
if (pcmk__str_empty(op->id)) {
pcmk__config_err("Ignoring resource history entry without ID");
return false; // Shouldn't be possible (unless CIB was manually edited)
}
op->name = crm_element_value(xml, "name");
if (op_cannot_recur(op->name)) {
pcmk__config_err("Ignoring %s because %s action cannot be recurring",
op->id, pcmk__s(op->name, "unnamed"));
return false;
}
// There should only be one recurring operation per action/interval
if (is_op_dup(rsc, op->name, op->interval_ms)) {
return false;
}
// Ensure role is valid if specified
role = crm_element_value(xml, "role");
if (role == NULL) {
op->role = RSC_ROLE_UNKNOWN;
} else {
op->role = text2role(role);
if (op->role == RSC_ROLE_UNKNOWN) {
pcmk__config_err("Ignoring %s because %s is not a valid role",
op->id, role);
}
}
// Disabled resources don't get monitored
op->key = pcmk__op_key(rsc->id, op->name, op->interval_ms);
if (find_rsc_op_entry(rsc, op->key) == NULL) {
crm_trace("Not creating recurring action %s for disabled resource %s",
op->id, rsc->id);
free(op->key);
return false;
}
return true;
}
/*!
* \internal
* \brief Check whether a recurring action for an active role should be optional
*
* \param[in] rsc Resource that recurring action is for
* \param[in] node Node that \p rsc will be active on (if any)
* \param[in] key Operation key for recurring action to check
* \param[in,out] start Start action for \p rsc
*
* \return true if recurring action should be optional, otherwise false
*/
static bool
active_recurring_should_be_optional(const pe_resource_t *rsc,
const pe_node_t *node, const char *key,
pe_action_t *start)
{
GList *possible_matches = NULL;
if (node == NULL) { // Should only be possible if unmanaged and stopped
pe_rsc_trace(rsc, "%s will be mandatory because resource is unmanaged",
key);
return false;
}
if (!pcmk_is_set(rsc->cmds->action_flags(start, NULL),
pe_action_optional)) {
pe_rsc_trace(rsc, "%s will be mandatory because %s is",
key, start->uuid);
return false;
}
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches == NULL) {
pe_rsc_trace(rsc, "%s will be mandatory because it is not active on %s",
key, pe__node_name(node));
return false;
}
for (const GList *iter = possible_matches;
iter != NULL; iter = iter->next) {
const pe_action_t *op = (const pe_action_t *) iter->data;
if (pcmk_is_set(op->flags, pe_action_reschedule)) {
pe_rsc_trace(rsc,
"%s will be mandatory because "
"it needs to be rescheduled", key);
g_list_free(possible_matches);
return false;
}
}
g_list_free(possible_matches);
return true;
}
/*!
* \internal
* \brief Create recurring action from resource history entry for an active role
*
* \param[in,out] rsc Resource that resource history is for
* \param[in,out] start Start action for \p rsc on \p node
* \param[in] node Node that resource will be active on (if any)
* \param[in] op Resource history entry
*/
static void
recurring_op_for_active(pe_resource_t *rsc, pe_action_t *start,
const pe_node_t *node, const struct op_history *op)
{
pe_action_t *mon = NULL;
bool is_optional = true;
const bool is_default_role = (op->role == RSC_ROLE_UNKNOWN);
// We're only interested in recurring actions for active roles
if (op->role == RSC_ROLE_STOPPED) {
return;
}
is_optional = active_recurring_should_be_optional(rsc, node, op->key,
start);
if ((!is_default_role && (rsc->next_role != op->role))
|| (is_default_role && (rsc->next_role == RSC_ROLE_PROMOTED))) {
// Configured monitor role doesn't match role resource will have
if (is_optional) { // It's running, so cancel it
char *after_key = NULL;
pe_action_t *cancel_op = pcmk__new_cancel_action(rsc, op->name,
op->interval_ms,
node);
switch (rsc->role) {
case RSC_ROLE_UNPROMOTED:
case RSC_ROLE_STARTED:
if (rsc->next_role == RSC_ROLE_PROMOTED) {
after_key = promote_key(rsc);
} else if (rsc->next_role == RSC_ROLE_STOPPED) {
after_key = stop_key(rsc);
}
break;
case RSC_ROLE_PROMOTED:
after_key = demote_key(rsc);
break;
default:
break;
}
if (after_key) {
pcmk__new_ordering(rsc, NULL, cancel_op, rsc, after_key, NULL,
pe_order_runnable_left, rsc->cluster);
}
}
do_crm_log((is_optional? LOG_INFO : LOG_TRACE),
"%s recurring action %s because %s configured for %s role "
"(not %s)",
(is_optional? "Cancelling" : "Ignoring"), op->key, op->id,
role2text(is_default_role? RSC_ROLE_UNPROMOTED : op->role),
role2text(rsc->next_role));
return;
}
pe_rsc_trace(rsc,
"Creating %s recurring action %s for %s (%s %s on %s)",
(is_optional? "optional" : "mandatory"), op->key,
op->id, rsc->id, role2text(rsc->next_role),
pe__node_name(node));
mon = custom_action(rsc, strdup(op->key), op->name, node, is_optional, TRUE,
rsc->cluster);
if (!pcmk_is_set(start->flags, pe_action_runnable)) {
pe_rsc_trace(rsc, "%s is unrunnable because start is", mon->uuid);
pe__clear_action_flags(mon, pe_action_runnable);
} else if ((node == NULL) || !node->details->online
|| node->details->unclean) {
pe_rsc_trace(rsc, "%s is unrunnable because no node is available",
mon->uuid);
pe__clear_action_flags(mon, pe_action_runnable);
} else if (!pcmk_is_set(mon->flags, pe_action_optional)) {
pe_rsc_info(rsc, "Start %s-interval %s for %s on %s",
pcmk__readable_interval(op->interval_ms), mon->task,
rsc->id, pe__node_name(node));
}
if (rsc->next_role == RSC_ROLE_PROMOTED) {
pe__add_action_expected_result(mon, CRM_EX_PROMOTED);
}
// Order monitor relative to other actions
if ((node == NULL) || pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__new_ordering(rsc, start_key(rsc), NULL,
NULL, strdup(mon->uuid), mon,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
pcmk__new_ordering(rsc, reload_key(rsc), NULL,
NULL, strdup(mon->uuid), mon,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
if (rsc->next_role == RSC_ROLE_PROMOTED) {
pcmk__new_ordering(rsc, promote_key(rsc), NULL,
rsc, NULL, mon,
pe_order_optional|pe_order_runnable_left,
rsc->cluster);
} else if (rsc->role == RSC_ROLE_PROMOTED) {
pcmk__new_ordering(rsc, demote_key(rsc), NULL,
rsc, NULL, mon,
pe_order_optional|pe_order_runnable_left,
rsc->cluster);
}
}
}
/*!
* \internal
* \brief Cancel a recurring action if running on a node
*
* \param[in,out] rsc Resource that action is for
* \param[in] node Node to cancel action on
* \param[in] key Operation key for action
* \param[in] name Action name
* \param[in] interval_ms Action interval (in milliseconds)
*/
static void
cancel_if_running(pe_resource_t *rsc, const pe_node_t *node, const char *key,
const char *name, guint interval_ms)
{
GList *possible_matches = find_actions_exact(rsc->actions, key, node);
pe_action_t *cancel_op = NULL;
if (possible_matches == NULL) {
return; // Recurring action isn't running on this node
}
g_list_free(possible_matches);
cancel_op = pcmk__new_cancel_action(rsc, name, interval_ms, node);
switch (rsc->next_role) {
case RSC_ROLE_STARTED:
case RSC_ROLE_UNPROMOTED:
/* Order starts after cancel. If the current role is
* stopped, this cancels the monitor before the resource
* starts; if the current role is started, then this cancels
* the monitor on a migration target before starting there.
*/
pcmk__new_ordering(rsc, NULL, cancel_op,
rsc, start_key(rsc), NULL,
pe_order_runnable_left, rsc->cluster);
break;
default:
break;
}
pe_rsc_info(rsc,
"Cancelling %s-interval %s action for %s on %s because "
"configured for " RSC_ROLE_STOPPED_S " role (not %s)",
pcmk__readable_interval(interval_ms), name, rsc->id,
pe__node_name(node), role2text(rsc->next_role));
}
/*!
* \internal
* \brief Order an action after all probes of a resource on a node
*
* \param[in,out] rsc Resource to check for probes
* \param[in] node Node to check for probes of \p rsc
* \param[in,out] action Action to order after probes of \p rsc on \p node
*/
static void
order_after_probes(pe_resource_t *rsc, const pe_node_t *node,
pe_action_t *action)
{
GList *probes = pe__resource_actions(rsc, node, PCMK_ACTION_MONITOR, FALSE);
for (GList *iter = probes; iter != NULL; iter = iter->next) {
order_actions((pe_action_t *) iter->data, action,
pe_order_runnable_left);
}
g_list_free(probes);
}
/*!
* \internal
* \brief Order an action after all stops of a resource on a node
*
* \param[in,out] rsc Resource to check for stops
* \param[in] node Node to check for stops of \p rsc
* \param[in,out] action Action to order after stops of \p rsc on \p node
*/
static void
order_after_stops(pe_resource_t *rsc, const pe_node_t *node,
pe_action_t *action)
{
GList *stop_ops = pe__resource_actions(rsc, node, PCMK_ACTION_STOP, TRUE);
for (GList *iter = stop_ops; iter != NULL; iter = iter->next) {
pe_action_t *stop = (pe_action_t *) iter->data;
if (!pcmk_is_set(stop->flags, pe_action_optional)
&& !pcmk_is_set(action->flags, pe_action_optional)
&& !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "%s optional on %s: unmanaged",
action->uuid, pe__node_name(node));
pe__set_action_flags(action, pe_action_optional);
}
if (!pcmk_is_set(stop->flags, pe_action_runnable)) {
crm_debug("%s unrunnable on %s: stop is unrunnable",
action->uuid, pe__node_name(node));
pe__clear_action_flags(action, pe_action_runnable);
}
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__new_ordering(rsc, stop_key(rsc), stop,
NULL, NULL, action,
pe_order_implies_then|pe_order_runnable_left,
rsc->cluster);
}
}
g_list_free(stop_ops);
}
/*!
* \internal
* \brief Create recurring action from resource history entry for inactive role
*
* \param[in,out] rsc Resource that resource history is for
* \param[in] node Node that resource will be active on (if any)
* \param[in] op Resource history entry
*/
static void
recurring_op_for_inactive(pe_resource_t *rsc, const pe_node_t *node,
const struct op_history *op)
{
GList *possible_matches = NULL;
// We're only interested in recurring actions for the inactive role
if (op->role != RSC_ROLE_STOPPED) {
return;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
crm_notice("Ignoring %s (recurring monitors for " RSC_ROLE_STOPPED_S
" role are not supported for anonymous clones)", op->id);
return; // @TODO add support
}
pe_rsc_trace(rsc, "Creating recurring action %s for %s on nodes "
"where it should not be running", op->id, rsc->id);
for (GList *iter = rsc->cluster->nodes; iter != NULL; iter = iter->next) {
pe_node_t *stop_node = (pe_node_t *) iter->data;
bool is_optional = true;
pe_action_t *stopped_mon = NULL;
// Cancel action on node where resource will be active
if ((node != NULL)
&& pcmk__str_eq(stop_node->details->uname, node->details->uname,
pcmk__str_casei)) {
cancel_if_running(rsc, node, op->key, op->name, op->interval_ms);
continue;
}
// Recurring action on this node is optional if it's already active here
possible_matches = find_actions_exact(rsc->actions, op->key, stop_node);
is_optional = (possible_matches != NULL);
g_list_free(possible_matches);
pe_rsc_trace(rsc,
"Creating %s recurring action %s for %s (%s "
RSC_ROLE_STOPPED_S " on %s)",
(is_optional? "optional" : "mandatory"),
op->key, op->id, rsc->id, pe__node_name(stop_node));
stopped_mon = custom_action(rsc, strdup(op->key), op->name, stop_node,
is_optional, TRUE, rsc->cluster);
pe__add_action_expected_result(stopped_mon, CRM_EX_NOT_RUNNING);
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
order_after_probes(rsc, stop_node, stopped_mon);
}
/* The recurring action is for the inactive role, so it shouldn't be
* performed until the resource is inactive.
*/
order_after_stops(rsc, stop_node, stopped_mon);
if (!stop_node->details->online || stop_node->details->unclean) {
pe_rsc_debug(rsc, "%s unrunnable on %s: node unavailable)",
stopped_mon->uuid, pe__node_name(stop_node));
pe__clear_action_flags(stopped_mon, pe_action_runnable);
}
if (pcmk_is_set(stopped_mon->flags, pe_action_runnable)
&& !pcmk_is_set(stopped_mon->flags, pe_action_optional)) {
crm_notice("Start recurring %s-interval %s for "
RSC_ROLE_STOPPED_S " %s on %s",
pcmk__readable_interval(op->interval_ms),
stopped_mon->task, rsc->id, pe__node_name(stop_node));
}
}
}
/*!
* \internal
* \brief Create recurring actions for a resource
*
* \param[in,out] rsc Resource to create recurring actions for
*/
void
pcmk__create_recurring_actions(pe_resource_t *rsc)
{
pe_action_t *start = NULL;
if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Skipping recurring actions for blocked resource %s",
rsc->id);
return;
}
if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) {
pe_rsc_trace(rsc, "Skipping recurring actions for %s "
"in maintenance mode", rsc->id);
return;
}
if (rsc->allocated_to == NULL) {
// Recurring actions for active roles not needed
} else if (rsc->allocated_to->details->maintenance) {
pe_rsc_trace(rsc,
"Skipping recurring actions for %s on %s "
"in maintenance mode",
rsc->id, pe__node_name(rsc->allocated_to));
} else if ((rsc->next_role != RSC_ROLE_STOPPED)
|| !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
// Recurring actions for active roles needed
start = start_action(rsc, rsc->allocated_to, TRUE);
}
pe_rsc_trace(rsc, "Creating any recurring actions needed for %s", rsc->id);
for (xmlNode *op = first_named_child(rsc->ops_xml, "op");
op != NULL; op = crm_next_same_xml(op)) {
struct op_history op_history = { NULL, };
if (!is_recurring_history(rsc, op, &op_history)) {
continue;
}
if (start != NULL) {
recurring_op_for_active(rsc, start, rsc->allocated_to, &op_history);
}
recurring_op_for_inactive(rsc, rsc->allocated_to, &op_history);
free(op_history.key);
}
}
/*!
* \internal
* \brief Create an executor cancel action
*
* \param[in,out] rsc Resource of action to cancel
* \param[in] task Name of action to cancel
* \param[in] interval_ms Interval of action to cancel
* \param[in] node Node of action to cancel
*
* \return Created op
*/
pe_action_t *
pcmk__new_cancel_action(pe_resource_t *rsc, const char *task, guint interval_ms,
const pe_node_t *node)
{
pe_action_t *cancel_op = NULL;
char *key = NULL;
char *interval_ms_s = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL) && (node != NULL));
// @TODO dangerous if possible to schedule another action with this key
key = pcmk__op_key(rsc->id, task, interval_ms);
cancel_op = custom_action(rsc, key, PCMK_ACTION_CANCEL, node, FALSE, TRUE,
rsc->cluster);
pcmk__str_update(&cancel_op->task, PCMK_ACTION_CANCEL);
pcmk__str_update(&cancel_op->cancel_task, task);
interval_ms_s = crm_strdup_printf("%u", interval_ms);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_TASK, task);
add_hash_param(cancel_op->meta, XML_LRM_ATTR_INTERVAL_MS, interval_ms_s);
free(interval_ms_s);
return cancel_op;
}
/*!
* \internal
* \brief Schedule cancellation of a recurring action
*
* \param[in,out] rsc Resource that action is for
* \param[in] call_id Action's call ID from history
* \param[in] task Action name
* \param[in] interval_ms Action interval
* \param[in] node Node that history entry is for
* \param[in] reason Short description of why action is cancelled
*/
void
pcmk__schedule_cancel(pe_resource_t *rsc, const char *call_id, const char *task,
guint interval_ms, const pe_node_t *node,
const char *reason)
{
pe_action_t *cancel = NULL;
CRM_CHECK((rsc != NULL) && (task != NULL)
&& (node != NULL) && (reason != NULL),
return);
crm_info("Recurring %s-interval %s for %s will be stopped on %s: %s",
pcmk__readable_interval(interval_ms), task, rsc->id,
pe__node_name(node), reason);
cancel = pcmk__new_cancel_action(rsc, task, interval_ms, node);
add_hash_param(cancel->meta, XML_LRM_ATTR_CALLID, call_id);
// Cancellations happen after stops
pcmk__new_ordering(rsc, stop_key(rsc), NULL, rsc, NULL, cancel,
pe_order_optional, rsc->cluster);
}
/*!
* \internal
* \brief Reschedule a recurring action
*
* \param[in,out] rsc Resource that action is for
* \param[in] task Name of action being rescheduled
* \param[in] interval_ms Action interval (in milliseconds)
* \param[in,out] node Node where action should be rescheduled
*/
void
pcmk__reschedule_recurring(pe_resource_t *rsc, const char *task,
guint interval_ms, pe_node_t *node)
{
pe_action_t *op = NULL;
trigger_unfencing(rsc, node, "Device parameters changed (reschedule)",
NULL, rsc->cluster);
op = custom_action(rsc, pcmk__op_key(rsc->id, task, interval_ms),
task, node, TRUE, TRUE, rsc->cluster);
pe__set_action_flags(op, pe_action_reschedule);
}
/*!
* \internal
* \brief Check whether an action is recurring
*
* \param[in] action Action to check
*
* \return true if \p action has a nonzero interval, otherwise false
*/
bool
pcmk__action_is_recurring(const pe_action_t *action)
{
guint interval_ms = 0;
if (pcmk__guint_from_hash(action->meta,
XML_LRM_ATTR_INTERVAL_MS, 0,
&interval_ms) != pcmk_rc_ok) {
return false;
}
return (interval_ms > 0);
}
diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c
index 53eb2e85ee..3d3c89e4d3 100644
--- a/lib/pacemaker/pcmk_sched_remote.c
+++ b/lib/pacemaker/pcmk_sched_remote.c
@@ -1,725 +1,725 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
enum remote_connection_state {
remote_state_unknown = 0,
remote_state_alive = 1,
remote_state_resting = 2,
remote_state_failed = 3,
remote_state_stopped = 4
};
static const char *
state2text(enum remote_connection_state state)
{
switch (state) {
case remote_state_unknown:
return "unknown";
case remote_state_alive:
return "alive";
case remote_state_resting:
return "resting";
case remote_state_failed:
return "failed";
case remote_state_stopped:
return "stopped";
}
return "impossible";
}
/* We always use pe_order_preserve with these convenience functions to exempt
* internally generated constraints from the prohibition of user constraints
* involving remote connection resources.
*
* The start ordering additionally uses pe_order_runnable_left so that the
* specified action is not runnable if the start is not runnable.
*/
static inline void
order_start_then_action(pe_resource_t *first_rsc, pe_action_t *then_action,
uint32_t extra)
{
if ((first_rsc != NULL) && (then_action != NULL)) {
pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL,
then_action->rsc, NULL, then_action,
pe_order_preserve|pe_order_runnable_left|extra,
first_rsc->cluster);
}
}
static inline void
order_action_then_stop(pe_action_t *first_action, pe_resource_t *then_rsc,
uint32_t extra)
{
if ((first_action != NULL) && (then_rsc != NULL)) {
pcmk__new_ordering(first_action->rsc, NULL, first_action,
then_rsc, stop_key(then_rsc), NULL,
pe_order_preserve|extra, then_rsc->cluster);
}
}
static enum remote_connection_state
get_remote_node_state(const pe_node_t *node)
{
const pe_resource_t *remote_rsc = NULL;
const pe_node_t *cluster_node = NULL;
CRM_ASSERT(node != NULL);
remote_rsc = node->details->remote_rsc;
CRM_ASSERT(remote_rsc != NULL);
cluster_node = pe__current_node(remote_rsc);
/* If the cluster node the remote connection resource resides on
* is unclean or went offline, we can't process any operations
* on that remote node until after it starts elsewhere.
*/
if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
|| (remote_rsc->allocated_to == NULL)) {
// The connection resource is not going to run anywhere
if ((cluster_node != NULL) && cluster_node->details->unclean) {
/* The remote connection is failed because its resource is on a
* failed node and can't be recovered elsewhere, so we must fence.
*/
return remote_state_failed;
}
if (!pcmk_is_set(remote_rsc->flags, pe_rsc_failed)) {
/* Connection resource is cleanly stopped */
return remote_state_stopped;
}
/* Connection resource is failed */
if ((remote_rsc->next_role == RSC_ROLE_STOPPED)
&& remote_rsc->remote_reconnect_ms
&& node->details->remote_was_fenced
&& !pe__shutdown_requested(node)) {
/* We won't know whether the connection is recoverable until the
* reconnect interval expires and we reattempt connection.
*/
return remote_state_unknown;
}
/* The remote connection is in a failed state. If there are any
* resources known to be active on it (stop) or in an unknown state
* (probe), we must assume the worst and fence it.
*/
return remote_state_failed;
} else if (cluster_node == NULL) {
/* Connection is recoverable but not currently running anywhere, so see
* if we can recover it first
*/
return remote_state_unknown;
} else if (cluster_node->details->unclean
|| !(cluster_node->details->online)) {
// Connection is running on a dead node, see if we can recover it first
return remote_state_resting;
} else if (pcmk__list_of_multiple(remote_rsc->running_on)
&& (remote_rsc->partial_migration_source != NULL)
&& (remote_rsc->partial_migration_target != NULL)) {
/* We're in the middle of migrating a connection resource, so wait until
* after the migration completes before performing any actions.
*/
return remote_state_resting;
}
return remote_state_alive;
}
/*!
* \internal
* \brief Order actions on remote node relative to actions for the connection
*
* \param[in,out] action An action scheduled on a Pacemaker Remote node
*/
static void
apply_remote_ordering(pe_action_t *action)
{
pe_resource_t *remote_rsc = NULL;
enum action_tasks task = text2task(action->task);
enum remote_connection_state state = get_remote_node_state(action->node);
uint32_t order_opts = pe_order_none;
if (action->rsc == NULL) {
return;
}
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc != NULL);
crm_trace("Order %s action %s relative to %s%s (state: %s)",
action->task, action->uuid,
pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id, state2text(state));
if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
order_opts = pe_order_none;
if (state == remote_state_failed) {
/* Force recovery, by making this action required */
pe__set_order_flags(order_opts, pe_order_implies_then);
}
/* Ensure connection is up before running this action */
order_start_then_action(remote_rsc, action, order_opts);
break;
case stop_rsc:
if (state == remote_state_alive) {
order_action_then_stop(action, remote_rsc,
pe_order_implies_first);
} else if (state == remote_state_failed) {
/* The resource is active on the node, but since we don't have a
* valid connection, the only way to stop the resource is by
* fencing the node. There is no need to order the stop relative
* to the remote connection, since the stop will become implied
* by the fencing.
*/
pe_fence_node(remote_rsc->cluster, action->node,
"resources are active but "
"connection is unrecoverable",
FALSE);
} else if (remote_rsc->next_role == RSC_ROLE_STOPPED) {
/* State must be remote_state_unknown or remote_state_stopped.
* Since the connection is not coming back up in this
* transition, stop this resource first.
*/
order_action_then_stop(action, remote_rsc,
pe_order_implies_first);
} else {
/* The connection is going to be started somewhere else, so
* stop this resource after that completes.
*/
order_start_then_action(remote_rsc, action, pe_order_none);
}
break;
case action_demote:
/* Only order this demote relative to the connection start if the
* connection isn't being torn down. Otherwise, the demote would be
* blocked because the connection start would not be allowed.
*/
if ((state == remote_state_resting)
|| (state == remote_state_unknown)) {
order_start_then_action(remote_rsc, action, pe_order_none);
} /* Otherwise we can rely on the stop ordering */
break;
default:
/* Wait for the connection resource to be up */
if (pcmk__action_is_recurring(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
order_start_then_action(remote_rsc, action,
pe_order_implies_then);
} else {
pe_node_t *cluster_node = pe__current_node(remote_rsc);
if ((task == monitor_rsc) && (state == remote_state_failed)) {
/* We would only be here if we do not know the state of the
* resource on the remote node. Since we have no way to find
* out, it is necessary to fence the node.
*/
pe_fence_node(remote_rsc->cluster, action->node,
"resources are in unknown state "
"and connection is unrecoverable", FALSE);
}
if ((cluster_node != NULL) && (state == remote_state_stopped)) {
/* The connection is currently up, but is going down
* permanently. Make sure we check services are actually
* stopped _before_ we let the connection get closed.
*/
order_action_then_stop(action, remote_rsc,
pe_order_runnable_left);
} else {
order_start_then_action(remote_rsc, action, pe_order_none);
}
}
break;
}
}
static void
apply_container_ordering(pe_action_t *action)
{
/* VMs are also classified as containers for these purposes... in
* that they both involve a 'thing' running on a real or remote
* cluster node.
*
* This allows us to be smarter about the type and extent of
* recovery actions required in various scenarios
*/
pe_resource_t *remote_rsc = NULL;
pe_resource_t *container = NULL;
enum action_tasks task = text2task(action->task);
CRM_ASSERT(action->rsc != NULL);
CRM_ASSERT(action->node != NULL);
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc != NULL);
container = remote_rsc->container;
CRM_ASSERT(container != NULL);
if (pcmk_is_set(container->flags, pe_rsc_failed)) {
pe_fence_node(action->rsc->cluster, action->node, "container failed",
FALSE);
}
crm_trace("Order %s action %s relative to %s%s for %s%s",
action->task, action->uuid,
pcmk_is_set(remote_rsc->flags, pe_rsc_failed)? "failed " : "",
remote_rsc->id,
pcmk_is_set(container->flags, pe_rsc_failed)? "failed " : "",
container->id);
if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO,
- CRMD_ACTION_MIGRATED, NULL)) {
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
/* Migration ops map to "no_action", but we need to apply the same
* ordering as for stop or demote (see get_router_node()).
*/
task = stop_rsc;
}
switch (task) {
case start_rsc:
case action_promote:
// Force resource recovery if the container is recovered
order_start_then_action(container, action, pe_order_implies_then);
// Wait for the connection resource to be up, too
order_start_then_action(remote_rsc, action, pe_order_none);
break;
case stop_rsc:
case action_demote:
if (pcmk_is_set(container->flags, pe_rsc_failed)) {
/* When the container representing a guest node fails, any stop
* or demote actions for resources running on the guest node
* are implied by the container stopping. This is similar to
* how fencing operations work for cluster nodes and remote
* nodes.
*/
} else {
/* Ensure the operation happens before the connection is brought
* down.
*
* If we really wanted to, we could order these after the
* connection start, IFF the container's current role was
* stopped (otherwise we re-introduce an ordering loop when the
* connection is restarting).
*/
order_action_then_stop(action, remote_rsc, pe_order_none);
}
break;
default:
/* Wait for the connection resource to be up */
if (pcmk__action_is_recurring(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
if (task != no_action) {
order_start_then_action(remote_rsc, action,
pe_order_implies_then);
}
} else {
order_start_then_action(remote_rsc, action, pe_order_none);
}
break;
}
}
/*!
* \internal
* \brief Order all relevant actions relative to remote connection actions
*
* \param[in,out] data_set Cluster working set
*/
void
pcmk__order_remote_connection_actions(pe_working_set_t *data_set)
{
if (!pcmk_is_set(data_set->flags, pe_flag_have_remote_nodes)) {
return;
}
crm_trace("Creating remote connection orderings");
for (GList *iter = data_set->actions; iter != NULL; iter = iter->next) {
pe_action_t *action = iter->data;
pe_resource_t *remote = NULL;
// We are only interested in resource actions
if (action->rsc == NULL) {
continue;
}
/* Special case: If we are clearing the failcount of an actual
* remote connection resource, then make sure this happens before
* any start of the resource in this transition.
*/
if (action->rsc->is_remote_node &&
pcmk__str_eq(action->task, CRM_OP_CLEAR_FAILCOUNT,
pcmk__str_none)) {
pcmk__new_ordering(action->rsc, NULL, action, action->rsc,
pcmk__op_key(action->rsc->id, PCMK_ACTION_START,
0),
NULL, pe_order_optional, data_set);
continue;
}
// We are only interested in actions assigned to a node
if (action->node == NULL) {
continue;
}
if (!pe__is_guest_or_remote_node(action->node)) {
continue;
}
/* We are only interested in real actions.
*
* @TODO This is probably wrong; pseudo-actions might be converted to
* real actions and vice versa later in update_actions() at the end of
* pcmk__apply_orderings().
*/
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
continue;
}
remote = action->node->details->remote_rsc;
if (remote == NULL) {
// Orphaned
continue;
}
/* Another special case: if a resource is moving to a Pacemaker Remote
* node, order the stop on the original node after any start of the
* remote connection. This ensures that if the connection fails to
* start, we leave the resource running on the original node.
*/
if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none)) {
for (GList *item = action->rsc->actions; item != NULL;
item = item->next) {
pe_action_t *rsc_action = item->data;
if (!pe__same_node(rsc_action->node, action->node)
&& pcmk__str_eq(rsc_action->task, PCMK_ACTION_STOP,
pcmk__str_none)) {
pcmk__new_ordering(remote, start_key(remote), NULL,
action->rsc, NULL, rsc_action,
pe_order_optional, data_set);
}
}
}
/* The action occurs across a remote connection, so create
* ordering constraints that guarantee the action occurs while the node
* is active (after start, before stop ... things like that).
*
* This is somewhat brittle in that we need to make sure the results of
* this ordering are compatible with the result of get_router_node().
* It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
* of this logic rather than create_graph_action().
*/
if (remote->container) {
crm_trace("Container ordering for %s", action->uuid);
apply_container_ordering(action);
} else {
crm_trace("Remote ordering for %s", action->uuid);
apply_remote_ordering(action);
}
}
}
/*!
* \internal
* \brief Check whether a node is a failed remote node
*
* \param[in] node Node to check
*
* \return true if \p node is a failed remote node, false otherwise
*/
bool
pcmk__is_failed_remote_node(const pe_node_t *node)
{
return pe__is_remote_node(node) && (node->details->remote_rsc != NULL)
&& (get_remote_node_state(node) == remote_state_failed);
}
/*!
* \internal
* \brief Check whether a given resource corresponds to a given node as guest
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return true if \p node is a guest node and \p rsc is its containing
* resource, otherwise false
*/
bool
pcmk__rsc_corresponds_to_guest(const pe_resource_t *rsc, const pe_node_t *node)
{
return (rsc != NULL) && (rsc->fillers != NULL) && (node != NULL)
&& (node->details->remote_rsc != NULL)
&& (node->details->remote_rsc->container == rsc);
}
/*!
* \internal
* \brief Get proper connection host that a remote action must be routed through
*
* A remote connection resource might be starting, stopping, or migrating in the
* same transition that an action needs to be executed on its Pacemaker Remote
* node. Determine the proper node that the remote action should be routed
* through.
*
* \param[in] action (Potentially remote) action to route
*
* \return Connection host that action should be routed through if remote,
* otherwise NULL
*/
pe_node_t *
pcmk__connection_host_for_action(const pe_action_t *action)
{
pe_node_t *began_on = NULL;
pe_node_t *ended_on = NULL;
bool partial_migration = false;
const char *task = action->task;
if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_none)
|| !pe__is_guest_or_remote_node(action->node)) {
return NULL;
}
CRM_ASSERT(action->node->details->remote_rsc != NULL);
began_on = pe__current_node(action->node->details->remote_rsc);
ended_on = action->node->details->remote_rsc->allocated_to;
if (action->node->details->remote_rsc
&& (action->node->details->remote_rsc->container == NULL)
&& action->node->details->remote_rsc->partial_migration_target) {
partial_migration = true;
}
if (began_on == NULL) {
crm_trace("Routing %s for %s through remote connection's "
"next node %s (starting)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(ended_on? ended_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return ended_on;
}
if (ended_on == NULL) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (stopping)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
if (pe__same_node(began_on, ended_on)) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (not moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
/* If we get here, the remote connection is moving during this transition.
* This means some actions for resources behind the connection will get
* routed through the cluster node the connection resource is currently on,
* and others are routed through the cluster node the connection will end up
* on.
*/
if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
task = g_hash_table_lookup(action->meta, "notify_operation");
}
/*
* Stop, demote, and migration actions must occur before the connection can
* move (these actions are required before the remote resource can stop). In
* this case, we know these actions have to be routed through the initial
* cluster node the connection resource lived on before the move takes
* place.
*
* The exception is a partial migration of a (non-guest) remote connection
* resource; in that case, all actions (even these) will be ordered after
* the connection's pseudo-start on the migration target, so the target is
* the router node.
*/
if (pcmk__strcase_any_of(task, PCMK_ACTION_CANCEL, PCMK_ACTION_STOP,
- PCMK_ACTION_DEMOTE, "migrate_from",
+ PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_MIGRATE_TO, NULL)
&& !partial_migration) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
/* Everything else (start, promote, monitor, probe, refresh,
* clear failcount, delete, ...) must occur after the connection starts on
* the node it is moving to.
*/
crm_trace("Routing %s for %s through remote connection's "
"next node %s (moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(ended_on? ended_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return ended_on;
}
/*!
* \internal
* \brief Replace remote connection's addr="#uname" with actual address
*
* REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource
* with its "addr" parameter set to "#uname", pull the actual value from the
* parameters evaluated without a node (which was put there earlier in
* pcmk__create_graph() when the bundle's expand() method was called).
*
* \param[in,out] rsc Resource to check
* \param[in,out] params Resource parameters evaluated per node
*/
void
pcmk__substitute_remote_addr(pe_resource_t *rsc, GHashTable *params)
{
const char *remote_addr = g_hash_table_lookup(params,
XML_RSC_ATTR_REMOTE_RA_ADDR);
if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) {
GHashTable *base = pe_rsc_params(rsc, NULL, rsc->cluster);
remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR);
if (remote_addr != NULL) {
g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
strdup(remote_addr));
}
}
}
/*!
* \brief Add special bundle meta-attributes to XML
*
* If a given action will be executed on a guest node (including a bundle),
* add the special bundle meta-attribute "container-attribute-target" and
* environment variable "physical_host" as XML attributes (using meta-attribute
* naming).
*
* \param[in,out] args_xml XML to add attributes to
* \param[in] action Action to check
*/
void
pcmk__add_bundle_meta_to_xml(xmlNode *args_xml, const pe_action_t *action)
{
const pe_node_t *guest = action->node;
const pe_node_t *host = NULL;
enum action_tasks task;
if (!pe__is_guest_node(guest)) {
return;
}
task = text2task(action->task);
if ((task == action_notify) || (task == action_notified)) {
task = text2task(g_hash_table_lookup(action->meta, "notify_operation"));
}
switch (task) {
case stop_rsc:
case stopped_rsc:
case action_demote:
case action_demoted:
// "Down" actions take place on guest's current host
host = pe__current_node(guest->details->remote_rsc->container);
break;
case start_rsc:
case started_rsc:
case monitor_rsc:
case action_promote:
case action_promoted:
// "Up" actions take place on guest's next host
host = guest->details->remote_rsc->container->allocated_to;
break;
default:
break;
}
if (host != NULL) {
hash2metafield((gpointer) XML_RSC_ATTR_TARGET,
(gpointer) g_hash_table_lookup(action->rsc->meta,
XML_RSC_ATTR_TARGET),
(gpointer) args_xml);
hash2metafield((gpointer) PCMK__ENV_PHYSICAL_HOST,
(gpointer) host->details->uname,
(gpointer) args_xml);
}
}
diff --git a/lib/pengine/common.c b/lib/pengine/common.c
index fa474e22d0..6a3e3cf465 100644
--- a/lib/pengine/common.c
+++ b/lib/pengine/common.c
@@ -1,627 +1,627 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/util.h>
#include <glib.h>
#include <crm/pengine/internal.h>
gboolean was_processing_error = FALSE;
gboolean was_processing_warning = FALSE;
static bool
check_placement_strategy(const char *value)
{
return pcmk__strcase_any_of(value, "default", "utilization", "minimal",
"balanced", NULL);
}
static pcmk__cluster_option_t pe_opts[] = {
/* name, old name, type, allowed values,
* default value, validator,
* short description,
* long description
*/
{
"no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide",
"stop", pcmk__valid_quorum,
N_("What to do when the cluster does not have quorum"),
NULL
},
{
"symmetric-cluster", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether resources can run on any node by default"),
NULL
},
{
"maintenance-mode", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether the cluster should refrain from monitoring, starting, "
"and stopping resources"),
NULL
},
{
"start-failure-is-fatal", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether a start failure should prevent a resource from being "
"recovered on the same node"),
N_("When true, the cluster will immediately ban a resource from a node "
"if it fails to start there. When false, the cluster will instead "
"check the resource's fail count against its migration-threshold.")
},
{
"enable-startup-probes", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether the cluster should check for active resources during start-up"),
NULL
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether to lock resources to a cleanly shut down node"),
N_("When true, resources active on a node when it is cleanly shut down "
"are kept \"locked\" to that node (not allowed to run elsewhere) "
"until they start again on that node after it rejoins (or for at "
"most shutdown-lock-limit, if set). Stonith resources and "
"Pacemaker Remote connections are never locked. Clone and bundle "
"instances and the promoted role of promotable clones are "
"currently never locked, though support could be added in a future "
"release.")
},
{
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Do not lock resources to a cleanly shut down node longer than "
"this"),
N_("If shutdown-lock is true and this is set to a nonzero time "
"duration, shutdown locks will expire after this much time has "
"passed since the shutdown was initiated, even if the node has not "
"rejoined.")
},
// Fencing-related options
{
"stonith-enabled", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("*** Advanced Use Only *** "
"Whether nodes may be fenced as part of recovery"),
N_("If false, unresponsive nodes are immediately assumed to be harmless, "
"and resources that were active on them may be recovered "
"elsewhere. This can result in a \"split-brain\" situation, "
"potentially leading to data loss and/or service unavailability.")
},
{
"stonith-action", NULL, "select", "reboot, off, poweroff",
"reboot", pcmk__is_fencing_action,
N_("Action to send to fence device when a node needs to be fenced "
"(\"poweroff\" is a deprecated alias for \"off\")"),
NULL
},
{
"stonith-timeout", NULL, "time", NULL,
"60s", pcmk__valid_interval_spec,
N_("*** Advanced Use Only *** Unused by Pacemaker"),
N_("This value is not used by Pacemaker, but is kept for backward "
"compatibility, and certain legacy fence agents might use it.")
},
{
XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether watchdog integration is enabled"),
N_("This is set automatically by the cluster according to whether SBD "
"is detected to be in use. User-configured values are ignored. "
"The value `true` is meaningful if diskless SBD is used and "
"`stonith-watchdog-timeout` is nonzero. In that case, if fencing "
"is required, watchdog-based self-fencing will be performed via "
"SBD without requiring a fencing resource explicitly configured.")
},
{
"concurrent-fencing", NULL, "boolean", NULL,
PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean,
N_("Allow performing fencing operations in parallel"),
NULL
},
{
"startup-fencing", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"),
N_("Setting this to false may lead to a \"split-brain\" situation,"
"potentially leading to data loss and/or service unavailability.")
},
{
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL,
"0", pcmk__valid_interval_spec,
N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"),
N_("Apply specified delay for the fencings that are targeting the lost "
"nodes with the highest total resource priority in case we don't "
"have the majority of the nodes in our cluster partition, so that "
"the more significant nodes potentially win any fencing match, "
"which is especially meaningful under split-brain of 2-node "
"cluster. A promoted resource instance takes the base priority + 1 "
"on calculation if the base priority is not 0. Any static/random "
"delays that are introduced by `pcmk_delay_base/max` configured "
"for the corresponding fencing resources will be added to this "
"delay. This delay should be significantly greater than, safely "
"twice, the maximum `pcmk_delay_base/max`. By default, priority "
"fencing delay is disabled.")
},
{
XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL,
"10min", pcmk__valid_interval_spec,
N_("How long to wait for a node that has joined the cluster to join "
"the process group"),
N_("A node that has joined the cluster can be pending on joining the "
"process group. We wait up to this much time for it. If it times "
"out, fencing targeting the node will be issued if enabled.")
},
{
"cluster-delay", NULL, "time", NULL,
"60s", pcmk__valid_interval_spec,
N_("Maximum time for node-to-node communication"),
N_("The node elected Designated Controller (DC) will consider an action "
"failed if it does not get a response from the node executing the "
"action within this time (after considering the action's own "
"timeout). The \"correct\" value will depend on the speed and "
"load of your network and cluster nodes.")
},
{
"batch-limit", NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Maximum number of jobs that the cluster may execute in parallel "
"across all nodes"),
N_("The \"correct\" value will depend on the speed and load of your "
"network and cluster nodes. If set to 0, the cluster will "
"impose a dynamically calculated limit when any node has a "
"high load.")
},
{
"migration-limit", NULL, "integer", NULL,
"-1", pcmk__valid_number,
N_("The number of live migration actions that the cluster is allowed "
"to execute in parallel on a node (-1 means no limit)")
},
/* Orphans and stopping */
{
"stop-all-resources", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("Whether the cluster should stop all active resources"),
NULL
},
{
"stop-orphan-resources", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether to stop resources that were removed from the configuration"),
NULL
},
{
"stop-orphan-actions", NULL, "boolean", NULL,
"true", pcmk__valid_boolean,
N_("Whether to cancel recurring actions removed from the configuration"),
NULL
},
{
"remove-after-stop", NULL, "boolean", NULL,
"false", pcmk__valid_boolean,
N_("*** Deprecated *** Whether to remove stopped resources from "
"the executor"),
N_("Values other than default are poorly tested and potentially dangerous."
" This option will be removed in a future release.")
},
/* Storing inputs */
{
"pe-error-series-max", NULL, "integer", NULL,
"-1", pcmk__valid_number,
N_("The number of scheduler inputs resulting in errors to save"),
N_("Zero to disable, -1 to store unlimited.")
},
{
"pe-warn-series-max", NULL, "integer", NULL,
"5000", pcmk__valid_number,
N_("The number of scheduler inputs resulting in warnings to save"),
N_("Zero to disable, -1 to store unlimited.")
},
{
"pe-input-series-max", NULL, "integer", NULL,
"4000", pcmk__valid_number,
N_("The number of scheduler inputs without errors or warnings to save"),
N_("Zero to disable, -1 to store unlimited.")
},
/* Node health */
{
PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select",
PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", "
PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", "
PCMK__VALUE_CUSTOM,
PCMK__VALUE_NONE, pcmk__validate_health_strategy,
N_("How cluster should react to node health attributes"),
N_("Requires external entities to create node attributes (named with "
"the prefix \"#health\") with values \"red\", "
"\"yellow\", or \"green\".")
},
{
PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("Base health score assigned to a node"),
N_("Only used when \"node-health-strategy\" is set to \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"green\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL,
"0", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"yellow\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
{
PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL,
"-INFINITY", pcmk__valid_number,
N_("The score to use for a node health attribute whose value is \"red\""),
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".")
},
/*Placement Strategy*/
{
"placement-strategy", NULL, "select",
"default, utilization, minimal, balanced",
"default", check_placement_strategy,
N_("How the cluster should allocate resources to nodes"),
NULL
},
};
void
pe_metadata(pcmk__output_t *out)
{
const char *desc_short = "Pacemaker scheduler options";
const char *desc_long = "Cluster options used by Pacemaker's scheduler";
gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short,
desc_long, pe_opts,
PCMK__NELEM(pe_opts));
out->output_xml(out, "metadata", s);
g_free(s);
}
void
verify_pe_options(GHashTable * options)
{
pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts));
}
const char *
pe_pref(GHashTable * options, const char *name)
{
return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name);
}
const char *
fail2text(enum action_fail_response fail)
{
const char *result = "<unknown>";
switch (fail) {
case action_fail_ignore:
result = "ignore";
break;
case action_fail_demote:
result = "demote";
break;
case action_fail_block:
result = "block";
break;
case action_fail_recover:
result = "recover";
break;
case action_fail_migrate:
result = "migrate";
break;
case action_fail_stop:
result = "stop";
break;
case action_fail_fence:
result = "fence";
break;
case action_fail_standby:
result = "standby";
break;
case action_fail_restart_container:
result = "restart-container";
break;
case action_fail_reset_remote:
result = "reset-remote";
break;
}
return result;
}
enum action_tasks
text2task(const char *task)
{
if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)) {
return stop_rsc;
} else if (pcmk__str_eq(task, CRMD_ACTION_STOPPED, pcmk__str_casei)) {
return stopped_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_START, pcmk__str_casei)) {
return start_rsc;
} else if (pcmk__str_eq(task, CRMD_ACTION_STARTED, pcmk__str_casei)) {
return started_rsc;
} else if (pcmk__str_eq(task, CRM_OP_SHUTDOWN, pcmk__str_casei)) {
return shutdown_crm;
} else if (pcmk__str_eq(task, CRM_OP_FENCE, pcmk__str_casei)) {
return stonith_node;
} else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
return monitor_rsc;
} else if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
return action_notify;
} else if (pcmk__str_eq(task, CRMD_ACTION_NOTIFIED, pcmk__str_casei)) {
return action_notified;
} else if (pcmk__str_eq(task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) {
return action_promote;
} else if (pcmk__str_eq(task, PCMK_ACTION_DEMOTE, pcmk__str_casei)) {
return action_demote;
} else if (pcmk__str_eq(task, CRMD_ACTION_PROMOTED, pcmk__str_casei)) {
return action_promoted;
} else if (pcmk__str_eq(task, CRMD_ACTION_DEMOTED, pcmk__str_casei)) {
return action_demoted;
}
#if SUPPORT_TRACING
if (pcmk__str_eq(task, PCMK_ACTION_CANCEL, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_DELETE, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
return no_action;
} else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_TO, pcmk__str_casei)) {
return no_action;
- } else if (pcmk__str_eq(task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
+ } else if (pcmk__str_eq(task, PCMK_ACTION_MIGRATE_FROM, pcmk__str_casei)) {
return no_action;
}
crm_trace("Unsupported action: %s", task);
#endif
return no_action;
}
const char *
task2text(enum action_tasks task)
{
const char *result = "<unknown>";
switch (task) {
case no_action:
result = "no_action";
break;
case stop_rsc:
result = PCMK_ACTION_STOP;
break;
case stopped_rsc:
result = CRMD_ACTION_STOPPED;
break;
case start_rsc:
result = PCMK_ACTION_START;
break;
case started_rsc:
result = CRMD_ACTION_STARTED;
break;
case shutdown_crm:
result = CRM_OP_SHUTDOWN;
break;
case stonith_node:
result = CRM_OP_FENCE;
break;
case monitor_rsc:
result = PCMK_ACTION_MONITOR;
break;
case action_notify:
result = PCMK_ACTION_NOTIFY;
break;
case action_notified:
result = CRMD_ACTION_NOTIFIED;
break;
case action_promote:
result = PCMK_ACTION_PROMOTE;
break;
case action_promoted:
result = CRMD_ACTION_PROMOTED;
break;
case action_demote:
result = PCMK_ACTION_DEMOTE;
break;
case action_demoted:
result = CRMD_ACTION_DEMOTED;
break;
}
return result;
}
const char *
role2text(enum rsc_role_e role)
{
switch (role) {
case RSC_ROLE_UNKNOWN:
return RSC_ROLE_UNKNOWN_S;
case RSC_ROLE_STOPPED:
return RSC_ROLE_STOPPED_S;
case RSC_ROLE_STARTED:
return RSC_ROLE_STARTED_S;
case RSC_ROLE_UNPROMOTED:
#ifdef PCMK__COMPAT_2_0
return RSC_ROLE_UNPROMOTED_LEGACY_S;
#else
return RSC_ROLE_UNPROMOTED_S;
#endif
case RSC_ROLE_PROMOTED:
#ifdef PCMK__COMPAT_2_0
return RSC_ROLE_PROMOTED_LEGACY_S;
#else
return RSC_ROLE_PROMOTED_S;
#endif
}
CRM_CHECK(role >= RSC_ROLE_UNKNOWN, return RSC_ROLE_UNKNOWN_S);
CRM_CHECK(role < RSC_ROLE_MAX, return RSC_ROLE_UNKNOWN_S);
// coverity[dead_error_line]
return RSC_ROLE_UNKNOWN_S;
}
enum rsc_role_e
text2role(const char *role)
{
CRM_ASSERT(role != NULL);
if (pcmk__str_eq(role, RSC_ROLE_STOPPED_S, pcmk__str_casei)) {
return RSC_ROLE_STOPPED;
} else if (pcmk__str_eq(role, RSC_ROLE_STARTED_S, pcmk__str_casei)) {
return RSC_ROLE_STARTED;
} else if (pcmk__strcase_any_of(role, RSC_ROLE_UNPROMOTED_S,
RSC_ROLE_UNPROMOTED_LEGACY_S, NULL)) {
return RSC_ROLE_UNPROMOTED;
} else if (pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S, NULL)) {
return RSC_ROLE_PROMOTED;
} else if (pcmk__str_eq(role, RSC_ROLE_UNKNOWN_S, pcmk__str_casei)) {
return RSC_ROLE_UNKNOWN;
}
crm_err("Unknown role: %s", role);
return RSC_ROLE_UNKNOWN;
}
void
add_hash_param(GHashTable * hash, const char *name, const char *value)
{
CRM_CHECK(hash != NULL, return);
crm_trace("Adding name='%s' value='%s' to hash table",
pcmk__s(name, "<null>"), pcmk__s(value, "<null>"));
if (name == NULL || value == NULL) {
return;
} else if (pcmk__str_eq(value, "#default", pcmk__str_casei)) {
return;
} else if (g_hash_table_lookup(hash, name) == NULL) {
g_hash_table_insert(hash, strdup(name), strdup(value));
}
}
/*!
* \internal
* \brief Look up an attribute value on the appropriate node
*
* If \p node is a guest node and either the \c XML_RSC_ATTR_TARGET meta
* attribute is set to "host" for \p rsc or \p force_host is \c true, query the
* attribute on the node's host. Otherwise, query the attribute on \p node
* itself.
*
* \param[in] node Node to query attribute value on by default
* \param[in] name Name of attribute to query
* \param[in] rsc Resource on whose behalf we're querying
* \param[in] node_type Type of resource location lookup
* \param[in] force_host Force a lookup on the guest node's host, regardless of
* the \c XML_RSC_ATTR_TARGET value
*
* \return Value of the attribute on \p node or on the host of \p node
*
* \note If \p force_host is \c true, \p node \e must be a guest node.
*/
const char *
pe__node_attribute_calculated(const pe_node_t *node, const char *name,
const pe_resource_t *rsc,
enum pe__rsc_node node_type,
bool force_host)
{
// @TODO: Use pe__is_guest_node() after merging libpe_{rules,status}
bool is_guest = (node != NULL) && (node->details->type == node_remote)
&& (node->details->remote_rsc != NULL)
&& (node->details->remote_rsc->container != NULL);
const char *source = NULL;
const char *node_type_s = NULL;
const char *reason = NULL;
const pe_resource_t *container = NULL;
const pe_node_t *host = NULL;
CRM_ASSERT((node != NULL) && (name != NULL) && (rsc != NULL)
&& (!force_host || is_guest));
/* Ignore XML_RSC_ATTR_TARGET if node is not a guest node. This represents a
* user configuration error.
*/
source = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET);
if (!force_host
&& (!is_guest || !pcmk__str_eq(source, "host", pcmk__str_casei))) {
return g_hash_table_lookup(node->details->attrs, name);
}
container = node->details->remote_rsc->container;
switch (node_type) {
case pe__rsc_node_assigned:
node_type_s = "assigned";
host = container->allocated_to;
if (host == NULL) {
reason = "not assigned";
}
break;
case pe__rsc_node_current:
node_type_s = "current";
if (container->running_on != NULL) {
host = container->running_on->data;
}
if (host == NULL) {
reason = "inactive";
}
break;
default:
// Add support for other enum pe__rsc_node values if needed
CRM_ASSERT(false);
break;
}
if (host != NULL) {
const char *value = g_hash_table_lookup(host->details->attrs, name);
pe_rsc_trace(rsc,
"%s: Value lookup for %s on %s container host %s %s%s",
rsc->id, name, node_type_s, pe__node_name(host),
((value != NULL)? "succeeded: " : "failed"),
pcmk__s(value, ""));
return value;
}
pe_rsc_trace(rsc,
"%s: Not looking for %s on %s container host: %s is %s",
rsc->id, name, node_type_s, container->id, reason);
return NULL;
}
const char *
pe_node_attribute_raw(const pe_node_t *node, const char *name)
{
if(node == NULL) {
return NULL;
}
return g_hash_table_lookup(node->details->attrs, name);
}
diff --git a/lib/pengine/native.c b/lib/pengine/native.c
index 0275c51502..d5358ba092 100644
--- a/lib/pengine/native.c
+++ b/lib/pengine/native.c
@@ -1,1436 +1,1437 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdint.h>
#include <crm/common/output.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/complex.h>
#include <crm/pengine/internal.h>
#include <crm/msg_xml.h>
#include <pe_status_private.h>
#ifdef PCMK__COMPAT_2_0
#define PROVIDER_SEP "::"
#else
#define PROVIDER_SEP ":"
#endif
/*!
* \internal
* \brief Check whether a resource is active on multiple nodes
*/
static bool
is_multiply_active(const pe_resource_t *rsc)
{
unsigned int count = 0;
if (rsc->variant == pe_native) {
pe__find_active_requires(rsc, &count);
}
return count > 1;
}
static void
native_priority_to_node(pe_resource_t * rsc, pe_node_t * node, gboolean failed)
{
int priority = 0;
if ((rsc->priority == 0) || (failed == TRUE)) {
return;
}
if (rsc->role == RSC_ROLE_PROMOTED) {
// Promoted instance takes base priority + 1
priority = rsc->priority + 1;
} else {
priority = rsc->priority;
}
node->details->priority += priority;
pe_rsc_trace(rsc, "%s now has priority %d with %s'%s' (priority: %d%s)",
pe__node_name(node), node->details->priority,
(rsc->role == RSC_ROLE_PROMOTED)? "promoted " : "",
rsc->id, rsc->priority,
(rsc->role == RSC_ROLE_PROMOTED)? " + 1" : "");
/* Priority of a resource running on a guest node is added to the cluster
* node as well. */
if (node->details->remote_rsc
&& node->details->remote_rsc->container) {
GList *gIter = node->details->remote_rsc->container->running_on;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *a_node = gIter->data;
a_node->details->priority += priority;
pe_rsc_trace(rsc, "%s now has priority %d with %s'%s' (priority: %d%s) "
"from guest node %s",
pe__node_name(a_node), a_node->details->priority,
(rsc->role == RSC_ROLE_PROMOTED)? "promoted " : "",
rsc->id, rsc->priority,
(rsc->role == RSC_ROLE_PROMOTED)? " + 1" : "",
pe__node_name(node));
}
}
}
void
native_add_running(pe_resource_t * rsc, pe_node_t * node, pe_working_set_t * data_set, gboolean failed)
{
GList *gIter = rsc->running_on;
CRM_CHECK(node != NULL, return);
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *a_node = (pe_node_t *) gIter->data;
CRM_CHECK(a_node != NULL, return);
if (pcmk__str_eq(a_node->details->id, node->details->id, pcmk__str_casei)) {
return;
}
}
pe_rsc_trace(rsc, "Adding %s to %s %s", rsc->id, pe__node_name(node),
pcmk_is_set(rsc->flags, pe_rsc_managed)? "" : "(unmanaged)");
rsc->running_on = g_list_append(rsc->running_on, node);
if (rsc->variant == pe_native) {
node->details->running_rsc = g_list_append(node->details->running_rsc, rsc);
native_priority_to_node(rsc, node, failed);
}
if (rsc->variant == pe_native && node->details->maintenance) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_maintenance);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_resource_t *p = rsc->parent;
pe_rsc_info(rsc, "resource %s isn't managed", rsc->id);
resource_location(rsc, node, INFINITY, "not_managed_default", data_set);
while(p && node->details->online) {
/* add without the additional location constraint */
p->running_on = g_list_append(p->running_on, node);
p = p->parent;
}
return;
}
if (is_multiply_active(rsc)) {
switch (rsc->recovery_type) {
case pcmk_multiply_active_stop:
{
GHashTableIter gIter;
pe_node_t *local_node = NULL;
/* make sure it doesn't come up again */
if (rsc->allowed_nodes != NULL) {
g_hash_table_destroy(rsc->allowed_nodes);
}
rsc->allowed_nodes = pe__node_list2table(data_set->nodes);
g_hash_table_iter_init(&gIter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&gIter, NULL, (void **)&local_node)) {
local_node->weight = -INFINITY;
}
}
break;
case pcmk_multiply_active_block:
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_block);
/* If the resource belongs to a group or bundle configured with
* multiple-active=block, block the entire entity.
*/
if (rsc->parent
&& (rsc->parent->variant == pe_group || rsc->parent->variant == pe_container)
&& (rsc->parent->recovery_type == pcmk_multiply_active_block)) {
GList *gIter = rsc->parent->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
pe__clear_resource_flags(child, pe_rsc_managed);
pe__set_resource_flags(child, pe_rsc_block);
}
}
break;
// pcmk_multiply_active_restart, pcmk_multiply_active_unexpected
default:
/* The scheduler will do the right thing because the relevant
* variables and flags are set when unpacking the history.
*/
break;
}
crm_debug("%s is active on multiple nodes including %s: %s",
rsc->id, pe__node_name(node),
recovery2text(rsc->recovery_type));
} else {
pe_rsc_trace(rsc, "Resource %s is active on %s",
rsc->id, pe__node_name(node));
}
if (rsc->parent != NULL) {
native_add_running(rsc->parent, node, data_set, FALSE);
}
}
static void
recursive_clear_unique(pe_resource_t *rsc, gpointer user_data)
{
pe__clear_resource_flags(rsc, pe_rsc_unique);
add_hash_param(rsc->meta, XML_RSC_ATTR_UNIQUE, XML_BOOLEAN_FALSE);
g_list_foreach(rsc->children, (GFunc) recursive_clear_unique, NULL);
}
gboolean
native_unpack(pe_resource_t * rsc, pe_working_set_t * data_set)
{
pe_resource_t *parent = uber_parent(rsc);
const char *standard = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
uint32_t ra_caps = pcmk_get_ra_caps(standard);
pe_rsc_trace(rsc, "Processing resource %s...", rsc->id);
// Only some agent standards support unique and promotable clones
if (!pcmk_is_set(ra_caps, pcmk_ra_cap_unique)
&& pcmk_is_set(rsc->flags, pe_rsc_unique) && pe_rsc_is_clone(parent)) {
/* @COMPAT We should probably reject this situation as an error (as we
* do for promotable below) rather than warn and convert, but that would
* be a backward-incompatible change that we should probably do with a
* transform at a schema major version bump.
*/
pe__force_anon(standard, parent, rsc->id, data_set);
/* Clear globally-unique on the parent and all its descendants unpacked
* so far (clearing the parent should make any future children unpacking
* correct). We have to clear this resource explicitly because it isn't
* hooked into the parent's children yet.
*/
recursive_clear_unique(parent, NULL);
recursive_clear_unique(rsc, NULL);
}
if (!pcmk_is_set(ra_caps, pcmk_ra_cap_promotable)
&& pcmk_is_set(parent->flags, pe_rsc_promotable)) {
pe_err("Resource %s is of type %s and therefore "
"cannot be used as a promotable clone resource",
rsc->id, standard);
return FALSE;
}
return TRUE;
}
static bool
rsc_is_on_node(pe_resource_t *rsc, const pe_node_t *node, int flags)
{
pe_rsc_trace(rsc, "Checking whether %s is on %s",
rsc->id, pe__node_name(node));
if (pcmk_is_set(flags, pe_find_current) && rsc->running_on) {
for (GList *iter = rsc->running_on; iter; iter = iter->next) {
pe_node_t *loc = (pe_node_t *) iter->data;
if (loc->details == node->details) {
return true;
}
}
} else if (pcmk_is_set(flags, pe_find_inactive)
&& (rsc->running_on == NULL)) {
return true;
} else if (!pcmk_is_set(flags, pe_find_current) && rsc->allocated_to
&& (rsc->allocated_to->details == node->details)) {
return true;
}
return false;
}
pe_resource_t *
native_find_rsc(pe_resource_t * rsc, const char *id, const pe_node_t *on_node,
int flags)
{
bool match = false;
pe_resource_t *result = NULL;
CRM_CHECK(id && rsc && rsc->id, return NULL);
if (flags & pe_find_clone) {
const char *rid = ID(rsc->xml);
if (!pe_rsc_is_clone(pe__const_top_resource(rsc, false))) {
match = false;
} else if (!strcmp(id, rsc->id) || pcmk__str_eq(id, rid, pcmk__str_none)) {
match = true;
}
} else if (!strcmp(id, rsc->id)) {
match = true;
} else if (pcmk_is_set(flags, pe_find_renamed)
&& rsc->clone_name && strcmp(rsc->clone_name, id) == 0) {
match = true;
} else if (pcmk_is_set(flags, pe_find_any)
|| (pcmk_is_set(flags, pe_find_anon)
&& !pcmk_is_set(rsc->flags, pe_rsc_unique))) {
match = pe_base_name_eq(rsc, id);
}
if (match && on_node) {
if (!rsc_is_on_node(rsc, on_node, flags)) {
match = false;
}
}
if (match) {
return rsc;
}
for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
result = rsc->fns->find_rsc(child, id, on_node, flags);
if (result) {
return result;
}
}
return NULL;
}
// create is ignored
char *
native_parameter(pe_resource_t * rsc, pe_node_t * node, gboolean create, const char *name,
pe_working_set_t * data_set)
{
char *value_copy = NULL;
const char *value = NULL;
GHashTable *params = NULL;
CRM_CHECK(rsc != NULL, return NULL);
CRM_CHECK(name != NULL && strlen(name) != 0, return NULL);
pe_rsc_trace(rsc, "Looking up %s in %s", name, rsc->id);
params = pe_rsc_params(rsc, node, data_set);
value = g_hash_table_lookup(params, name);
if (value == NULL) {
/* try meta attributes instead */
value = g_hash_table_lookup(rsc->meta, name);
}
pcmk__str_update(&value_copy, value);
return value_copy;
}
gboolean
native_active(pe_resource_t * rsc, gboolean all)
{
for (GList *gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
pe_node_t *a_node = (pe_node_t *) gIter->data;
if (a_node->details->unclean) {
pe_rsc_trace(rsc, "Resource %s: %s is unclean",
rsc->id, pe__node_name(a_node));
return TRUE;
} else if (a_node->details->online == FALSE && pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "Resource %s: %s is offline",
rsc->id, pe__node_name(a_node));
} else {
pe_rsc_trace(rsc, "Resource %s active on %s",
rsc->id, pe__node_name(a_node));
return TRUE;
}
}
return FALSE;
}
struct print_data_s {
long options;
void *print_data;
};
static const char *
native_pending_state(const pe_resource_t *rsc)
{
const char *pending_state = NULL;
if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_START, pcmk__str_casei)) {
pending_state = "Starting";
} else if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_STOP,
pcmk__str_casei)) {
pending_state = "Stopping";
} else if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_casei)) {
pending_state = "Migrating";
- } else if (pcmk__str_eq(rsc->pending_task, CRMD_ACTION_MIGRATED, pcmk__str_casei)) {
+ } else if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_MIGRATE_FROM,
+ pcmk__str_casei)) {
/* Work might be done in here. */
pending_state = "Migrating";
} else if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_PROMOTE,
pcmk__str_casei)) {
pending_state = "Promoting";
} else if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_DEMOTE,
pcmk__str_casei)) {
pending_state = "Demoting";
}
return pending_state;
}
static const char *
native_pending_task(const pe_resource_t *rsc)
{
const char *pending_task = NULL;
if (pcmk__str_eq(rsc->pending_task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
pending_task = "Monitoring";
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, uncomment this and the corresponding part of
* unpack.c:unpack_rsc_op().
*/
/*
} else if (pcmk__str_eq(rsc->pending_task, "probe", pcmk__str_casei)) {
pending_task = "Checking";
*/
}
return pending_task;
}
static enum rsc_role_e
native_displayable_role(const pe_resource_t *rsc)
{
enum rsc_role_e role = rsc->role;
if ((role == RSC_ROLE_STARTED)
&& pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pe_rsc_promotable)) {
role = RSC_ROLE_UNPROMOTED;
}
return role;
}
static const char *
native_displayable_state(const pe_resource_t *rsc, bool print_pending)
{
const char *rsc_state = NULL;
if (print_pending) {
rsc_state = native_pending_state(rsc);
}
if (rsc_state == NULL) {
rsc_state = role2text(native_displayable_role(rsc));
}
return rsc_state;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
static void
native_print_xml(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
const char *rsc_state = native_displayable_state(rsc, pcmk_is_set(options, pe_print_pending));
const char *target_role = NULL;
/* resource information. */
status_print("%s<resource ", pre_text);
status_print(XML_ATTR_ID "=\"%s\" ", rsc_printable_id(rsc));
status_print("resource_agent=\"%s%s%s:%s\" ", class,
((prov == NULL)? "" : PROVIDER_SEP),
((prov == NULL)? "" : prov),
crm_element_value(rsc->xml, XML_ATTR_TYPE));
status_print("role=\"%s\" ", rsc_state);
if (rsc->meta) {
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
}
if (target_role) {
status_print("target_role=\"%s\" ", target_role);
}
status_print("active=\"%s\" ", pcmk__btoa(rsc->fns->active(rsc, TRUE)));
status_print("orphaned=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_orphan));
status_print("blocked=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_block));
status_print("managed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_managed));
status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pe_rsc_failed));
status_print("failure_ignored=\"%s\" ",
pe__rsc_bool_str(rsc, pe_rsc_failure_ignored));
status_print("nodes_running_on=\"%d\" ", g_list_length(rsc->running_on));
if (options & pe_print_pending) {
const char *pending_task = native_pending_task(rsc);
if (pending_task) {
status_print("pending=\"%s\" ", pending_task);
}
}
/* print out the nodes this resource is running on */
if (options & pe_print_rsconly) {
status_print("/>\n");
/* do nothing */
} else if (rsc->running_on != NULL) {
GList *gIter = rsc->running_on;
status_print(">\n");
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
status_print("%s <node name=\"%s\" " XML_ATTR_ID "=\"%s\" "
"cached=\"%s\"/>\n",
pre_text, pcmk__s(node->details->uname, ""),
node->details->id, pcmk__btoa(!node->details->online));
}
status_print("%s</resource>\n", pre_text);
} else {
status_print("/>\n");
}
}
// Append a flag to resource description string's flags list
static bool
add_output_flag(GString *s, const char *flag_desc, bool have_flags)
{
g_string_append(s, (have_flags? ", " : " ("));
g_string_append(s, flag_desc);
return true;
}
// Append a node name to resource description string's node list
static bool
add_output_node(GString *s, const char *node, bool have_nodes)
{
g_string_append(s, (have_nodes? " " : " [ "));
g_string_append(s, node);
return true;
}
/*!
* \internal
* \brief Create a string description of a resource
*
* \param[in] rsc Resource to describe
* \param[in] name Desired identifier for the resource
* \param[in] node If not NULL, node that resource is "on"
* \param[in] show_opts Bitmask of pcmk_show_opt_e.
* \param[in] target_role Resource's target role
* \param[in] show_nodes Whether to display nodes when multiply active
*
* \return Newly allocated string description of resource
* \note Caller must free the result with g_free().
*/
gchar *
pcmk__native_output_string(const pe_resource_t *rsc, const char *name,
const pe_node_t *node, uint32_t show_opts,
const char *target_role, bool show_nodes)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
const char *provider = NULL;
const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
GString *outstr = NULL;
bool have_flags = false;
if (rsc->variant != pe_native) {
return NULL;
}
CRM_CHECK(name != NULL, name = "unknown");
CRM_CHECK(kind != NULL, kind = "unknown");
CRM_CHECK(class != NULL, class = "unknown");
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
}
if ((node == NULL) && (rsc->lock_node != NULL)) {
node = rsc->lock_node;
}
if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only)
|| pcmk__list_of_multiple(rsc->running_on)) {
node = NULL;
}
outstr = g_string_sized_new(128);
// Resource name and agent
pcmk__g_strcat(outstr,
name, "\t(", class, ((provider == NULL)? "" : PROVIDER_SEP),
pcmk__s(provider, ""), ":", kind, "):\t", NULL);
// State on node
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
g_string_append(outstr, " ORPHANED");
}
if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
enum rsc_role_e role = native_displayable_role(rsc);
g_string_append(outstr, " FAILED");
if (role > RSC_ROLE_UNPROMOTED) {
pcmk__add_word(&outstr, 0, role2text(role));
}
} else {
bool show_pending = pcmk_is_set(show_opts, pcmk_show_pending);
pcmk__add_word(&outstr, 0, native_displayable_state(rsc, show_pending));
}
if (node) {
pcmk__add_word(&outstr, 0, pe__node_name(node));
}
// Failed probe operation
if (native_displayable_role(rsc) == RSC_ROLE_STOPPED) {
xmlNode *probe_op = pe__failed_probe_for_rsc(rsc, node ? node->details->uname : NULL);
if (probe_op != NULL) {
int rc;
pcmk__scan_min_int(crm_element_value(probe_op, XML_LRM_ATTR_RC), &rc, 0);
pcmk__g_strcat(outstr, " (", services_ocf_exitcode_str(rc), ") ",
NULL);
}
}
// Flags, as: (<flag> [...])
if (node && !(node->details->online) && node->details->unclean) {
have_flags = add_output_flag(outstr, "UNCLEAN", have_flags);
}
if (node && (node == rsc->lock_node)) {
have_flags = add_output_flag(outstr, "LOCKED", have_flags);
}
if (pcmk_is_set(show_opts, pcmk_show_pending)) {
const char *pending_task = native_pending_task(rsc);
if (pending_task) {
have_flags = add_output_flag(outstr, pending_task, have_flags);
}
}
if (target_role) {
enum rsc_role_e target_role_e = text2role(target_role);
/* Only show target role if it limits our abilities (i.e. ignore
* Started, as it is the default anyways, and doesn't prevent the
* resource from becoming promoted).
*/
if (target_role_e == RSC_ROLE_STOPPED) {
have_flags = add_output_flag(outstr, "disabled", have_flags);
} else if (pcmk_is_set(pe__const_top_resource(rsc, false)->flags,
pe_rsc_promotable)
&& target_role_e == RSC_ROLE_UNPROMOTED) {
have_flags = add_output_flag(outstr, "target-role:", have_flags);
g_string_append(outstr, target_role);
}
}
// Blocked or maintenance implies unmanaged
if (pcmk_any_flags_set(rsc->flags, pe_rsc_block|pe_rsc_maintenance)) {
if (pcmk_is_set(rsc->flags, pe_rsc_block)) {
have_flags = add_output_flag(outstr, "blocked", have_flags);
} else if (pcmk_is_set(rsc->flags, pe_rsc_maintenance)) {
have_flags = add_output_flag(outstr, "maintenance", have_flags);
}
} else if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
have_flags = add_output_flag(outstr, "unmanaged", have_flags);
}
if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
have_flags = add_output_flag(outstr, "failure ignored", have_flags);
}
if (have_flags) {
g_string_append_c(outstr, ')');
}
// User-supplied description
if (pcmk_any_flags_set(show_opts, pcmk_show_rsc_only|pcmk_show_description)
|| pcmk__list_of_multiple(rsc->running_on)) {
const char *desc = crm_element_value(rsc->xml, XML_ATTR_DESC);
if (desc) {
g_string_append(outstr, " (");
g_string_append(outstr, desc);
g_string_append(outstr, ")");
}
}
if (show_nodes && !pcmk_is_set(show_opts, pcmk_show_rsc_only)
&& pcmk__list_of_multiple(rsc->running_on)) {
bool have_nodes = false;
for (GList *iter = rsc->running_on; iter != NULL; iter = iter->next) {
pe_node_t *n = (pe_node_t *) iter->data;
have_nodes = add_output_node(outstr, n->details->uname, have_nodes);
}
if (have_nodes) {
g_string_append(outstr, " ]");
}
}
return g_string_free(outstr, FALSE);
}
int
pe__common_output_html(pcmk__output_t *out, const pe_resource_t *rsc,
const char *name, const pe_node_t *node,
uint32_t show_opts)
{
const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
const char *target_role = NULL;
xmlNodePtr list_node = NULL;
const char *cl = NULL;
CRM_ASSERT(rsc->variant == pe_native);
CRM_ASSERT(kind != NULL);
if (rsc->meta) {
const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC);
if (crm_is_true(is_internal)
&& !pcmk_is_set(show_opts, pcmk_show_implicit_rscs)) {
crm_trace("skipping print of internal resource %s", rsc->id);
return pcmk_rc_no_output;
}
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
}
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
cl = "rsc-managed";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
cl = "rsc-failed";
} else if (rsc->variant == pe_native && (rsc->running_on == NULL)) {
cl = "rsc-failed";
} else if (pcmk__list_of_multiple(rsc->running_on)) {
cl = "rsc-multiple";
} else if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
cl = "rsc-failure-ignored";
} else {
cl = "rsc-ok";
}
{
gchar *s = pcmk__native_output_string(rsc, name, node, show_opts,
target_role, true);
list_node = pcmk__output_create_html_node(out, "li", NULL, NULL, NULL);
pcmk_create_html_node(list_node, "span", NULL, cl, s);
g_free(s);
}
return pcmk_rc_ok;
}
int
pe__common_output_text(pcmk__output_t *out, const pe_resource_t *rsc,
const char *name, const pe_node_t *node,
uint32_t show_opts)
{
const char *target_role = NULL;
CRM_ASSERT(rsc->variant == pe_native);
if (rsc->meta) {
const char *is_internal = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INTERNAL_RSC);
if (crm_is_true(is_internal)
&& !pcmk_is_set(show_opts, pcmk_show_implicit_rscs)) {
crm_trace("skipping print of internal resource %s", rsc->id);
return pcmk_rc_no_output;
}
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
}
{
gchar *s = pcmk__native_output_string(rsc, name, node, show_opts,
target_role, true);
out->list_item(out, NULL, "%s", s);
g_free(s);
}
return pcmk_rc_ok;
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
common_print(pe_resource_t *rsc, const char *pre_text, const char *name,
const pe_node_t *node, long options, void *print_data)
{
const char *target_role = NULL;
CRM_ASSERT(rsc->variant == pe_native);
if (rsc->meta) {
const char *is_internal = g_hash_table_lookup(rsc->meta,
XML_RSC_ATTR_INTERNAL_RSC);
if (crm_is_true(is_internal)
&& !pcmk_is_set(options, pe_print_implicit)) {
crm_trace("skipping print of internal resource %s", rsc->id);
return;
}
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
}
if (options & pe_print_xml) {
native_print_xml(rsc, pre_text, options, print_data);
return;
}
if ((pre_text == NULL) && (options & pe_print_printf)) {
pre_text = " ";
}
if (options & pe_print_html) {
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
status_print("<font color=\"yellow\">");
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
status_print("<font color=\"red\">");
} else if (rsc->running_on == NULL) {
status_print("<font color=\"red\">");
} else if (pcmk__list_of_multiple(rsc->running_on)) {
status_print("<font color=\"orange\">");
} else if (pcmk_is_set(rsc->flags, pe_rsc_failure_ignored)) {
status_print("<font color=\"yellow\">");
} else {
status_print("<font color=\"green\">");
}
}
{
gchar *resource_s = pcmk__native_output_string(rsc, name, node, options,
target_role, false);
status_print("%s%s", (pre_text? pre_text : ""), resource_s);
g_free(resource_s);
}
if (pcmk_is_set(options, pe_print_html)) {
status_print(" </font> ");
}
if (!pcmk_is_set(options, pe_print_rsconly)
&& pcmk__list_of_multiple(rsc->running_on)) {
GList *gIter = rsc->running_on;
int counter = 0;
if (options & pe_print_html) {
status_print("<ul>\n");
} else if ((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print("[");
}
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *n = (pe_node_t *) gIter->data;
counter++;
if (options & pe_print_html) {
status_print("<li>\n%s", pe__node_name(n));
} else if ((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print(" %s", pe__node_name(n));
} else if ((options & pe_print_log)) {
status_print("\t%d : %s", counter, pe__node_name(n));
} else {
status_print("%s", pe__node_name(n));
}
if (options & pe_print_html) {
status_print("</li>\n");
}
}
if (options & pe_print_html) {
status_print("</ul>\n");
} else if ((options & pe_print_printf)
|| (options & pe_print_ncurses)) {
status_print(" ]");
}
}
if (options & pe_print_html) {
status_print("<br/>\n");
} else if (options & pe_print_suppres_nl) {
/* nothing */
} else if ((options & pe_print_printf) || (options & pe_print_ncurses)) {
status_print("\n");
}
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
native_print(pe_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
const pe_node_t *node = NULL;
CRM_ASSERT(rsc->variant == pe_native);
if (options & pe_print_xml) {
native_print_xml(rsc, pre_text, options, print_data);
return;
}
node = pe__current_node(rsc);
if (node == NULL) {
// This is set only if a non-probe action is pending on this node
node = rsc->pending_node;
}
common_print(rsc, pre_text, rsc_printable_id(rsc), node, options, print_data);
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__resource_xml(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
bool print_pending = pcmk_is_set(show_opts, pcmk_show_pending);
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
const char *rsc_state = native_displayable_state(rsc, print_pending);
const char *desc = NULL;
char ra_name[LINE_MAX];
char *nodes_running_on = NULL;
const char *lock_node_name = NULL;
int rc = pcmk_rc_no_output;
const char *target_role = NULL;
desc = pe__resource_description(rsc, show_opts);
if (rsc->meta != NULL) {
target_role = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_TARGET_ROLE);
}
CRM_ASSERT(rsc->variant == pe_native);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
/* resource information. */
snprintf(ra_name, LINE_MAX, "%s%s%s:%s", class,
((prov == NULL)? "" : PROVIDER_SEP), ((prov == NULL)? "" : prov),
crm_element_value(rsc->xml, XML_ATTR_TYPE));
nodes_running_on = pcmk__itoa(g_list_length(rsc->running_on));
if (rsc->lock_node != NULL) {
lock_node_name = rsc->lock_node->details->uname;
}
rc = pe__name_and_nvpairs_xml(out, true, "resource", 15,
"id", rsc_printable_id(rsc),
"resource_agent", ra_name,
"role", rsc_state,
"target_role", target_role,
"active", pcmk__btoa(rsc->fns->active(rsc, TRUE)),
"orphaned", pe__rsc_bool_str(rsc, pe_rsc_orphan),
"blocked", pe__rsc_bool_str(rsc, pe_rsc_block),
"maintenance", pe__rsc_bool_str(rsc, pe_rsc_maintenance),
"managed", pe__rsc_bool_str(rsc, pe_rsc_managed),
"failed", pe__rsc_bool_str(rsc, pe_rsc_failed),
"failure_ignored", pe__rsc_bool_str(rsc, pe_rsc_failure_ignored),
"nodes_running_on", nodes_running_on,
"pending", (print_pending? native_pending_task(rsc) : NULL),
"locked_to", lock_node_name,
"description", desc);
free(nodes_running_on);
CRM_ASSERT(rc == pcmk_rc_ok);
if (rsc->running_on != NULL) {
GList *gIter = rsc->running_on;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
rc = pe__name_and_nvpairs_xml(out, false, "node", 3,
"name", node->details->uname,
"id", node->details->id,
"cached", pcmk__btoa(node->details->online));
CRM_ASSERT(rc == pcmk_rc_ok);
}
}
pcmk__output_xml_pop_parent(out);
return rc;
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__resource_html(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const pe_node_t *node = pe__current_node(rsc);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
CRM_ASSERT(rsc->variant == pe_native);
if (node == NULL) {
// This is set only if a non-probe action is pending on this node
node = rsc->pending_node;
}
return pe__common_output_html(out, rsc, rsc_printable_id(rsc), node, show_opts);
}
PCMK__OUTPUT_ARGS("primitive", "uint32_t", "pe_resource_t *", "GList *", "GList *")
int
pe__resource_text(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pe_resource_t *rsc = va_arg(args, pe_resource_t *);
GList *only_node G_GNUC_UNUSED = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const pe_node_t *node = pe__current_node(rsc);
CRM_ASSERT(rsc->variant == pe_native);
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return pcmk_rc_no_output;
}
if (node == NULL) {
// This is set only if a non-probe action is pending on this node
node = rsc->pending_node;
}
return pe__common_output_text(out, rsc, rsc_printable_id(rsc), node, show_opts);
}
void
native_free(pe_resource_t * rsc)
{
pe_rsc_trace(rsc, "Freeing resource action list (not the data)");
common_free(rsc);
}
enum rsc_role_e
native_resource_state(const pe_resource_t * rsc, gboolean current)
{
enum rsc_role_e role = rsc->next_role;
if (current) {
role = rsc->role;
}
pe_rsc_trace(rsc, "%s state: %s", rsc->id, role2text(role));
return role;
}
/*!
* \internal
* \brief List nodes where a resource (or any of its children) is
*
* \param[in] rsc Resource to check
* \param[out] list List to add result to
* \param[in] current 0 = where allocated, 1 = where running,
* 2 = where running or pending
*
* \return If list contains only one node, that node, or NULL otherwise
*/
pe_node_t *
native_location(const pe_resource_t *rsc, GList **list, int current)
{
// @COMPAT: Accept a pe__rsc_node argument instead of int current
pe_node_t *one = NULL;
GList *result = NULL;
if (rsc->children) {
GList *gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *child = (pe_resource_t *) gIter->data;
child->fns->location(child, &result, current);
}
} else if (current) {
if (rsc->running_on) {
result = g_list_copy(rsc->running_on);
}
if ((current == 2) && rsc->pending_node
&& !pe_find_node_id(result, rsc->pending_node->details->id)) {
result = g_list_append(result, rsc->pending_node);
}
} else if (current == FALSE && rsc->allocated_to) {
result = g_list_append(NULL, rsc->allocated_to);
}
if (result && (result->next == NULL)) {
one = result->data;
}
if (list) {
GList *gIter = result;
for (; gIter != NULL; gIter = gIter->next) {
pe_node_t *node = (pe_node_t *) gIter->data;
if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) {
*list = g_list_append(*list, node);
}
}
}
g_list_free(result);
return one;
}
static void
get_rscs_brief(GList *rsc_list, GHashTable * rsc_table, GHashTable * active_table)
{
GList *gIter = rsc_list;
for (; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
const char *kind = crm_element_value(rsc->xml, XML_ATTR_TYPE);
int offset = 0;
char buffer[LINE_MAX];
int *rsc_counter = NULL;
int *active_counter = NULL;
if (rsc->variant != pe_native) {
continue;
}
offset += snprintf(buffer + offset, LINE_MAX - offset, "%s", class);
if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_provider)) {
const char *prov = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
if (prov != NULL) {
offset += snprintf(buffer + offset, LINE_MAX - offset,
PROVIDER_SEP "%s", prov);
}
}
offset += snprintf(buffer + offset, LINE_MAX - offset, ":%s", kind);
CRM_LOG_ASSERT(offset > 0);
if (rsc_table) {
rsc_counter = g_hash_table_lookup(rsc_table, buffer);
if (rsc_counter == NULL) {
rsc_counter = calloc(1, sizeof(int));
*rsc_counter = 0;
g_hash_table_insert(rsc_table, strdup(buffer), rsc_counter);
}
(*rsc_counter)++;
}
if (active_table) {
GList *gIter2 = rsc->running_on;
for (; gIter2 != NULL; gIter2 = gIter2->next) {
pe_node_t *node = (pe_node_t *) gIter2->data;
GHashTable *node_table = NULL;
if (node->details->unclean == FALSE && node->details->online == FALSE &&
pcmk_is_set(rsc->flags, pe_rsc_managed)) {
continue;
}
node_table = g_hash_table_lookup(active_table, node->details->uname);
if (node_table == NULL) {
node_table = pcmk__strkey_table(free, free);
g_hash_table_insert(active_table, strdup(node->details->uname), node_table);
}
active_counter = g_hash_table_lookup(node_table, buffer);
if (active_counter == NULL) {
active_counter = calloc(1, sizeof(int));
*active_counter = 0;
g_hash_table_insert(node_table, strdup(buffer), active_counter);
}
(*active_counter)++;
}
}
}
}
static void
destroy_node_table(gpointer data)
{
GHashTable *node_table = data;
if (node_table) {
g_hash_table_destroy(node_table);
}
}
/*!
* \internal
* \deprecated This function will be removed in a future release
*/
void
print_rscs_brief(GList *rsc_list, const char *pre_text, long options,
void *print_data, gboolean print_all)
{
GHashTable *rsc_table = pcmk__strkey_table(free, free);
GHashTable *active_table = pcmk__strkey_table(free, destroy_node_table);
GHashTableIter hash_iter;
char *type = NULL;
int *rsc_counter = NULL;
get_rscs_brief(rsc_list, rsc_table, active_table);
g_hash_table_iter_init(&hash_iter, rsc_table);
while (g_hash_table_iter_next(&hash_iter, (gpointer *)&type, (gpointer *)&rsc_counter)) {
GHashTableIter hash_iter2;
char *node_name = NULL;
GHashTable *node_table = NULL;
int active_counter_all = 0;
g_hash_table_iter_init(&hash_iter2, active_table);
while (g_hash_table_iter_next(&hash_iter2, (gpointer *)&node_name, (gpointer *)&node_table)) {
int *active_counter = g_hash_table_lookup(node_table, type);
if (active_counter == NULL || *active_counter == 0) {
continue;
} else {
active_counter_all += *active_counter;
}
if (options & pe_print_rsconly) {
node_name = NULL;
}
if (options & pe_print_html) {
status_print("<li>\n");
}
if (print_all) {
status_print("%s%d/%d\t(%s):\tActive %s\n", pre_text ? pre_text : "",
active_counter ? *active_counter : 0,
rsc_counter ? *rsc_counter : 0, type,
active_counter && (*active_counter > 0) && node_name ? node_name : "");
} else {
status_print("%s%d\t(%s):\tActive %s\n", pre_text ? pre_text : "",
active_counter ? *active_counter : 0, type,
active_counter && (*active_counter > 0) && node_name ? node_name : "");
}
if (options & pe_print_html) {
status_print("</li>\n");
}
}
if (print_all && active_counter_all == 0) {
if (options & pe_print_html) {
status_print("<li>\n");
}
status_print("%s%d/%d\t(%s):\tActive\n", pre_text ? pre_text : "",
active_counter_all,
rsc_counter ? *rsc_counter : 0, type);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
}
if (rsc_table) {
g_hash_table_destroy(rsc_table);
rsc_table = NULL;
}
if (active_table) {
g_hash_table_destroy(active_table);
active_table = NULL;
}
}
int
pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, uint32_t show_opts)
{
GHashTable *rsc_table = pcmk__strkey_table(free, free);
GHashTable *active_table = pcmk__strkey_table(free, destroy_node_table);
GList *sorted_rscs;
int rc = pcmk_rc_no_output;
get_rscs_brief(rsc_list, rsc_table, active_table);
/* Make a list of the rsc_table keys so that it can be sorted. This is to make sure
* output order stays consistent between systems.
*/
sorted_rscs = g_hash_table_get_keys(rsc_table);
sorted_rscs = g_list_sort(sorted_rscs, (GCompareFunc) strcmp);
for (GList *gIter = sorted_rscs; gIter; gIter = gIter->next) {
char *type = (char *) gIter->data;
int *rsc_counter = g_hash_table_lookup(rsc_table, type);
GList *sorted_nodes = NULL;
int active_counter_all = 0;
/* Also make a list of the active_table keys so it can be sorted. If there's
* more than one instance of a type of resource running, we need the nodes to
* be sorted to make sure output order stays consistent between systems.
*/
sorted_nodes = g_hash_table_get_keys(active_table);
sorted_nodes = g_list_sort(sorted_nodes, (GCompareFunc) pcmk__numeric_strcasecmp);
for (GList *gIter2 = sorted_nodes; gIter2; gIter2 = gIter2->next) {
char *node_name = (char *) gIter2->data;
GHashTable *node_table = g_hash_table_lookup(active_table, node_name);
int *active_counter = NULL;
if (node_table == NULL) {
continue;
}
active_counter = g_hash_table_lookup(node_table, type);
if (active_counter == NULL || *active_counter == 0) {
continue;
} else {
active_counter_all += *active_counter;
}
if (pcmk_is_set(show_opts, pcmk_show_rsc_only)) {
node_name = NULL;
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
out->list_item(out, NULL, "%d/%d\t(%s):\tActive %s",
*active_counter,
rsc_counter ? *rsc_counter : 0, type,
(*active_counter > 0) && node_name ? node_name : "");
} else {
out->list_item(out, NULL, "%d\t(%s):\tActive %s",
*active_counter, type,
(*active_counter > 0) && node_name ? node_name : "");
}
rc = pcmk_rc_ok;
}
if (pcmk_is_set(show_opts, pcmk_show_inactive_rscs) && active_counter_all == 0) {
out->list_item(out, NULL, "%d/%d\t(%s):\tActive",
active_counter_all,
rsc_counter ? *rsc_counter : 0, type);
rc = pcmk_rc_ok;
}
if (sorted_nodes) {
g_list_free(sorted_nodes);
}
}
if (rsc_table) {
g_hash_table_destroy(rsc_table);
rsc_table = NULL;
}
if (active_table) {
g_hash_table_destroy(active_table);
active_table = NULL;
}
if (sorted_rscs) {
g_list_free(sorted_rscs);
}
return rc;
}
gboolean
pe__native_is_filtered(const pe_resource_t *rsc, GList *only_rsc,
gboolean check_parent)
{
if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc, pcmk__str_star_matches) ||
pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches)) {
return FALSE;
} else if (check_parent && rsc->parent) {
const pe_resource_t *up = pe__const_top_resource(rsc, true);
return up->fns->is_filtered(up, only_rsc, FALSE);
}
return TRUE;
}
/*!
* \internal
* \brief Get maximum primitive resource instances per node
*
* \param[in] rsc Primitive resource to check
*
* \return Maximum number of \p rsc instances that can be active on one node
*/
unsigned int
pe__primitive_max_per_node(const pe_resource_t *rsc)
{
CRM_ASSERT((rsc != NULL) && (rsc->variant == pe_native));
return 1U;
}
diff --git a/lib/pengine/pe_actions.c b/lib/pengine/pe_actions.c
index 8566bf120a..2c3b98d77b 100644
--- a/lib/pengine/pe_actions.c
+++ b/lib/pengine/pe_actions.c
@@ -1,1695 +1,1695 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/pengine/internal.h>
#include <crm/common/xml_internal.h>
#include "pe_status_private.h"
static void unpack_operation(pe_action_t *action, const xmlNode *xml_obj,
const pe_resource_t *container,
pe_working_set_t *data_set, guint interval_ms);
static void
add_singleton(pe_working_set_t *data_set, pe_action_t *action)
{
if (data_set->singletons == NULL) {
data_set->singletons = pcmk__strkey_table(NULL, NULL);
}
g_hash_table_insert(data_set->singletons, action->uuid, action);
}
static pe_action_t *
lookup_singleton(pe_working_set_t *data_set, const char *action_uuid)
{
if (data_set->singletons == NULL) {
return NULL;
}
return g_hash_table_lookup(data_set->singletons, action_uuid);
}
/*!
* \internal
* \brief Find an existing action that matches arguments
*
* \param[in] key Action key to match
* \param[in] rsc Resource to match (if any)
* \param[in] node Node to match (if any)
* \param[in] data_set Cluster working set
*
* \return Existing action that matches arguments (or NULL if none)
*/
static pe_action_t *
find_existing_action(const char *key, const pe_resource_t *rsc,
const pe_node_t *node, const pe_working_set_t *data_set)
{
GList *matches = NULL;
pe_action_t *action = NULL;
/* When rsc is NULL, it would be quicker to check data_set->singletons,
* but checking all data_set->actions takes the node into account.
*/
matches = find_actions(((rsc == NULL)? data_set->actions : rsc->actions),
key, node);
if (matches == NULL) {
return NULL;
}
CRM_LOG_ASSERT(!pcmk__list_of_multiple(matches));
action = matches->data;
g_list_free(matches);
return action;
}
static xmlNode *
find_rsc_op_entry_helper(const pe_resource_t *rsc, const char *key,
gboolean include_disabled)
{
guint interval_ms = 0;
gboolean do_retry = TRUE;
char *local_key = NULL;
const char *name = NULL;
const char *interval_spec = NULL;
char *match_key = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
retry:
for (operation = pcmk__xe_first_child(rsc->ops_xml); operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
match_key = pcmk__op_key(rsc->id, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
if (rsc->clone_name) {
match_key = pcmk__op_key(rsc->clone_name, name, interval_ms);
if (pcmk__str_eq(key, match_key, pcmk__str_casei)) {
op = operation;
}
free(match_key);
}
if (op != NULL) {
free(local_key);
return op;
}
}
}
free(local_key);
if (do_retry == FALSE) {
return NULL;
}
do_retry = FALSE;
if ((strstr(key, PCMK_ACTION_MIGRATE_TO) != NULL)
- || strstr(key, CRMD_ACTION_MIGRATED)) {
+ || (strstr(key, PCMK_ACTION_MIGRATE_FROM) != NULL)) {
local_key = pcmk__op_key(rsc->id, "migrate", 0);
key = local_key;
goto retry;
} else if (strstr(key, "_notify_")) {
local_key = pcmk__op_key(rsc->id, PCMK_ACTION_NOTIFY, 0);
key = local_key;
goto retry;
}
return NULL;
}
xmlNode *
find_rsc_op_entry(const pe_resource_t *rsc, const char *key)
{
return find_rsc_op_entry_helper(rsc, key, FALSE);
}
/*!
* \internal
* \brief Create a new action object
*
* \param[in] key Action key
* \param[in] task Action name
* \param[in,out] rsc Resource that action is for (if any)
* \param[in] node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in,out] data_set Cluster working set
*
* \return Newly allocated action
* \note This function takes ownership of \p key. It is the caller's
* responsibility to free the return value with pe_free_action().
*/
static pe_action_t *
new_action(char *key, const char *task, pe_resource_t *rsc,
const pe_node_t *node, bool optional, bool for_graph,
pe_working_set_t *data_set)
{
pe_action_t *action = calloc(1, sizeof(pe_action_t));
CRM_ASSERT(action != NULL);
action->rsc = rsc;
action->task = strdup(task); CRM_ASSERT(action->task != NULL);
action->uuid = key;
action->extra = pcmk__strkey_table(free, free);
action->meta = pcmk__strkey_table(free, free);
if (node) {
action->node = pe__copy_node(node);
}
if (pcmk__str_eq(task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
// Resource history deletion for a node can be done on the DC
pe__set_action_flags(action, pe_action_dc);
}
pe__set_action_flags(action, pe_action_runnable);
if (optional) {
pe__set_action_flags(action, pe_action_optional);
} else {
pe__clear_action_flags(action, pe_action_optional);
}
if (rsc != NULL) {
guint interval_ms = 0;
action->op_entry = find_rsc_op_entry_helper(rsc, key, TRUE);
parse_op_key(key, NULL, NULL, &interval_ms);
unpack_operation(action, action->op_entry, rsc->container, data_set,
interval_ms);
}
if (for_graph) {
pe_rsc_trace(rsc, "Created %s action %d (%s): %s for %s on %s",
(optional? "optional" : "required"),
data_set->action_id, key, task,
((rsc == NULL)? "no resource" : rsc->id),
pe__node_name(node));
action->id = data_set->action_id++;
data_set->actions = g_list_prepend(data_set->actions, action);
if (rsc == NULL) {
add_singleton(data_set, action);
} else {
rsc->actions = g_list_prepend(rsc->actions, action);
}
}
return action;
}
/*!
* \internal
* \brief Evaluate node attribute values for an action
*
* \param[in,out] action Action to unpack attributes for
* \param[in,out] data_set Cluster working set
*/
static void
unpack_action_node_attributes(pe_action_t *action, pe_working_set_t *data_set)
{
if (!pcmk_is_set(action->flags, pe_action_have_node_attrs)
&& (action->op_entry != NULL)) {
pe_rule_eval_data_t rule_data = {
.node_hash = action->node->details->attrs,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
pe__set_action_flags(action, pe_action_have_node_attrs);
pe__unpack_dataset_nvpairs(action->op_entry, XML_TAG_ATTR_SETS,
&rule_data, action->extra, NULL,
FALSE, data_set);
}
}
/*!
* \internal
* \brief Update an action's optional flag
*
* \param[in,out] action Action to update
* \param[in] optional Requested optional status
*/
static void
update_action_optional(pe_action_t *action, gboolean optional)
{
// Force a non-recurring action to be optional if its resource is unmanaged
if ((action->rsc != NULL) && (action->node != NULL)
&& !pcmk_is_set(action->flags, pe_action_pseudo)
&& !pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& (g_hash_table_lookup(action->meta,
XML_LRM_ATTR_INTERVAL_MS) == NULL)) {
pe_rsc_debug(action->rsc, "%s on %s is optional (%s is unmanaged)",
action->uuid, pe__node_name(action->node),
action->rsc->id);
pe__set_action_flags(action, pe_action_optional);
// We shouldn't clear runnable here because ... something
// Otherwise require the action if requested
} else if (!optional) {
pe__clear_action_flags(action, pe_action_optional);
}
}
static enum pe_quorum_policy
effective_quorum_policy(pe_resource_t *rsc, pe_working_set_t *data_set)
{
enum pe_quorum_policy policy = data_set->no_quorum_policy;
if (pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
policy = no_quorum_ignore;
} else if (data_set->no_quorum_policy == no_quorum_demote) {
switch (rsc->role) {
case RSC_ROLE_PROMOTED:
case RSC_ROLE_UNPROMOTED:
if (rsc->next_role > RSC_ROLE_UNPROMOTED) {
pe__set_next_role(rsc, RSC_ROLE_UNPROMOTED,
"no-quorum-policy=demote");
}
policy = no_quorum_ignore;
break;
default:
policy = no_quorum_stop;
break;
}
}
return policy;
}
/*!
* \internal
* \brief Update a resource action's runnable flag
*
* \param[in,out] action Action to update
* \param[in] for_graph Whether action should be recorded in transition graph
* \param[in,out] data_set Cluster working set
*
* \note This may also schedule fencing if a stop is unrunnable.
*/
static void
update_resource_action_runnable(pe_action_t *action, bool for_graph,
pe_working_set_t *data_set)
{
if (pcmk_is_set(action->flags, pe_action_pseudo)) {
return;
}
if (action->node == NULL) {
pe_rsc_trace(action->rsc, "%s is unrunnable (unallocated)",
action->uuid);
pe__clear_action_flags(action, pe_action_runnable);
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& !(action->node->details->online)
&& (!pe__is_guest_node(action->node)
|| action->node->details->remote_requires_reset)) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"%s on %s is unrunnable (node is offline)",
action->uuid, pe__node_name(action->node));
if (pcmk_is_set(action->rsc->flags, pe_rsc_managed)
&& for_graph
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)
&& !(action->node->details->unclean)) {
pe_fence_node(data_set, action->node, "stop is unrunnable", false);
}
} else if (!pcmk_is_set(action->flags, pe_action_dc)
&& action->node->details->pending) {
pe__clear_action_flags(action, pe_action_runnable);
do_crm_log((for_graph? LOG_WARNING: LOG_TRACE),
"Action %s on %s is unrunnable (node is pending)",
action->uuid, pe__node_name(action->node));
} else if (action->needs == rsc_req_nothing) {
pe_action_set_reason(action, NULL, TRUE);
if (pe__is_guest_node(action->node)
&& !pe_can_fence(data_set, action->node)) {
/* An action that requires nothing usually does not require any
* fencing in order to be runnable. However, there is an exception:
* such an action cannot be completed if it is on a guest node whose
* host is unclean and cannot be fenced.
*/
pe_rsc_debug(action->rsc, "%s on %s is unrunnable "
"(node's host cannot be fenced)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
} else {
pe_rsc_trace(action->rsc,
"%s on %s does not require fencing or quorum",
action->uuid, pe__node_name(action->node));
pe__set_action_flags(action, pe_action_runnable);
}
} else {
switch (effective_quorum_policy(action->rsc, data_set)) {
case no_quorum_stop:
pe_rsc_debug(action->rsc, "%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "no quorum", true);
break;
case no_quorum_freeze:
if (!action->rsc->fns->active(action->rsc, TRUE)
|| (action->rsc->next_role > action->rsc->role)) {
pe_rsc_debug(action->rsc,
"%s on %s is unrunnable (no quorum)",
action->uuid, pe__node_name(action->node));
pe__clear_action_flags(action, pe_action_runnable);
pe_action_set_reason(action, "quorum freeze", true);
}
break;
default:
//pe_action_set_reason(action, NULL, TRUE);
pe__set_action_flags(action, pe_action_runnable);
break;
}
}
}
/*!
* \internal
* \brief Update a resource object's flags for a new action on it
*
* \param[in,out] rsc Resource that action is for (if any)
* \param[in] action New action
*/
static void
update_resource_flags_for_action(pe_resource_t *rsc, const pe_action_t *action)
{
/* @COMPAT pe_rsc_starting and pe_rsc_stopping are not actually used
* within Pacemaker, and should be deprecated and eventually removed
*/
if (pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)) {
pe__set_resource_flags(rsc, pe_rsc_stopping);
} else if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)) {
if (pcmk_is_set(action->flags, pe_action_runnable)) {
pe__set_resource_flags(rsc, pe_rsc_starting);
} else {
pe__clear_resource_flags(rsc, pe_rsc_starting);
}
}
}
static bool
valid_stop_on_fail(const char *value)
{
return !pcmk__strcase_any_of(value, "standby", "demote", "stop", NULL);
}
static const char *
unpack_operation_on_fail(pe_action_t * action)
{
const char *name = NULL;
const char *role = NULL;
const char *on_fail = NULL;
const char *interval_spec = NULL;
const char *value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ON_FAIL);
if (pcmk__str_eq(action->task, PCMK_ACTION_STOP, pcmk__str_casei)
&& !valid_stop_on_fail(value)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s stop "
"action to default value because '%s' is not "
"allowed for stop", action->rsc->id, value);
return NULL;
} else if (pcmk__str_eq(action->task, PCMK_ACTION_DEMOTE, pcmk__str_casei)
&& (value == NULL)) {
// demote on_fail defaults to monitor value for promoted role if present
xmlNode *operation = NULL;
CRM_CHECK(action->rsc != NULL, return NULL);
for (operation = pcmk__xe_first_child(action->rsc->ops_xml);
(operation != NULL) && (value == NULL);
operation = pcmk__xe_next(operation)) {
bool enabled = false;
if (!pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
continue;
}
name = crm_element_value(operation, "name");
role = crm_element_value(operation, "role");
on_fail = crm_element_value(operation, XML_OP_ATTR_ON_FAIL);
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!on_fail) {
continue;
} else if (pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok && !enabled) {
continue;
} else if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S,
NULL)) {
continue;
} else if (crm_parse_interval_spec(interval_spec) == 0) {
continue;
} else if (pcmk__str_eq(on_fail, "demote", pcmk__str_casei)) {
continue;
}
value = on_fail;
}
} else if (pcmk__str_eq(action->task, CRM_OP_LRM_DELETE, pcmk__str_casei)) {
value = "ignore";
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
name = crm_element_value(action->op_entry, "name");
role = crm_element_value(action->op_entry, "role");
interval_spec = crm_element_value(action->op_entry,
XML_LRM_ATTR_INTERVAL);
if (!pcmk__str_eq(name, PCMK_ACTION_PROMOTE, pcmk__str_casei)
&& (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei)
|| !pcmk__strcase_any_of(role, RSC_ROLE_PROMOTED_S,
RSC_ROLE_PROMOTED_LEGACY_S, NULL)
|| (crm_parse_interval_spec(interval_spec) == 0))) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for %s %s "
"action to default value because 'demote' is not "
"allowed for it", action->rsc->id, name);
return NULL;
}
}
return value;
}
static int
unpack_timeout(const char *value)
{
int timeout_ms = crm_get_msec(value);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
return timeout_ms;
}
// true if value contains valid, non-NULL interval origin for recurring op
static bool
unpack_interval_origin(const char *value, const xmlNode *xml_obj,
guint interval_ms, const crm_time_t *now,
long long *start_delay)
{
long long result = 0;
guint interval_sec = interval_ms / 1000;
crm_time_t *origin = NULL;
// Ignore unspecified values and non-recurring operations
if ((value == NULL) || (interval_ms == 0) || (now == NULL)) {
return false;
}
// Parse interval origin from text
origin = crm_time_new(value);
if (origin == NULL) {
pcmk__config_err("Ignoring '" XML_OP_ATTR_ORIGIN "' for operation "
"'%s' because '%s' is not valid",
(ID(xml_obj)? ID(xml_obj) : "(missing ID)"), value);
return false;
}
// Get seconds since origin (negative if origin is in the future)
result = crm_time_get_seconds(now) - crm_time_get_seconds(origin);
crm_time_free(origin);
// Calculate seconds from closest interval to now
result = result % interval_sec;
// Calculate seconds remaining until next interval
result = ((result <= 0)? 0 : interval_sec) - result;
crm_info("Calculated a start delay of %llds for operation '%s'",
result,
(ID(xml_obj)? ID(xml_obj) : "(unspecified)"));
if (start_delay != NULL) {
*start_delay = result * 1000; // milliseconds
}
return true;
}
static int
unpack_start_delay(const char *value, GHashTable *meta)
{
int start_delay = 0;
if (value != NULL) {
start_delay = crm_get_msec(value);
if (start_delay < 0) {
start_delay = 0;
}
if (meta) {
g_hash_table_replace(meta, strdup(XML_OP_ATTR_START_DELAY),
pcmk__itoa(start_delay));
}
}
return start_delay;
}
static xmlNode *
find_min_interval_mon(pe_resource_t * rsc, gboolean include_disabled)
{
guint interval_ms = 0;
guint min_interval_ms = G_MAXUINT;
const char *name = NULL;
const char *interval_spec = NULL;
xmlNode *op = NULL;
xmlNode *operation = NULL;
for (operation = pcmk__xe_first_child(rsc->ops_xml);
operation != NULL;
operation = pcmk__xe_next(operation)) {
if (pcmk__str_eq((const char *)operation->name, "op", pcmk__str_none)) {
bool enabled = false;
name = crm_element_value(operation, "name");
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
if (!include_disabled && pcmk__xe_get_bool_attr(operation, "enabled", &enabled) == pcmk_rc_ok &&
!enabled) {
continue;
}
if (!pcmk__str_eq(name, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
continue;
}
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms && (interval_ms < min_interval_ms)) {
min_interval_ms = interval_ms;
op = operation;
}
}
}
return op;
}
/*!
* \brief Unpack operation XML into an action structure
*
* Unpack an operation's meta-attributes (normalizing the interval, timeout,
* and start delay values as integer milliseconds), requirements, and
* failure policy.
*
* \param[in,out] action Action to unpack into
* \param[in] xml_obj Operation XML (or NULL if all defaults)
* \param[in] container Resource that contains affected resource, if any
* \param[in,out] data_set Cluster state
* \param[in] interval_ms How frequently to perform the operation
*/
static void
unpack_operation(pe_action_t *action, const xmlNode *xml_obj,
const pe_resource_t *container,
pe_working_set_t *data_set, guint interval_ms)
{
int timeout_ms = 0;
const char *value = NULL;
bool is_probe = false;
pe_rsc_eval_data_t rsc_rule_data = {
.standard = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_CLASS),
.provider = crm_element_value(action->rsc->xml, XML_AGENT_ATTR_PROVIDER),
.agent = crm_element_value(action->rsc->xml, XML_EXPR_ATTR_TYPE)
};
pe_op_eval_data_t op_rule_data = {
.op_name = action->task,
.interval = interval_ms
};
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = &rsc_rule_data,
.op_data = &op_rule_data
};
CRM_CHECK(action && action->rsc, return);
is_probe = pcmk_is_probe(action->task, interval_ms);
// Cluster-wide <op_defaults> <meta_attributes>
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, FALSE, data_set);
// Determine probe default timeout differently
if (is_probe) {
xmlNode *min_interval_mon = find_min_interval_mon(action->rsc, FALSE);
if (min_interval_mon) {
value = crm_element_value(min_interval_mon, XML_ATTR_TIMEOUT);
if (value) {
crm_trace("\t%s: Setting default timeout to minimum-interval "
"monitor's timeout '%s'", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
}
if (xml_obj) {
xmlAttrPtr xIter = NULL;
// <op> <meta_attributes> take precedence over defaults
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_META_SETS, &rule_data,
action->meta, NULL, TRUE, data_set);
/* Anything set as an <op> XML property has highest precedence.
* This ensures we use the name and interval from the <op> tag.
*/
for (xIter = xml_obj->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = pcmk__xml_attr_value(xIter);
g_hash_table_replace(action->meta, strdup(prop_name), strdup(prop_value));
}
}
g_hash_table_remove(action->meta, "id");
// Normalize interval to milliseconds
if (interval_ms > 0) {
g_hash_table_replace(action->meta, strdup(XML_LRM_ATTR_INTERVAL),
crm_strdup_printf("%u", interval_ms));
} else {
g_hash_table_remove(action->meta, XML_LRM_ATTR_INTERVAL);
}
/*
* Timeout order of precedence:
* 1. pcmk_monitor_timeout (if rsc has pcmk_ra_cap_fence_params
* and task is start or a probe; pcmk_monitor_timeout works
* by default for a recurring monitor)
* 2. explicit op timeout on the primitive
* 3. default op timeout
* a. if probe, then min-interval monitor's timeout
* b. else, in XML_CIB_TAG_OPCONFIG
* 4. CRM_DEFAULT_OP_TIMEOUT_S
*
* #1 overrides general rule of <op> XML property having highest
* precedence.
*/
if (pcmk_is_set(pcmk_get_ra_caps(rsc_rule_data.standard),
pcmk_ra_cap_fence_params)
&& (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)
|| is_probe)) {
GHashTable *params = pe_rsc_params(action->rsc, action->node, data_set);
value = g_hash_table_lookup(params, "pcmk_monitor_timeout");
if (value) {
crm_trace("\t%s: Setting timeout to pcmk_monitor_timeout '%s', "
"overriding default", action->uuid, value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
strdup(value));
}
}
// Normalize timeout to positive milliseconds
value = g_hash_table_lookup(action->meta, XML_ATTR_TIMEOUT);
timeout_ms = unpack_timeout(value);
g_hash_table_replace(action->meta, strdup(XML_ATTR_TIMEOUT),
pcmk__itoa(timeout_ms));
if (!pcmk__strcase_any_of(action->task, PCMK_ACTION_START,
PCMK_ACTION_PROMOTE, NULL)) {
action->needs = rsc_req_nothing;
value = "nothing (not start or promote)";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_fencing)) {
action->needs = rsc_req_stonith;
value = "fencing";
} else if (pcmk_is_set(action->rsc->flags, pe_rsc_needs_quorum)) {
action->needs = rsc_req_quorum;
value = "quorum";
} else {
action->needs = rsc_req_nothing;
value = "nothing";
}
pe_rsc_trace(action->rsc, "%s requires %s", action->uuid, value);
value = unpack_operation_on_fail(action);
if (value == NULL) {
} else if (pcmk__str_eq(value, "block", pcmk__str_casei)) {
action->on_fail = action_fail_block;
g_hash_table_insert(action->meta, strdup(XML_OP_ATTR_ON_FAIL), strdup("block"));
value = "block"; // The above could destroy the original string
} else if (pcmk__str_eq(value, "fence", pcmk__str_casei)) {
action->on_fail = action_fail_fence;
value = "node fencing";
if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
pcmk__config_err("Resetting '" XML_OP_ATTR_ON_FAIL "' for "
"operation '%s' to 'stop' because 'fence' is not "
"valid when fencing is disabled", action->uuid);
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
}
} else if (pcmk__str_eq(value, "standby", pcmk__str_casei)) {
action->on_fail = action_fail_standby;
value = "node standby";
} else if (pcmk__strcase_any_of(value, "ignore", PCMK__VALUE_NOTHING,
NULL)) {
action->on_fail = action_fail_ignore;
value = "ignore";
} else if (pcmk__str_eq(value, "migrate", pcmk__str_casei)) {
action->on_fail = action_fail_migrate;
value = "force migration";
} else if (pcmk__str_eq(value, "stop", pcmk__str_casei)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop resource";
} else if (pcmk__str_eq(value, "restart", pcmk__str_casei)) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate)";
} else if (pcmk__str_eq(value, "restart-container", pcmk__str_casei)) {
if (container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate)";
} else {
value = NULL;
}
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
action->on_fail = action_fail_demote;
value = "demote instance";
} else {
pe_err("Resource %s: Unknown failure type (%s)", action->rsc->id, value);
value = NULL;
}
/* defaults */
if (value == NULL && container) {
action->on_fail = action_fail_restart_container;
value = "restart container (and possibly migrate) (default)";
/* For remote nodes, ensure that any failure that results in dropping an
* active connection to the node results in fencing of the node.
*
* There are only two action failures that don't result in fencing.
* 1. probes - probe failures are expected.
* 2. start - a start failure indicates that an active connection does not already
* exist. The user can set op on-fail=fence if they really want to fence start
* failures. */
} else if (((value == NULL) || !pcmk_is_set(action->rsc->flags, pe_rsc_managed))
&& pe__resource_is_remote_conn(action->rsc, data_set)
&& !(pcmk__str_eq(action->task, PCMK_ACTION_MONITOR,
pcmk__str_casei)
&& (interval_ms == 0))
&& !pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_casei)) {
if (!pcmk_is_set(action->rsc->flags, pe_rsc_managed)) {
action->on_fail = action_fail_stop;
action->fail_role = RSC_ROLE_STOPPED;
value = "stop unmanaged remote node (enforcing default)";
} else {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
value = "fence remote node (default)";
} else {
value = "recover remote node connection (default)";
}
if (action->rsc->remote_reconnect_ms) {
action->fail_role = RSC_ROLE_STOPPED;
}
action->on_fail = action_fail_reset_remote;
}
} else if ((value == NULL)
&& pcmk__str_eq(action->task, PCMK_ACTION_STOP,
pcmk__str_casei)) {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
action->on_fail = action_fail_fence;
value = "resource fence (default)";
} else {
action->on_fail = action_fail_block;
value = "resource block (default)";
}
} else if (value == NULL) {
action->on_fail = action_fail_recover;
value = "restart (and possibly migrate) (default)";
}
pe_rsc_trace(action->rsc, "%s failure handling: %s",
action->uuid, value);
value = NULL;
if (xml_obj != NULL) {
value = g_hash_table_lookup(action->meta, "role_after_failure");
if (value) {
pe_warn_once(pe_wo_role_after,
"Support for role_after_failure is deprecated and will be removed in a future release");
}
}
if (value != NULL && action->fail_role == RSC_ROLE_UNKNOWN) {
action->fail_role = text2role(value);
}
/* defaults */
if (action->fail_role == RSC_ROLE_UNKNOWN) {
if (pcmk__str_eq(action->task, PCMK_ACTION_PROMOTE, pcmk__str_casei)) {
action->fail_role = RSC_ROLE_UNPROMOTED;
} else {
action->fail_role = RSC_ROLE_STARTED;
}
}
pe_rsc_trace(action->rsc, "%s failure results in: %s",
action->uuid, role2text(action->fail_role));
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_START_DELAY);
if (value) {
unpack_start_delay(value, action->meta);
} else {
long long start_delay = 0;
value = g_hash_table_lookup(action->meta, XML_OP_ATTR_ORIGIN);
if (unpack_interval_origin(value, xml_obj, interval_ms, data_set->now,
&start_delay)) {
g_hash_table_replace(action->meta, strdup(XML_OP_ATTR_START_DELAY),
crm_strdup_printf("%lld", start_delay));
}
}
}
/*!
* \brief Create or update an action object
*
* \param[in,out] rsc Resource that action is for (if any)
* \param[in,out] key Action key (must be non-NULL)
* \param[in] task Action name (must be non-NULL)
* \param[in] on_node Node that action is on (if any)
* \param[in] optional Whether action should be considered optional
* \param[in] save_action Whether action should be recorded in transition graph
* \param[in,out] data_set Cluster working set
*
* \return Action object corresponding to arguments
* \note This function takes ownership of (and might free) \p key. If
* \p save_action is true, \p data_set will own the returned action,
* otherwise it is the caller's responsibility to free the return value
* with pe_free_action().
*/
pe_action_t *
custom_action(pe_resource_t *rsc, char *key, const char *task,
const pe_node_t *on_node, gboolean optional, gboolean save_action,
pe_working_set_t *data_set)
{
pe_action_t *action = NULL;
CRM_ASSERT((key != NULL) && (task != NULL) && (data_set != NULL));
if (save_action) {
action = find_existing_action(key, rsc, on_node, data_set);
}
if (action == NULL) {
action = new_action(key, task, rsc, on_node, optional, save_action,
data_set);
} else {
free(key);
}
update_action_optional(action, optional);
if (rsc != NULL) {
if (action->node != NULL) {
unpack_action_node_attributes(action, data_set);
}
update_resource_action_runnable(action, save_action, data_set);
if (save_action) {
update_resource_flags_for_action(rsc, action);
}
}
return action;
}
pe_action_t *
get_pseudo_op(const char *name, pe_working_set_t * data_set)
{
pe_action_t *op = lookup_singleton(data_set, name);
if (op == NULL) {
op = custom_action(NULL, strdup(name), name, NULL, TRUE, TRUE, data_set);
pe__set_action_flags(op, pe_action_pseudo|pe_action_runnable);
}
return op;
}
static GList *
find_unfencing_devices(GList *candidates, GList *matches)
{
for (GList *gIter = candidates; gIter != NULL; gIter = gIter->next) {
pe_resource_t *candidate = gIter->data;
if (candidate->children != NULL) {
matches = find_unfencing_devices(candidate->children, matches);
} else if (!pcmk_is_set(candidate->flags, pe_rsc_fence_device)) {
continue;
} else if (pcmk_is_set(candidate->flags, pe_rsc_needs_unfencing)) {
matches = g_list_prepend(matches, candidate);
} else if (pcmk__str_eq(g_hash_table_lookup(candidate->meta,
PCMK_STONITH_PROVIDES),
PCMK__VALUE_UNFENCING,
pcmk__str_casei)) {
matches = g_list_prepend(matches, candidate);
}
}
return matches;
}
static int
node_priority_fencing_delay(const pe_node_t *node,
const pe_working_set_t *data_set)
{
int member_count = 0;
int online_count = 0;
int top_priority = 0;
int lowest_priority = 0;
GList *gIter = NULL;
// `priority-fencing-delay` is disabled
if (data_set->priority_fencing_delay <= 0) {
return 0;
}
/* No need to request a delay if the fencing target is not a normal cluster
* member, for example if it's a remote node or a guest node. */
if (node->details->type != node_member) {
return 0;
}
// No need to request a delay if the fencing target is in our partition
if (node->details->online) {
return 0;
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *n = gIter->data;
if (n->details->type != node_member) {
continue;
}
member_count ++;
if (n->details->online) {
online_count++;
}
if (member_count == 1
|| n->details->priority > top_priority) {
top_priority = n->details->priority;
}
if (member_count == 1
|| n->details->priority < lowest_priority) {
lowest_priority = n->details->priority;
}
}
// No need to delay if we have more than half of the cluster members
if (online_count > member_count / 2) {
return 0;
}
/* All the nodes have equal priority.
* Any configured corresponding `pcmk_delay_base/max` will be applied. */
if (lowest_priority == top_priority) {
return 0;
}
if (node->details->priority < top_priority) {
return 0;
}
return data_set->priority_fencing_delay;
}
pe_action_t *
pe_fence_op(pe_node_t *node, const char *op, bool optional,
const char *reason, bool priority_delay, pe_working_set_t *data_set)
{
char *op_key = NULL;
pe_action_t *stonith_op = NULL;
if(op == NULL) {
op = data_set->stonith_action;
}
op_key = crm_strdup_printf("%s-%s-%s", CRM_OP_FENCE, node->details->uname, op);
stonith_op = lookup_singleton(data_set, op_key);
if(stonith_op == NULL) {
stonith_op = custom_action(NULL, op_key, CRM_OP_FENCE, node, TRUE, TRUE, data_set);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET, node->details->uname);
add_hash_param(stonith_op->meta, XML_LRM_ATTR_TARGET_UUID, node->details->id);
add_hash_param(stonith_op->meta, "stonith_action", op);
if (pcmk_is_set(data_set->flags, pe_flag_enable_unfencing)) {
/* Extra work to detect device changes
*/
GString *digests_all = g_string_sized_new(1024);
GString *digests_secure = g_string_sized_new(1024);
GList *matches = find_unfencing_devices(data_set->resources, NULL);
char *key = NULL;
char *value = NULL;
for (GList *gIter = matches; gIter != NULL; gIter = gIter->next) {
pe_resource_t *match = gIter->data;
const char *agent = g_hash_table_lookup(match->meta,
XML_ATTR_TYPE);
op_digest_cache_t *data = NULL;
data = pe__compare_fencing_digest(match, agent, node, data_set);
if(data->rc == RSC_DIGEST_ALL) {
optional = FALSE;
crm_notice("Unfencing node %s because the definition of "
"%s changed", pe__node_name(node), match->id);
if (!pcmk__is_daemon && data_set->priv != NULL) {
pcmk__output_t *out = data_set->priv;
out->info(out,
"notice: Unfencing node %s because the "
"definition of %s changed",
pe__node_name(node), match->id);
}
}
pcmk__g_strcat(digests_all,
match->id, ":", agent, ":",
data->digest_all_calc, ",", NULL);
pcmk__g_strcat(digests_secure,
match->id, ":", agent, ":",
data->digest_secure_calc, ",", NULL);
}
key = strdup(XML_OP_ATTR_DIGESTS_ALL);
value = strdup((const char *) digests_all->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_all, TRUE);
key = strdup(XML_OP_ATTR_DIGESTS_SECURE);
value = strdup((const char *) digests_secure->str);
CRM_ASSERT((key != NULL) && (value != NULL));
g_hash_table_insert(stonith_op->meta, key, value);
g_string_free(digests_secure, TRUE);
}
} else {
free(op_key);
}
if (data_set->priority_fencing_delay > 0
/* It's a suitable case where `priority-fencing-delay` applies.
* At least add `priority-fencing-delay` field as an indicator. */
&& (priority_delay
/* The priority delay needs to be recalculated if this function has
* been called by schedule_fencing_and_shutdowns() after node
* priority has already been calculated by native_add_running().
*/
|| g_hash_table_lookup(stonith_op->meta,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY) != NULL)) {
/* Add `priority-fencing-delay` to the fencing op even if it's 0 for
* the targeting node. So that it takes precedence over any possible
* `pcmk_delay_base/max`.
*/
char *delay_s = pcmk__itoa(node_priority_fencing_delay(node, data_set));
g_hash_table_insert(stonith_op->meta,
strdup(XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY),
delay_s);
}
if(optional == FALSE && pe_can_fence(data_set, node)) {
pe__clear_action_flags(stonith_op, pe_action_optional);
pe_action_set_reason(stonith_op, reason, false);
} else if(reason && stonith_op->reason == NULL) {
stonith_op->reason = strdup(reason);
}
return stonith_op;
}
void
pe_free_action(pe_action_t * action)
{
if (action == NULL) {
return;
}
g_list_free_full(action->actions_before, free); /* pe_action_wrapper_t* */
g_list_free_full(action->actions_after, free); /* pe_action_wrapper_t* */
if (action->extra) {
g_hash_table_destroy(action->extra);
}
if (action->meta) {
g_hash_table_destroy(action->meta);
}
free(action->cancel_task);
free(action->reason);
free(action->task);
free(action->uuid);
free(action->node);
free(action);
}
int
pe_get_configured_timeout(pe_resource_t *rsc, const char *action, pe_working_set_t *data_set)
{
xmlNode *child = NULL;
GHashTable *action_meta = NULL;
const char *timeout_spec = NULL;
int timeout_ms = 0;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
for (child = first_named_child(rsc->ops_xml, XML_ATTR_OP);
child != NULL; child = crm_next_same_xml(child)) {
if (pcmk__str_eq(action, crm_element_value(child, XML_NVPAIR_ATTR_NAME),
pcmk__str_casei)) {
timeout_spec = crm_element_value(child, XML_ATTR_TIMEOUT);
break;
}
}
if (timeout_spec == NULL && data_set->op_defaults) {
action_meta = pcmk__strkey_table(free, free);
pe__unpack_dataset_nvpairs(data_set->op_defaults, XML_TAG_META_SETS,
&rule_data, action_meta, NULL, FALSE, data_set);
timeout_spec = g_hash_table_lookup(action_meta, XML_ATTR_TIMEOUT);
}
// @TODO check meta-attributes
// @TODO maybe use min-interval monitor timeout as default for monitors
timeout_ms = crm_get_msec(timeout_spec);
if (timeout_ms < 0) {
timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S);
}
if (action_meta != NULL) {
g_hash_table_destroy(action_meta);
}
return timeout_ms;
}
enum action_tasks
get_complex_task(const pe_resource_t *rsc, const char *name)
{
enum action_tasks task = text2task(name);
if ((rsc != NULL) && (rsc->variant == pe_native)) {
switch (task) {
case stopped_rsc:
case started_rsc:
case action_demoted:
case action_promoted:
crm_trace("Folding %s back into its atomic counterpart for %s",
name, rsc->id);
--task;
break;
default:
break;
}
}
return task;
}
/*!
* \internal
* \brief Find first matching action in a list
*
* \param[in] input List of actions to search
* \param[in] uuid If not NULL, action must have this UUID
* \param[in] task If not NULL, action must have this action name
* \param[in] on_node If not NULL, action must be on this node
*
* \return First action in list that matches criteria, or NULL if none
*/
pe_action_t *
find_first_action(const GList *input, const char *uuid, const char *task,
const pe_node_t *on_node)
{
CRM_CHECK(uuid || task, return NULL);
for (const GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (uuid != NULL && !pcmk__str_eq(uuid, action->uuid, pcmk__str_casei)) {
continue;
} else if (task != NULL && !pcmk__str_eq(task, action->task, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
return action;
} else if (action->node == NULL) {
continue;
} else if (on_node->details == action->node->details) {
return action;
}
}
return NULL;
}
GList *
find_actions(GList *input, const char *key, const pe_node_t *on_node)
{
GList *gIter = input;
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if (!pcmk__str_eq(key, action->uuid, pcmk__str_casei)) {
continue;
} else if (on_node == NULL) {
crm_trace("Action %s matches (ignoring node)", key);
result = g_list_prepend(result, action);
} else if (action->node == NULL) {
crm_trace("Action %s matches (unallocated, assigning to %s)",
key, pe__node_name(on_node));
action->node = pe__copy_node(on_node);
result = g_list_prepend(result, action);
} else if (on_node->details == action->node->details) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
GList *
find_actions_exact(GList *input, const char *key, const pe_node_t *on_node)
{
GList *result = NULL;
CRM_CHECK(key != NULL, return NULL);
if (on_node == NULL) {
return NULL;
}
for (GList *gIter = input; gIter != NULL; gIter = gIter->next) {
pe_action_t *action = (pe_action_t *) gIter->data;
if ((action->node != NULL)
&& pcmk__str_eq(key, action->uuid, pcmk__str_casei)
&& pcmk__str_eq(on_node->details->id, action->node->details->id,
pcmk__str_casei)) {
crm_trace("Action %s on %s matches", key, pe__node_name(on_node));
result = g_list_prepend(result, action);
}
}
return result;
}
/*!
* \brief Find all actions of given type for a resource
*
* \param[in] rsc Resource to search
* \param[in] node Find only actions scheduled on this node
* \param[in] task Action name to search for
* \param[in] require_node If TRUE, NULL node or action node will not match
*
* \return List of actions found (or NULL if none)
* \note If node is not NULL and require_node is FALSE, matching actions
* without a node will be assigned to node.
*/
GList *
pe__resource_actions(const pe_resource_t *rsc, const pe_node_t *node,
const char *task, bool require_node)
{
GList *result = NULL;
char *key = pcmk__op_key(rsc->id, task, 0);
if (require_node) {
result = find_actions_exact(rsc->actions, key, node);
} else {
result = find_actions(rsc->actions, key, node);
}
free(key);
return result;
}
/*!
* \internal
* \brief Create an action reason string based on the action itself
*
* \param[in] action Action to create reason string for
* \param[in] flag Action flag that was cleared
*
* \return Newly allocated string suitable for use as action reason
* \note It is the caller's responsibility to free() the result.
*/
char *
pe__action2reason(const pe_action_t *action, enum pe_action_flags flag)
{
const char *change = NULL;
switch (flag) {
case pe_action_runnable:
change = "unrunnable";
break;
case pe_action_migrate_runnable:
change = "unmigrateable";
break;
case pe_action_optional:
change = "required";
break;
default:
// Bug: caller passed unsupported flag
CRM_CHECK(change != NULL, change = "");
break;
}
return crm_strdup_printf("%s%s%s %s", change,
(action->rsc == NULL)? "" : " ",
(action->rsc == NULL)? "" : action->rsc->id,
action->task);
}
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite)
{
if (action->reason != NULL && overwrite) {
pe_rsc_trace(action->rsc, "Changing %s reason from '%s' to '%s'",
action->uuid, action->reason, pcmk__s(reason, "(none)"));
} else if (action->reason == NULL) {
pe_rsc_trace(action->rsc, "Set %s reason to '%s'",
action->uuid, pcmk__s(reason, "(none)"));
} else {
// crm_assert(action->reason != NULL && !overwrite);
return;
}
pcmk__str_update(&action->reason, reason);
}
/*!
* \internal
* \brief Create an action to clear a resource's history from CIB
*
* \param[in,out] rsc Resource to clear
* \param[in] node Node to clear history on
* \param[in,out] data_set Cluster working set
*
* \return New action to clear resource history
*/
pe_action_t *
pe__clear_resource_history(pe_resource_t *rsc, const pe_node_t *node,
pe_working_set_t *data_set)
{
char *key = NULL;
CRM_ASSERT(rsc && node);
key = pcmk__op_key(rsc->id, CRM_OP_LRM_DELETE, 0);
return custom_action(rsc, key, CRM_OP_LRM_DELETE, node, FALSE, TRUE,
data_set);
}
#define sort_return(an_int, why) do { \
free(a_uuid); \
free(b_uuid); \
crm_trace("%s (%d) %c %s (%d) : %s", \
a_xml_id, a_call_id, an_int>0?'>':an_int<0?'<':'=', \
b_xml_id, b_call_id, why); \
return an_int; \
} while(0)
int
pe__is_newer_op(const xmlNode *xml_a, const xmlNode *xml_b,
bool same_node_default)
{
int a_call_id = -1;
int b_call_id = -1;
char *a_uuid = NULL;
char *b_uuid = NULL;
const char *a_xml_id = crm_element_value(xml_a, XML_ATTR_ID);
const char *b_xml_id = crm_element_value(xml_b, XML_ATTR_ID);
const char *a_node = crm_element_value(xml_a, XML_LRM_ATTR_TARGET);
const char *b_node = crm_element_value(xml_b, XML_LRM_ATTR_TARGET);
bool same_node = true;
/* @COMPAT The on_node attribute was added to last_failure as of 1.1.13 (via
* 8b3ca1c) and the other entries as of 1.1.12 (via 0b07b5c).
*
* In case that any of the lrm_rsc_op entries doesn't have on_node
* attribute, we need to explicitly tell whether the two operations are on
* the same node.
*/
if (a_node == NULL || b_node == NULL) {
same_node = same_node_default;
} else {
same_node = pcmk__str_eq(a_node, b_node, pcmk__str_casei);
}
if (same_node && pcmk__str_eq(a_xml_id, b_xml_id, pcmk__str_none)) {
/* We have duplicate lrm_rsc_op entries in the status
* section which is unlikely to be a good thing
* - we can handle it easily enough, but we need to get
* to the bottom of why it's happening.
*/
pe_err("Duplicate lrm_rsc_op entries named %s", a_xml_id);
sort_return(0, "duplicate");
}
crm_element_value_int(xml_a, XML_LRM_ATTR_CALLID, &a_call_id);
crm_element_value_int(xml_b, XML_LRM_ATTR_CALLID, &b_call_id);
if (a_call_id == -1 && b_call_id == -1) {
/* both are pending ops so it doesn't matter since
* stops are never pending
*/
sort_return(0, "pending");
} else if (same_node && a_call_id >= 0 && a_call_id < b_call_id) {
sort_return(-1, "call id");
} else if (same_node && b_call_id >= 0 && a_call_id > b_call_id) {
sort_return(1, "call id");
} else if (a_call_id >= 0 && b_call_id >= 0
&& (!same_node || a_call_id == b_call_id)) {
/*
* The op and last_failed_op are the same
* Order on last-rc-change
*/
time_t last_a = -1;
time_t last_b = -1;
crm_element_value_epoch(xml_a, XML_RSC_OP_LAST_CHANGE, &last_a);
crm_element_value_epoch(xml_b, XML_RSC_OP_LAST_CHANGE, &last_b);
crm_trace("rc-change: %lld vs %lld",
(long long) last_a, (long long) last_b);
if (last_a >= 0 && last_a < last_b) {
sort_return(-1, "rc-change");
} else if (last_b >= 0 && last_a > last_b) {
sort_return(1, "rc-change");
}
sort_return(0, "rc-change");
} else {
/* One of the inputs is a pending operation
* Attempt to use XML_ATTR_TRANSITION_MAGIC to determine its age relative to the other
*/
int a_id = -1;
int b_id = -1;
const char *a_magic = crm_element_value(xml_a, XML_ATTR_TRANSITION_MAGIC);
const char *b_magic = crm_element_value(xml_b, XML_ATTR_TRANSITION_MAGIC);
CRM_CHECK(a_magic != NULL && b_magic != NULL, sort_return(0, "No magic"));
if (!decode_transition_magic(a_magic, &a_uuid, &a_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic a");
}
if (!decode_transition_magic(b_magic, &b_uuid, &b_id, NULL, NULL, NULL,
NULL)) {
sort_return(0, "bad magic b");
}
/* try to determine the relative age of the operation...
* some pending operations (e.g. a start) may have been superseded
* by a subsequent stop
*
* [a|b]_id == -1 means it's a shutdown operation and _always_ comes last
*/
if (!pcmk__str_eq(a_uuid, b_uuid, pcmk__str_casei) || a_id == b_id) {
/*
* some of the logic in here may be redundant...
*
* if the UUID from the TE doesn't match then one better
* be a pending operation.
* pending operations don't survive between elections and joins
* because we query the LRM directly
*/
if (b_call_id == -1) {
sort_return(-1, "transition + call");
} else if (a_call_id == -1) {
sort_return(1, "transition + call");
}
} else if ((a_id >= 0 && a_id < b_id) || b_id == -1) {
sort_return(-1, "transition");
} else if ((b_id >= 0 && a_id > b_id) || a_id == -1) {
sort_return(1, "transition");
}
}
/* we should never end up here */
CRM_CHECK(FALSE, sort_return(0, "default"));
}
gint
sort_op_by_callid(gconstpointer a, gconstpointer b)
{
const xmlNode *xml_a = a;
const xmlNode *xml_b = b;
return pe__is_newer_op(xml_a, xml_b, true);
}
/*!
* \internal
* \brief Create a new pseudo-action for a resource
*
* \param[in,out] rsc Resource to create action for
* \param[in] task Action name
* \param[in] optional Whether action should be considered optional
* \param[in] runnable Whethe action should be considered runnable
*
* \return New action object corresponding to arguments
*/
pe_action_t *
pe__new_rsc_pseudo_action(pe_resource_t *rsc, const char *task, bool optional,
bool runnable)
{
pe_action_t *action = NULL;
CRM_ASSERT((rsc != NULL) && (task != NULL));
action = custom_action(rsc, pcmk__op_key(rsc->id, task, 0), task, NULL,
optional, TRUE, rsc->cluster);
pe__set_action_flags(action, pe_action_pseudo);
if (runnable) {
pe__set_action_flags(action, pe_action_runnable);
}
return action;
}
/*!
* \internal
* \brief Add the expected result to an action
*
* \param[in,out] action Action to add expected result to
* \param[in] expected_result Expected result to add
*
* \note This is more efficient than calling add_hash_param().
*/
void
pe__add_action_expected_result(pe_action_t *action, int expected_result)
{
char *name = NULL;
CRM_ASSERT((action != NULL) && (action->meta != NULL));
name = strdup(XML_ATTR_TE_TARGET_RC);
CRM_ASSERT (name != NULL);
g_hash_table_insert(action->meta, name, pcmk__itoa(expected_result));
}
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 2b0b0cf08f..993baa1dc8 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,4908 +1,4911 @@
/*
* Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdio.h>
#include <string.h>
#include <glib.h>
#include <time.h>
#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/common/util.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <pe_status_private.h>
CRM_TRACE_INIT_DATA(pe_status);
// A (parsed) resource action history entry
struct action_history {
pe_resource_t *rsc; // Resource that history is for
pe_node_t *node; // Node that history is for
xmlNode *xml; // History entry XML
// Parsed from entry XML
const char *id; // XML ID of history entry
const char *key; // Operation key of action
const char *task; // Action name
const char *exit_reason; // Exit reason given for result
guint interval_ms; // Action interval
int call_id; // Call ID of action
int expected_exit_status; // Expected exit status of action
int exit_status; // Actual exit status of action
int execution_status; // Execution status of action
};
/* This uses pcmk__set_flags_as()/pcmk__clear_flags_as() directly rather than
* use pe__set_working_set_flags()/pe__clear_working_set_flags() so that the
* flag is stringified more readably in log messages.
*/
#define set_config_flag(data_set, option, flag) do { \
const char *scf_value = pe_pref((data_set)->config_hash, (option)); \
if (scf_value != NULL) { \
if (crm_is_true(scf_value)) { \
(data_set)->flags = pcmk__set_flags_as(__func__, __LINE__, \
LOG_TRACE, "Working set", \
crm_system_name, (data_set)->flags, \
(flag), #flag); \
} else { \
(data_set)->flags = pcmk__clear_flags_as(__func__, __LINE__,\
LOG_TRACE, "Working set", \
crm_system_name, (data_set)->flags, \
(flag), #flag); \
} \
} \
} while(0)
static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
xmlNode **last_failure,
enum action_fail_response *failed);
static void determine_remote_online_status(pe_working_set_t *data_set,
pe_node_t *this_node);
static void add_node_attrs(const xmlNode *xml_obj, pe_node_t *node,
bool overwrite, pe_working_set_t *data_set);
static void determine_online_status(const xmlNode *node_state,
pe_node_t *this_node,
pe_working_set_t *data_set);
static void unpack_node_lrm(pe_node_t *node, const xmlNode *xml,
pe_working_set_t *data_set);
// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;
static gboolean
is_dangling_guest_node(pe_node_t *node)
{
/* we are looking for a remote-node that was supposed to be mapped to a
* container resource, but all traces of that container have disappeared
* from both the config and the status section. */
if (pe__is_guest_or_remote_node(node) &&
node->details->remote_rsc &&
node->details->remote_rsc->container == NULL &&
pcmk_is_set(node->details->remote_rsc->flags,
pe_rsc_orphan_container_filler)) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] data_set Current working set of cluster
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
* \param[in] priority_delay Whether to consider `priority-fencing-delay`
*/
void
pe_fence_node(pe_working_set_t * data_set, pe_node_t * node,
const char *reason, bool priority_delay)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (pe__is_guest_node(node)) {
pe_resource_t *rsc = node->details->remote_rsc->container;
if (!pcmk_is_set(rsc->flags, pe_rsc_failed)) {
if (!pcmk_is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
pe__node_name(node), reason, rsc->id);
} else {
crm_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
pe__node_name(node), rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
}
}
} else if (is_dangling_guest_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
pe__node_name(node), reason);
pe__set_resource_flags(node->details->remote_rsc,
pe_rsc_failed|pe_rsc_stop);
} else if (pe__is_remote_node(node)) {
pe_resource_t *rsc = node->details->remote_rsc;
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
pe__node_name(node), reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
crm_warn("Remote node %s %s: %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
// No need to apply `priority-fencing-delay` for remote nodes
pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
reason);
} else {
crm_warn("Cluster node %s %s: %s",
pe__node_name(node),
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" PCMK_STONITH_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='" PCMK__VALUE_UNFENCING "']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
static void
set_if_xpath(uint64_t flag, const char *xpath, pe_working_set_t *data_set)
{
xmlXPathObjectPtr result = NULL;
if (!pcmk_is_set(data_set->flags, flag)) {
result = xpath_search(data_set->input, xpath);
if (result && (numXpathResults(result) > 0)) {
pe__set_working_set_flags(data_set, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash = pcmk__strkey_table(free, free);
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
data_set->config_hash = config_hash;
pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if (!pcmk_is_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_info("Watchdog-based self-fencing will be performed via SBD if "
"fencing is required and stonith-watchdog-timeout is nonzero");
pe__set_working_set_flags(data_set, pe_flag_have_stonith_resource);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = (int) crm_parse_interval_spec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)? "enabled" : "disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
if (!strcmp(data_set->stonith_action, "poweroff")) {
pe_warn_once(pe_wo_poweroff,
"Support for stonith-action of 'poweroff' is deprecated "
"and will be removed in a future release (use 'off' instead)");
data_set->stonith_action = "off";
}
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
crm_debug("Concurrent fencing is %s",
pcmk_is_set(data_set->flags, pe_flag_concurrent_fencing)? "enabled" : "disabled");
value = pe_pref(data_set->config_hash,
XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
if (value) {
data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000;
crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay);
}
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_stop_everything)));
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (pcmk_is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (pcmk__str_eq(value, "ignore", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (pcmk__str_eq(value, "freeze", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (pcmk__str_eq(value, "demote", pcmk__str_casei)) {
data_set->no_quorum_policy = no_quorum_demote;
} else if (pcmk__str_eq(value, "suicide", pcmk__str_casei)) {
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
int do_panic = 0;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || pcmk_is_set(data_set->flags, pe_flag_have_quorum)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else {
crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
"fencing is disabled");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case no_quorum_demote:
crm_debug("On loss of quorum: "
"Demote promotable resources and stop other resources");
break;
case no_quorum_suicide:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
pcmk_is_set(data_set->flags, pe_flag_stop_action_orphans)? "stopped" : "ignored");
value = pe_pref(data_set->config_hash, "remove-after-stop");
if (value != NULL) {
if (crm_is_true(value)) {
pe__set_working_set_flags(data_set, pe_flag_remove_after_stop);
#ifndef PCMK__COMPAT_2_0
pe_warn_once(pe_wo_remove_after,
"Support for the remove-after-stop cluster property is"
" deprecated and will be removed in a future release");
#endif
} else {
pe__clear_working_set_flags(data_set, pe_flag_remove_after_stop);
}
}
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
pcmk__btoa(pcmk_is_set(data_set->flags, pe_flag_maintenance_mode)));
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
pcmk_is_set(data_set->flags, pe_flag_start_failure_fatal)? "always fatal" : "handled by failcount");
if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
}
if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
}
pe__unpack_node_health_scores(data_set);
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
crm_trace("Resources will%s be locked to cleanly shut down nodes",
(pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
value = pe_pref(data_set->config_hash,
XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000;
crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
}
value = pe_pref(data_set->config_hash,
XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT);
data_set->node_pending_timeout = crm_parse_interval_spec(value) / 1000;
crm_trace("Node pending timeout is %us", data_set->node_pending_timeout);
return TRUE;
}
pe_node_t *
pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set)
{
pe_node_t *new_node = NULL;
if (pe_find_node(data_set->nodes, uname) != NULL) {
pcmk__config_warn("More than one node entry has name '%s'", uname);
}
new_node = calloc(1, sizeof(pe_node_t));
if (new_node == NULL) {
return NULL;
}
new_node->weight = char2score(score);
new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
if (new_node->details == NULL) {
free(new_node);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->data_set = data_set;
if (pcmk__str_eq(type, "member", pcmk__str_null_matches | pcmk__str_casei)) {
new_node->details->type = node_member;
} else if (pcmk__str_eq(type, "remote", pcmk__str_casei)) {
new_node->details->type = node_remote;
pe__set_working_set_flags(data_set, pe_flag_have_remote_nodes);
} else {
/* @COMPAT 'ping' is the default for backward compatibility, but it
* should be changed to 'member' at a compatibility break
*/
if (!pcmk__str_eq(type, "ping", pcmk__str_casei)) {
pcmk__config_warn("Node %s has unrecognized type '%s', "
"assuming 'ping'", pcmk__s(uname, "without name"),
type);
}
pe_warn_once(pe_wo_ping_node,
"Support for nodes of type 'ping' (such as %s) is "
"deprecated and will be removed in a future release",
pcmk__s(uname, "unnamed node"));
new_node->details->type = node_ping;
}
new_node->details->attrs = pcmk__strkey_table(free, free);
if (pe__is_guest_or_remote_node(new_node)) {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("remote"));
} else {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("cluster"));
}
new_node->details->utilization = pcmk__strkey_table(free, free);
new_node->details->digest_cache = pcmk__strkey_table(free,
pe__free_digests);
data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node,
pe__cmp_node_name);
return new_node;
}
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
const char *remote_allow_migrate=NULL;
const char *is_managed = NULL;
for (attr_set = pcmk__xe_first_child(xml_obj); attr_set != NULL;
attr_set = pcmk__xe_next(attr_set)) {
if (!pcmk__str_eq((const char *)attr_set->name, XML_TAG_META_SETS,
pcmk__str_casei)) {
continue;
}
for (attr = pcmk__xe_first_child(attr_set); attr != NULL;
attr = pcmk__xe_next(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (pcmk__str_eq(name, XML_RSC_ATTR_REMOTE_NODE, pcmk__str_casei)) {
remote_name = value;
} else if (pcmk__str_eq(name, "remote-addr", pcmk__str_casei)) {
remote_server = value;
} else if (pcmk__str_eq(name, "remote-port", pcmk__str_casei)) {
remote_port = value;
} else if (pcmk__str_eq(name, "remote-connect-timeout", pcmk__str_casei)) {
connect_timeout = value;
} else if (pcmk__str_eq(name, "remote-allow-migrate", pcmk__str_casei)) {
remote_allow_migrate=value;
} else if (pcmk__str_eq(name, XML_RSC_ATTR_MANAGED, pcmk__str_casei)) {
is_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (pe_find_resource(data->resources, remote_name) != NULL) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, is_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
static void
handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node)
{
if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (pcmk_is_set(data_set->flags, pe_flag_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
pe_node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
for (xml_obj = pcmk__xe_first_child(xml_nodes); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, pcmk__str_none)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
"> entry in configuration without id");
continue;
}
new_node = pe_create_node(id, uname, type, score, data_set);
if (new_node == NULL) {
return FALSE;
}
handle_startup_fencing(data_set, new_node);
add_node_attrs(xml_obj, new_node, FALSE, data_set);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
crm_info("Creating a fake local node");
pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
data_set);
}
return TRUE;
}
static void
setup_container(pe_resource_t * rsc, pe_working_set_t * data_set)
{
const char *container_id = NULL;
if (rsc->children) {
g_list_foreach(rsc->children, (GFunc) setup_container, data_set);
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && !pcmk__str_eq(container_id, rsc->id, pcmk__str_casei)) {
pe_resource_t *container = pe_find_resource(data_set->resources, container_id);
if (container) {
rsc->container = container;
pe__set_resource_flags(container, pe_rsc_is_container);
container->fillers = g_list_append(container->fillers, rsc);
pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
} else {
pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
/* Create remote nodes and guest nodes from the resource configuration
* before unpacking resources.
*/
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
const char *new_node_id = NULL;
/* Check for remote nodes, which are defined by ocf:pacemaker:remote
* primitives.
*/
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* The "pe_find_node" check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found remote node %s defined by resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes, which are defined by special meta-attributes
* of a primitive of any type (for example, VirtualDomain or Xen).
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, pcmk__str_none)) {
/* This will add an ocf:pacemaker:remote primitive to the
* configuration for the guest node's connection, to be unpacked
* later.
*/
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s",
new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
/* Check for guest nodes inside a group. Clones are currently not
* supported as guest nodes.
*/
if (pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, pcmk__str_none)) {
xmlNode *xml_obj2 = NULL;
for (xml_obj2 = pcmk__xe_first_child(xml_obj); xml_obj2 != NULL;
xml_obj2 = pcmk__xe_next(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest node %s in resource %s inside group %s",
new_node_id, ID(xml_obj2), ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the scheduler calculations.
*/
static void
link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc)
{
pe_node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
remote_node = pe_find_node(data_set->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return);
pe_rsc_trace(new_rsc, "Linking remote connection resource %s to %s",
new_rsc->id, pe__node_name(remote_node));
remote_node->details->remote_rsc = new_rsc;
if (new_rsc->container == NULL) {
/* Handle start-up fencing for remote nodes (as opposed to guest nodes)
* the same as is done for cluster nodes.
*/
handle_startup_fencing(data_set, remote_node);
} else {
/* pe_create_node() marks the new node as "remote" or "cluster"; now
* that we know the node is a guest node, update it correctly.
*/
g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("container"));
}
}
static void
destroy_tag(gpointer data)
{
pe_tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] data_set Where to put resource information
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when pe__unpack_resource() calls resource_location()
*/
gboolean
unpack_resources(const xmlNode *xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GList *gIter = NULL;
data_set->template_rsc_sets = pcmk__strkey_table(free, destroy_tag);
for (xml_obj = pcmk__xe_first_child(xml_resources); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
pe_resource_t *new_rsc = NULL;
const char *id = ID(xml_obj);
if (pcmk__str_empty(id)) {
pcmk__config_err("Ignoring <%s> resource without ID",
crm_element_name(xml_obj));
continue;
}
if (pcmk__str_eq((const char *) xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE,
pcmk__str_none)) {
if (g_hash_table_lookup_extended(data_set->template_rsc_sets, id,
NULL, NULL) == FALSE) {
/* Record the template's ID for the knowledge of its existence anyway. */
g_hash_table_insert(data_set->template_rsc_sets, strdup(id), NULL);
}
continue;
}
crm_trace("Unpacking <%s " XML_ATTR_ID "='%s'>",
crm_element_name(xml_obj), id);
if (pe__unpack_resource(xml_obj, &new_rsc, NULL,
data_set) == pcmk_rc_ok) {
data_set->resources = g_list_append(data_set->resources, new_rsc);
pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
} else {
pcmk__config_err("Ignoring <%s> resource '%s' "
"because configuration is invalid",
crm_element_name(xml_obj), id);
}
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
pe_resource_t *rsc = (pe_resource_t *) gIter->data;
setup_container(rsc, data_set);
link_rsc2remotenode(data_set, rsc);
}
data_set->resources = g_list_sort(data_set->resources,
pe__cmp_rsc_priority);
if (pcmk_is_set(data_set->flags, pe_flag_quick_location)) {
/* Ignore */
} else if (pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)
&& !pcmk_is_set(data_set->flags, pe_flag_have_stonith_resource)) {
pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
xmlNode *xml_tag = NULL;
data_set->tags = pcmk__strkey_table(free, destroy_tag);
for (xml_tag = pcmk__xe_first_child(xml_tags); xml_tag != NULL;
xml_tag = pcmk__xe_next(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (!pcmk__str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, pcmk__str_none)) {
continue;
}
if (tag_id == NULL) {
pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
crm_element_name(xml_tag));
continue;
}
for (xml_obj_ref = pcmk__xe_first_child(xml_tag); xml_obj_ref != NULL;
xml_obj_ref = pcmk__xe_next(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (!pcmk__str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, pcmk__str_none)) {
continue;
}
if (obj_ref == NULL) {
pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
crm_element_name(xml_obj_ref), tag_id);
continue;
}
if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
pe_ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (pcmk__str_empty(ticket_id)) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = pcmk__xml_attr_value(xIter);
if (pcmk__str_eq(prop_name, XML_ATTR_ID, pcmk__str_none)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
long long last_granted_ll;
pcmk__scan_ll(last_granted, &last_granted_ll, 0LL);
ticket->last_granted = (time_t) last_granted_ll;
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
for (xml_obj = pcmk__xe_first_child(xml_tickets); xml_obj != NULL;
xml_obj = pcmk__xe_next(xml_obj)) {
if (!pcmk__str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, pcmk__str_none)) {
continue;
}
unpack_ticket_state(xml_obj, data_set);
}
return TRUE;
}
static void
unpack_handle_remote_attrs(pe_node_t *this_node, const xmlNode *state,
pe_working_set_t *data_set)
{
const char *resource_discovery_enabled = NULL;
const xmlNode *attrs = NULL;
pe_resource_t *rsc = NULL;
if (!pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
return;
}
if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
return;
}
crm_trace("Processing Pacemaker Remote node %s", pe__node_name(this_node));
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_MAINTENANCE),
&(this_node->details->remote_maintenance), 0);
rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
}
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (pe__shutdown_requested(this_node)) {
crm_info("%s is shutting down", pe__node_name(this_node));
this_node->details->shutdown = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(this_node));
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_managed))) {
crm_info("%s is in maintenance mode", pe__node_name(this_node));
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
if (pe__is_remote_node(this_node)
&& !pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
" attribute on Pacemaker Remote node %s"
" because fencing is disabled",
pe__node_name(this_node));
} else {
/* This is either a remote node with fencing enabled, or a guest
* node. We don't care whether fencing is enabled when fencing guest
* nodes, because they are "fenced" by recovering their containing
* resource.
*/
crm_info("%s has resource discovery disabled",
pe__node_name(this_node));
this_node->details->rsc_discovery_enabled = FALSE;
}
}
}
/*!
* \internal
* \brief Unpack a cluster node's transient attributes
*
* \param[in] state CIB node state XML
* \param[in,out] node Cluster node whose attributes are being unpacked
* \param[in,out] data_set Cluster working set
*/
static void
unpack_transient_attributes(const xmlNode *state, pe_node_t *node,
pe_working_set_t *data_set)
{
const char *discovery = NULL;
const xmlNode *attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS,
FALSE);
add_node_attrs(attrs, node, TRUE, data_set);
if (crm_is_true(pe_node_attribute_raw(node, "standby"))) {
crm_info("%s is in standby mode", pe__node_name(node));
node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(node, "maintenance"))) {
crm_info("%s is in maintenance mode", pe__node_name(node));
node->details->maintenance = TRUE;
}
discovery = pe_node_attribute_raw(node, XML_NODE_ATTR_RSC_DISCOVERY);
if ((discovery != NULL) && !crm_is_true(discovery)) {
crm_warn("Ignoring " XML_NODE_ATTR_RSC_DISCOVERY
" attribute for %s because disabling resource discovery "
"is not allowed for cluster nodes", pe__node_name(node));
}
}
/*!
* \internal
* \brief Unpack a node state entry (first pass)
*
* Unpack one node state entry from status. This unpacks information from the
* node_state element itself and node attributes inside it, but not the
* resource history inside it. Multiple passes through the status are needed to
* fully unpack everything.
*
* \param[in] state CIB node state XML
* \param[in,out] data_set Cluster working set
*/
static void
unpack_node_state(const xmlNode *state, pe_working_set_t *data_set)
{
const char *id = NULL;
const char *uname = NULL;
pe_node_t *this_node = NULL;
id = crm_element_value(state, XML_ATTR_ID);
if (id == NULL) {
crm_warn("Ignoring malformed " XML_CIB_TAG_STATE " entry without "
XML_ATTR_ID);
return;
}
uname = crm_element_value(state, XML_ATTR_UNAME);
if (uname == NULL) {
/* If a joining peer makes the cluster acquire the quorum from corosync
* meanwhile it has not joined CPG membership of pacemaker-controld yet,
* it's possible that the created node_state entry doesn't have an uname
* yet. We should recognize the node as `pending` and wait for it to
* join CPG.
*/
crm_trace("Handling " XML_CIB_TAG_STATE " entry with id=\"%s\" without "
XML_ATTR_UNAME, id);
}
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
pcmk__config_warn("Ignoring recorded node state for id=\"%s\" (%s) "
"because it is no longer in the configuration",
id, pcmk__s(uname, "uname unknown"));
return;
}
if (pe__is_guest_or_remote_node(this_node)) {
/* We can't determine the online status of Pacemaker Remote nodes until
* after all resource history has been unpacked. In this first pass, we
* do need to mark whether the node has been fenced, as this plays a
* role during unpacking cluster node resource state.
*/
pcmk__scan_min_int(crm_element_value(state, XML_NODE_IS_FENCED),
&(this_node->details->remote_was_fenced), 0);
return;
}
unpack_transient_attributes(state, this_node, data_set);
/* Provisionally mark this cluster node as clean. We have at least seen it
* in the current cluster's lifetime.
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
crm_trace("Determining online status of cluster node %s (id %s)",
pe__node_name(this_node), id);
determine_online_status(state, this_node, data_set);
if (!pcmk_is_set(data_set->flags, pe_flag_have_quorum)
&& this_node->details->online
&& (data_set->no_quorum_policy == no_quorum_suicide)) {
/* Everything else should flow from this automatically
* (at least until the scheduler becomes able to migrate off
* healthy resources)
*/
pe_fence_node(data_set, this_node, "cluster does not have quorum",
FALSE);
}
}
/*!
* \internal
* \brief Unpack nodes' resource history as much as possible
*
* Unpack as many nodes' resource history as possible in one pass through the
* status. We need to process Pacemaker Remote nodes' connections/containers
* before unpacking their history; the connection/container history will be
* in another node's history, so it might take multiple passes to unpack
* everything.
*
* \param[in] status CIB XML status section
* \param[in] fence If true, treat any not-yet-unpacked nodes as unseen
* \param[in,out] data_set Cluster working set
*
* \return Standard Pacemaker return code (specifically pcmk_rc_ok if done,
* or EAGAIN if more unpacking remains to be done)
*/
static int
unpack_node_history(const xmlNode *status, bool fence,
pe_working_set_t *data_set)
{
int rc = pcmk_rc_ok;
// Loop through all node_state entries in CIB status
for (const xmlNode *state = first_named_child(status, XML_CIB_TAG_STATE);
state != NULL; state = crm_next_same_xml(state)) {
const char *id = ID(state);
const char *uname = crm_element_value(state, XML_ATTR_UNAME);
pe_node_t *this_node = NULL;
if ((id == NULL) || (uname == NULL)) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history from malformed "
XML_CIB_TAG_STATE " without id and/or uname");
continue;
}
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
// Warning already logged in first pass through status section
crm_trace("Not unpacking resource history for node %s because "
"no longer in configuration", id);
continue;
}
if (this_node->details->unpacked) {
crm_trace("Not unpacking resource history for node %s because "
"already unpacked", id);
continue;
}
if (fence) {
// We're processing all remaining nodes
} else if (pe__is_guest_node(this_node)) {
/* We can unpack a guest node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection and containing resource are both up.
*/
pe_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL) || (rsc->role != RSC_ROLE_STARTED)
|| (rsc->container->role != RSC_ROLE_STARTED)) {
crm_trace("Not unpacking resource history for guest node %s "
"because container and connection are not known to "
"be up", id);
continue;
}
} else if (pe__is_remote_node(this_node)) {
/* We can unpack a remote node's history only after we've unpacked
* other resource history to the point that we know that the node's
* connection is up, with the exception of when shutdown locks are
* in use.
*/
pe_resource_t *rsc = this_node->details->remote_rsc;
if ((rsc == NULL)
|| (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)
&& (rsc->role != RSC_ROLE_STARTED))) {
crm_trace("Not unpacking resource history for remote node %s "
"because connection is not known to be up", id);
continue;
}
/* If fencing and shutdown locks are disabled and we're not processing
* unseen nodes, then we don't want to unpack offline nodes until online
* nodes have been unpacked. This allows us to number active clone
* instances first.
*/
} else if (!pcmk_any_flags_set(data_set->flags, pe_flag_stonith_enabled
|pe_flag_shutdown_lock)
&& !this_node->details->online) {
crm_trace("Not unpacking resource history for offline "
"cluster node %s", id);
continue;
}
if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
unpack_handle_remote_attrs(this_node, state, data_set);
}
crm_trace("Unpacking resource history for %snode %s",
(fence? "unseen " : ""), id);
this_node->details->unpacked = TRUE;
unpack_node_lrm(this_node, state, data_set);
rc = EAGAIN; // Other node histories might depend on this one
}
return rc;
}
/* remove nodes that are down, stopping */
/* create positive rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
xmlNode *state = NULL;
crm_trace("Beginning unpack");
if (data_set->tickets == NULL) {
data_set->tickets = pcmk__strkey_table(free, destroy_ticket);
}
for (state = pcmk__xe_first_child(status); state != NULL;
state = pcmk__xe_next(state)) {
if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, pcmk__str_none)) {
unpack_tickets_state((xmlNode *) state, data_set);
} else if (pcmk__str_eq((const char *)state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
unpack_node_state(state, data_set);
}
}
while (unpack_node_history(status, FALSE, data_set) == EAGAIN) {
crm_trace("Another pass through node resource histories is needed");
}
// Now catch any nodes we didn't see
unpack_node_history(status,
pcmk_is_set(data_set->flags, pe_flag_stonith_enabled),
data_set);
/* Now that we know where resources are, we can schedule stops of containers
* with failed bundle connections
*/
if (data_set->stop_needed != NULL) {
for (GList *item = data_set->stop_needed; item; item = item->next) {
pe_resource_t *container = item->data;
pe_node_t *node = pe__current_node(container);
if (node) {
stop_action(container, node, FALSE);
}
}
g_list_free(data_set->stop_needed);
data_set->stop_needed = NULL;
}
/* Now that we know status of all Pacemaker Remote connections and nodes,
* we can stop connections for node shutdowns, and check the online status
* of remote/guest nodes that didn't have any node history to unpack.
*/
for (GList *gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
pe_node_t *this_node = gIter->data;
if (!pe__is_guest_or_remote_node(this_node)) {
continue;
}
if (this_node->details->shutdown
&& (this_node->details->remote_rsc != NULL)) {
pe__set_next_role(this_node->details->remote_rsc, RSC_ROLE_STOPPED,
"remote shutdown");
}
if (!this_node->details->unpacked) {
determine_remote_online_status(data_set, this_node);
}
}
return TRUE;
}
static gboolean
determine_online_status_no_fencing(pe_working_set_t *data_set,
const xmlNode *node_state,
pe_node_t *this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
int member = false;
bool crmd_online = false;
long long when_member = 0;
long long when_online = 0;
// @COMPAT DCs < 2.1.7 use boolean instead of time for cluster membership
if (crm_str_to_boolean(in_cluster, &member) != 1) {
pcmk__scan_ll(in_cluster, &when_member, 0LL);
member = (when_member > 0) ? true : false;
}
if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) {
crmd_online = true;
} else if (pcmk__str_eq(is_peer, OFFLINESTATUS, pcmk__str_casei)) {
crmd_online = false;
} else {
pcmk__scan_ll(is_peer, &when_online, 0LL);
crmd_online = (when_online > 0) ? true : false;
}
if (!member) {
crm_trace("Node is down: in_cluster=%s",
pcmk__s(in_cluster, "<null>"));
} else if (crmd_online) {
if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("Controller is down: "
"in_cluster=%s is_peer=%s join=%s expected=%s",
pcmk__s(in_cluster, "<null>"), pcmk__s(is_peer, "<null>"),
pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
} else {
/* mark it unclean */
pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE);
crm_info("in_cluster=%s is_peer=%s join=%s expected=%s",
pcmk__s(in_cluster, "<null>"), pcmk__s(is_peer, "<null>"),
pcmk__s(join, "<null>"), pcmk__s(exp_state, "<null>"));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t *data_set,
const xmlNode *node_state, pe_node_t *this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
bool crmd_online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
const char *terminate = pe_node_attribute_raw(this_node, "terminate");
int member = false;
long long when_member = 0;
long long when_online = 0;
/*
- XML_NODE_JOIN_STATE ::= member|down|pending|banned
- XML_NODE_EXPECTED ::= member|down
@COMPAT with entries recorded for DCs < 2.1.7
- XML_NODE_IN_CLUSTER ::= true|false
- XML_NODE_IS_PEER ::= online|offline
Since crm_feature_set 3.18.0 (pacemaker-2.1.7):
- XML_NODE_IN_CLUSTER ::= <timestamp>|0
Since when node has been a cluster member. A value 0 of means the node is not
a cluster member.
- XML_NODE_IS_PEER ::= <timestamp>|0
Since when peer has been online in CPG. A value 0 means the peer is offline
in CPG.
*/
if (crm_is_true(terminate)) {
do_terminate = TRUE;
} else if (terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if (t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
crm_trace("%s: in_cluster=%s is_peer=%s join=%s expected=%s term=%d",
pe__node_name(this_node), pcmk__s(in_cluster, "<null>"),
pcmk__s(is_peer, "<null>"), pcmk__s(join, "<null>"),
pcmk__s(exp_state, "<null>"), do_terminate);
/* @COMPAT with boolean values of XML_NODE_IN_CLUSTER recorded for
* DCs < 2.1.7
*/
if (crm_str_to_boolean(in_cluster, &member) != 1) {
pcmk__scan_ll(in_cluster, &when_member, 0LL);
member = (when_member > 0) ? true : false;
}
online = member;
/* @COMPAT with "online"/"offline" values of XML_NODE_IS_PEER recorded for
* DCs < 2.1.7
*/
if (pcmk__str_eq(is_peer, ONLINESTATUS, pcmk__str_casei)) {
crmd_online = true;
} else if (pcmk__str_eq(is_peer, OFFLINESTATUS, pcmk__str_casei)) {
crmd_online = false;
} else {
pcmk__scan_ll(is_peer, &when_online, 0LL);
crmd_online = (when_online > 0) ? true : false;
}
if (exp_state == NULL) {
exp_state = CRMD_JOINSTATE_DOWN;
}
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", pe__node_name(this_node));
/* Slightly different criteria since we can't shut down a dead peer */
online = crmd_online;
} else if (in_cluster == NULL) {
pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE);
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_NACK, pcmk__str_casei)) {
pe_fence_node(data_set, this_node,
"peer failed Pacemaker membership criteria", FALSE);
} else if (do_terminate == FALSE && pcmk__str_eq(exp_state, CRMD_JOINSTATE_DOWN, pcmk__str_casei)) {
if (when_member > 0
&& when_online == 0
&& (get_effective_time(data_set) - when_member
>= data_set->node_pending_timeout)) {
pe_fence_node(data_set, this_node,
"peer pending timed out on joining the process group",
FALSE);
} else if (member || crmd_online) {
crm_info("- %s is not ready to run resources",
pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up",
pe__node_name(this_node));
}
} else if (do_terminate && pcmk__str_eq(join, CRMD_JOINSTATE_DOWN, pcmk__str_casei)
&& !member && !crmd_online) {
crm_info("%s was just shot", pe__node_name(this_node));
online = FALSE;
} else if (!member) {
// Consider `priority-fencing-delay` for lost nodes
pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE);
} else if (!crmd_online) {
pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE);
/* Everything is running at this point, now check join state */
} else if (do_terminate) {
pe_fence_node(data_set, this_node, "termination was requested", FALSE);
} else if (pcmk__str_eq(join, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
crm_info("%s is active", pe__node_name(this_node));
} else if (pcmk__strcase_any_of(join, CRMD_JOINSTATE_PENDING, CRMD_JOINSTATE_DOWN, NULL)) {
crm_info("%s is not ready to run resources", pe__node_name(this_node));
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE);
crm_warn("%s: in-cluster=%s is-peer=%s join=%s expected=%s term=%d shutdown=%d",
pe__node_name(this_node), pcmk__s(in_cluster, "<null>"),
pcmk__s(is_peer, "<null>"), pcmk__s(join, "<null>"),
pcmk__s(exp_state, "<null>"), do_terminate,
this_node->details->shutdown);
}
return online;
}
static void
determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node)
{
pe_resource_t *rsc = this_node->details->remote_rsc;
pe_resource_t *container = NULL;
pe_node_t *host = NULL;
/* If there is a node state entry for a (former) Pacemaker Remote node
* but no resource creating that node, the node's connection resource will
* be NULL. Consider it an offline remote node in that case.
*/
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
if (container && pcmk__list_of_1(rsc->running_on)) {
host = rsc->running_on->data;
}
/* If the resource is currently started, mark it online. */
if (rsc->role == RSC_ROLE_STARTED) {
crm_trace("%s node %s presumed ONLINE because connection resource is started",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
crm_trace("%s node %s shutting down because connection resource is stopping",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if(container && pcmk_is_set(container->flags, pe_rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
} else if (pcmk_is_set(rsc->flags, pe_rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
} else if (rsc->role == RSC_ROLE_STOPPED
|| (container && container->role == RSC_ROLE_STOPPED)) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = FALSE;
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
}
static void
determine_online_status(const xmlNode *node_state, pe_node_t *this_node,
pe_working_set_t *data_set)
{
gboolean online = FALSE;
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
CRM_CHECK(this_node != NULL, return);
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
if (pe__shutdown_requested(this_node)) {
this_node->details->shutdown = TRUE;
} else if (pcmk__str_eq(exp_state, CRMD_JOINSTATE_MEMBER, pcmk__str_casei)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (!pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
online = determine_online_status_no_fencing(data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(data_set, node_state, this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE; // @COMPAT deprecated and unused
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->fixed = TRUE; // @COMPAT deprecated and unused
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("%s is not a Pacemaker node", pe__node_name(this_node));
} else if (this_node->details->unclean) {
pe_proc_warn("%s is unclean", pe__node_name(this_node));
} else if (this_node->details->online) {
crm_info("%s is %s", pe__node_name(this_node),
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("%s is offline", pe__node_name(this_node));
}
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!pcmk__str_empty(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
CRM_ASSERT(end);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
CRM_ASSERT(basename);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
size_t base_name_len = end - last_rsc_id + 1;
char *zero = NULL;
CRM_ASSERT(end);
zero = calloc(base_name_len + 3, sizeof(char));
CRM_ASSERT(zero);
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
static pe_resource_t *
create_fake_resource(const char *rsc_id, const xmlNode *rsc_entry,
pe_working_set_t *data_set)
{
pe_resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (pe__unpack_resource(xml_rsc, &rsc, NULL, data_set) != pcmk_rc_ok) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
pe_node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pe_find_node(data_set->nodes, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
}
link_rsc2remotenode(data_set, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
pe__set_resource_flags(rsc, pe_rsc_orphan_container_filler);
}
pe__set_resource_flags(rsc, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*
* \param[in,out] parent Clone resource that orphan will be added to
* \param[in] rsc_id Orphan's resource ID
* \param[in] node Where orphan is active (for logging only)
* \param[in,out] data_set Cluster working set
*
* \return Newly added orphaned instance of \p parent
*/
static pe_resource_t *
create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
const pe_node_t *node, pe_working_set_t *data_set)
{
pe_resource_t *top = pe__create_clone_child(parent, data_set);
// find_rsc() because we might be a cloned group
pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, pe__node_name(node));
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in,out] data_set Cluster information
* \param[in] node Node on which to check for instance
* \param[in,out] parent Clone to check
* \param[in] rsc_id Name of cloned resource in history (without instance)
*/
static pe_resource_t *
find_anonymous_clone(pe_working_set_t *data_set, const pe_node_t *node,
pe_resource_t *parent, const char *rsc_id)
{
GList *rIter = NULL;
pe_resource_t *rsc = NULL;
pe_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(!pcmk_is_set(parent->flags, pe_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pe_rsc_trace(parent, "Looking for %s on %s in %s",
rsc_id, pe__node_name(node), parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GList *locations = NULL;
pe_resource_t *child = rIter->data;
/* Check whether this instance is already known to be active or pending
* anywhere, at this stage of unpacking. Because this function is called
* for a resource before the resource's individual operation history
* entries are unpacked, locations will generally not contain the
* desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
* history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same data set as part of a
* simulation.
*/
child->fns->location(child, &locations, 2);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (((pe_node_t *)locations->data)->details == node->details) {
/* This child instance is active on the requested node, so check
* for a corresponding configured resource. We use find_rsc()
* instead of child because child may be a cloned group, and we
* need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if (rsc) {
/* If there are multiple instance history entries for an
* anonymous clone in a single node's history (which can
* happen if globally-unique is switched from true to
* false), we want to consider the instances beyond the
* first as orphans, even if there are inactive instance
* numbers available.
*/
if (rsc->running_on) {
crm_notice("Active (now-)anonymous clone %s has "
"multiple (orphan) instance histories on %s",
parent->id, pe__node_name(node));
skip_inactive = TRUE;
rsc = NULL;
} else {
pe_rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& !pcmk_is_set(child->flags, pe_rsc_block)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pe_find_clone);
/* ... but don't use it if it was already associated with a
* pending action on another node
*/
if (inactive_instance && inactive_instance->pending_node
&& (inactive_instance->pending_node->details != node->details)) {
inactive_instance = NULL;
}
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && !pcmk_is_set(rsc->flags, pe_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !pe__is_guest_node(node)
&& !pe__is_universal_clone(parent, data_set)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
static pe_resource_t *
unpack_find_resource(pe_working_set_t *data_set, const pe_node_t *node,
const char *rsc_id)
{
pe_resource_t *rsc = NULL;
pe_resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when clone-max=0, we create
* a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
if (clone0 && !pcmk_is_set(clone0->flags, pe_rsc_unique)) {
rsc = clone0;
parent = uber_parent(clone0);
crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
crm_trace("%s is not known as %s either (orphan)",
rsc_id, clone0_id);
}
free(clone0_id);
} else if (rsc->variant > pe_native) {
crm_trace("Resource history for %s is orphaned because it is no longer primitive",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pe_rsc_is_anon_clone(parent)) {
if (pe_rsc_is_bundled(parent)) {
rsc = pe__find_bundle_replica(parent->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(data_set, node, parent, base);
free(base);
CRM_ASSERT(rsc != NULL);
}
}
if (rsc && !pcmk__str_eq(rsc_id, rsc->id, pcmk__str_casei)
&& !pcmk__str_eq(rsc_id, rsc->clone_name, pcmk__str_casei)) {
pcmk__str_update(&rsc->clone_name, rsc_id);
pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, pe__node_name(node), rsc->id,
(pcmk_is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
}
return rsc;
}
static pe_resource_t *
process_orphan_resource(const xmlNode *rsc_entry, const pe_node_t *node,
pe_working_set_t *data_set)
{
pe_resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, pe__node_name(node));
rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
if (rsc == NULL) {
return NULL;
}
if (!pcmk_is_set(data_set->flags, pe_flag_stop_rsc_orphans)) {
pe__clear_resource_flags(rsc, pe_rsc_managed);
} else {
CRM_CHECK(rsc != NULL, return NULL);
pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set);
}
return rsc;
}
static void
process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
enum action_fail_response on_fail)
{
pe_node_t *tmpnode = NULL;
char *reason = NULL;
enum action_fail_response save_on_fail = action_fail_ignore;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), pe__node_name(node),
fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
pe_resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
pe_node_t *n = pe__copy_node(node);
pe_rsc_trace(rsc, "%s%s%s known on %s",
rsc->id,
((rsc->clone_name == NULL)? "" : " also known as "),
((rsc->clone_name == NULL)? "" : rsc->clone_name),
pe__node_name(n));
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (pcmk_is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& pcmk_is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (pe__is_guest_node(node)) {
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
should_fence = TRUE;
} else if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) {
if (pe__is_remote_node(node) && node->details->remote_rsc
&& !pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
/* Setting unseen means that fencing of the remote node will
* occur only if the connection resource is not going to start
* somewhere. This allows connection resources on a failed
* cluster node to move to another node without requiring the
* remote nodes to be fenced as well.
*/
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(rsc->cluster, node, reason, FALSE);
}
free(reason);
}
/* In order to calculate priority_fencing_delay correctly, save the failure information and pass it to native_add_running(). */
save_on_fail = on_fail;
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_demote:
pe__set_resource_flags(rsc, pe_rsc_failed);
demote_action(rsc, node, FALSE);
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(rsc->cluster, node, reason, FALSE);
free(reason);
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
pe__clear_resource_flags(rsc, pe_rsc_managed);
pe__set_resource_flags(rsc, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__",
rsc->cluster);
break;
case action_fail_stop:
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "on-fail=stop");
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
if (rsc->container && pe_rsc_is_bundled(rsc)) {
/* A bundle's remote connection can run on a different node than
* the bundle's container. We don't necessarily know where the
* container is running yet, so remember it and add a stop
* action for it later.
*/
rsc->cluster->stop_needed =
g_list_prepend(rsc->cluster->stop_needed, rsc->container);
} else if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
case action_fail_reset_remote:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
if (pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)) {
tmpnode = NULL;
if (rsc->is_remote_node) {
tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
}
if (tmpnode &&
pe__is_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
/* The remote connection resource failed in a way that
* should result in fencing the remote node.
*/
pe_fence_node(rsc->cluster, tmpnode,
"remote connection is unrecoverable", FALSE);
}
}
/* require the stop action regardless if fencing is occurring or not. */
if (rsc->role > RSC_ROLE_STOPPED) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
pe__set_next_role(rsc, RSC_ROLE_STOPPED, "remote reset");
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (pcmk_is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(rsc->cluster->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (pcmk_is_set(rsc->flags, pe_rsc_orphan)) {
if (pcmk_is_set(rsc->flags, pe_rsc_managed)) {
pcmk__config_warn("Detected active orphan %s running on %s",
rsc->id, pe__node_name(node));
} else {
pcmk__config_warn("Resource '%s' must be stopped manually on "
"%s because cluster is configured not to "
"stop active orphans",
rsc->id, pe__node_name(node));
}
}
native_add_running(rsc, node, rsc->cluster,
(save_on_fail != action_fail_ignore));
switch (on_fail) {
case action_fail_ignore:
break;
case action_fail_demote:
case action_fail_block:
pe__set_resource_flags(rsc, pe_rsc_failed);
break;
default:
pe__set_resource_flags(rsc, pe_rsc_failed|pe_rsc_stop);
break;
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
GList *possible_matches = pe__resource_actions(rsc, node,
PCMK_ACTION_STOP, FALSE);
GList *gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
pe_action_t *stop = (pe_action_t *) gIter->data;
pe__set_action_flags(stop, pe_action_optional);
}
g_list_free(possible_matches);
}
/* A successful stop after migrate_to on the migration source doesn't make
* the partially migrated resource stopped on the migration target.
*/
if (rsc->role == RSC_ROLE_STOPPED
&& rsc->partial_migration_source
&& rsc->partial_migration_source->details == node->details
&& rsc->partial_migration_target
&& rsc->running_on) {
rsc->role = RSC_ROLE_STARTED;
}
}
/* create active recurring operations as optional */
static void
process_recurring(pe_node_t * node, pe_resource_t * rsc,
int start_index, int stop_index,
GList *sorted_op_list, pe_working_set_t * data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GList *gIter = sorted_op_list;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = ID(rsc_op);
counter++;
if (node->details->online == FALSE) {
pe_rsc_trace(rsc, "Skipping %s on %s: node is offline",
rsc->id, pe__node_name(node));
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pe_rsc_trace(rsc, "Skipping %s on %s: resource is not active",
id, pe__node_name(node));
continue;
} else if (counter < start_index) {
pe_rsc_trace(rsc, "Skipping %s on %s: old %d",
id, pe__node_name(node), counter);
continue;
}
crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if (interval_ms == 0) {
pe_rsc_trace(rsc, "Skipping %s on %s: non-recurring",
id, pe__node_name(node));
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(status, "-1", pcmk__str_casei)) {
pe_rsc_trace(rsc, "Skipping %s on %s: status",
id, pe__node_name(node));
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = pcmk__op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Creating %s on %s", key, pe__node_name(node));
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
void
calculate_active_ops(const GList *sorted_op_list, int *start_index,
int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
*stop_index = -1;
*start_index = -1;
for (const GList *iter = sorted_op_list; iter != NULL; iter = iter->next) {
const xmlNode *rsc_op = (const xmlNode *) iter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (pcmk__str_eq(task, PCMK_ACTION_STOP, pcmk__str_casei)
&& pcmk__str_eq(status, "0", pcmk__str_casei)) {
*stop_index = counter;
- } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START, CRMD_ACTION_MIGRATED, NULL)) {
+ } else if (pcmk__strcase_any_of(task, PCMK_ACTION_START,
+ PCMK_ACTION_MIGRATE_FROM, NULL)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (pcmk__strcase_any_of(rc, "0", "8", NULL)) {
implied_monitor_start = counter;
}
} else if (pcmk__strcase_any_of(task, PCMK_ACTION_PROMOTE,
PCMK_ACTION_DEMOTE, NULL)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
// If resource history entry has shutdown lock, remember lock node and time
static void
unpack_shutdown_lock(const xmlNode *rsc_entry, pe_resource_t *rsc,
const pe_node_t *node, pe_working_set_t *data_set)
{
time_t lock_time = 0; // When lock started (i.e. node shutdown time)
if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
&lock_time) == pcmk_ok) && (lock_time != 0)) {
if ((data_set->shutdown_lock > 0)
&& (get_effective_time(data_set)
> (lock_time + data_set->shutdown_lock))) {
pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
rsc->id, pe__node_name(node));
pe__clear_resource_history(rsc, node, data_set);
} else {
/* @COMPAT I don't like breaking const signatures, but
* rsc->lock_node should really be const -- we just can't change it
* until the next API compatibility break.
*/
rsc->lock_node = (pe_node_t *) node;
rsc->lock_time = lock_time;
}
}
}
/*!
* \internal
* \brief Unpack one lrm_resource entry from a node's CIB status
*
* \param[in,out] node Node whose status is being unpacked
* \param[in] rsc_entry lrm_resource XML being unpacked
* \param[in,out] data_set Cluster working set
*
* \return Resource corresponding to the entry, or NULL if no operation history
*/
static pe_resource_t *
unpack_lrm_resource(pe_node_t *node, const xmlNode *lrm_resource,
pe_working_set_t *data_set)
{
GList *gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *rsc_id = ID(lrm_resource);
pe_resource_t *rsc = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
enum action_fail_response on_fail = action_fail_ignore;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
if (rsc_id == NULL) {
crm_warn("Ignoring malformed " XML_LRM_TAG_RESOURCE
" entry without id");
return NULL;
}
crm_trace("Unpacking " XML_LRM_TAG_RESOURCE " for %s on %s",
rsc_id, pe__node_name(node));
// Build a list of individual lrm_rsc_op entries, so we can sort them
for (rsc_op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
rsc_op != NULL; rsc_op = crm_next_same_xml(rsc_op)) {
op_list = g_list_prepend(op_list, rsc_op);
}
if (!pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
}
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id);
if (rsc == NULL) {
if (op_list == NULL) {
// If there are no operations, there is nothing to do
return NULL;
} else {
rsc = process_orphan_resource(lrm_resource, node, data_set);
}
}
CRM_ASSERT(rsc != NULL);
// Check whether the resource is "shutdown-locked" to this node
if (pcmk_is_set(data_set->flags, pe_flag_shutdown_lock)) {
unpack_shutdown_lock(lrm_resource, rsc, node, data_set);
}
/* process operations */
saved_role = rsc->role;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail);
if (get_target_role(rsc, &req_role)) {
if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
pe__set_next_role(rsc, req_role, XML_RSC_ATTR_TARGET_ROLE);
} else if (req_role > rsc->next_role) {
pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(const xmlNode *lrm_rsc_list,
pe_working_set_t *data_set)
{
for (const xmlNode *rsc_entry = pcmk__xe_first_child(lrm_rsc_list);
rsc_entry != NULL; rsc_entry = pcmk__xe_next(rsc_entry)) {
pe_resource_t *rsc;
pe_resource_t *container;
const char *rsc_id;
const char *container_id;
if (!pcmk__str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, pcmk__str_casei)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(data_set->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL ||
!pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler) ||
rsc->container != NULL) {
continue;
}
pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
/*!
* \internal
* \brief Unpack one node's lrm status section
*
* \param[in,out] node Node whose status is being unpacked
* \param[in] xml CIB node state XML
* \param[in,out] data_set Cluster working set
*/
static void
unpack_node_lrm(pe_node_t *node, const xmlNode *xml, pe_working_set_t *data_set)
{
bool found_orphaned_container_filler = false;
// Drill down to lrm_resources section
xml = find_xml_node(xml, XML_CIB_TAG_LRM, FALSE);
if (xml == NULL) {
return;
}
xml = find_xml_node(xml, XML_LRM_TAG_RESOURCES, FALSE);
if (xml == NULL) {
return;
}
// Unpack each lrm_resource entry
for (const xmlNode *rsc_entry = first_named_child(xml, XML_LRM_TAG_RESOURCE);
rsc_entry != NULL; rsc_entry = crm_next_same_xml(rsc_entry)) {
pe_resource_t *rsc = unpack_lrm_resource(node, rsc_entry, data_set);
if ((rsc != NULL)
&& pcmk_is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
found_orphaned_container_filler = true;
}
}
/* Now that all resource state has been unpacked for this node, map any
* orphaned container fillers to their container resource.
*/
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(xml, data_set);
}
}
static void
set_active(pe_resource_t * rsc)
{
const pe_resource_t *top = pe__const_top_resource(rsc, false);
if (top && pcmk_is_set(top->flags, pe_rsc_promotable)) {
rsc->role = RSC_ROLE_UNPROMOTED;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
pe_node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define XPATH_NODE_STATE "/" XML_TAG_CIB "/" XML_CIB_TAG_STATUS \
"/" XML_CIB_TAG_STATE
#define SUB_XPATH_LRM_RESOURCE "/" XML_CIB_TAG_LRM \
"/" XML_LRM_TAG_RESOURCES \
"/" XML_LRM_TAG_RESOURCE
#define SUB_XPATH_LRM_RSC_OP "/" XML_LRM_TAG_RSC_OP
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
int target_rc, pe_working_set_t *data_set)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((resource != NULL) && (op != NULL) && (node != NULL),
return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", resource, "']"
SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_TASK "='", op, "'",
NULL);
/* Need to check against transition_magic too? */
if ((source != NULL) && (strcmp(op, PCMK_ACTION_MIGRATE_TO) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_TARGET "='", source, "']",
NULL);
- } else if ((source != NULL) && (strcmp(op, CRMD_ACTION_MIGRATED) == 0)) {
+ } else if ((source != NULL)
+ && (strcmp(op, PCMK_ACTION_MIGRATE_FROM) == 0)) {
pcmk__g_strcat(xpath,
" and @" XML_LRM_ATTR_MIGRATE_SOURCE "='", source, "']",
NULL);
} else {
g_string_append_c(xpath, ']');
}
xml = get_xpath_object((const char *) xpath->str, data_set->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
if (xml && target_rc >= 0) {
int rc = PCMK_OCF_UNKNOWN_ERROR;
int status = PCMK_EXEC_ERROR;
crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
if ((rc != target_rc) || (status != PCMK_EXEC_DONE)) {
return NULL;
}
}
return xml;
}
static xmlNode *
find_lrm_resource(const char *rsc_id, const char *node_name,
pe_working_set_t *data_set)
{
GString *xpath = NULL;
xmlNode *xml = NULL;
CRM_CHECK((rsc_id != NULL) && (node_name != NULL), return NULL);
xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc_id, "']",
NULL);
xml = get_xpath_object((const char *) xpath->str, data_set->input,
LOG_DEBUG);
g_string_free(xpath, TRUE);
return xml;
}
/*!
* \internal
* \brief Check whether a resource has no completed action history on a node
*
* \param[in,out] rsc Resource to check
* \param[in] node_name Node to check
*
* \return true if \p rsc_id is unknown on \p node_name, otherwise false
*/
static bool
unknown_on_node(pe_resource_t *rsc, const char *node_name)
{
bool result = false;
xmlXPathObjectPtr search;
GString *xpath = g_string_sized_new(256);
pcmk__g_strcat(xpath,
XPATH_NODE_STATE "[@" XML_ATTR_UNAME "='", node_name, "']"
SUB_XPATH_LRM_RESOURCE "[@" XML_ATTR_ID "='", rsc->id, "']"
SUB_XPATH_LRM_RSC_OP "[@" XML_LRM_ATTR_RC "!='193']",
NULL);
search = xpath_search(rsc->cluster->input, (const char *) xpath->str);
result = (numXpathResults(search) == 0);
freeXpathObject(search);
g_string_free(xpath, TRUE);
return result;
}
/*!
* \brief Check whether a probe/monitor indicating the resource was not running
* on a node happened after some event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that monitor is being compared to
* \param[in] same_node Whether the operations are on the same node
* \param[in,out] data_set Cluster working set
*
* \return true if such a monitor happened after event, false otherwise
*/
static bool
monitor_not_running_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, bool same_node,
pe_working_set_t *data_set)
{
/* Any probe/monitor operation on the node indicating it was not running
* there
*/
xmlNode *monitor = find_lrm_op(rsc_id, PCMK_ACTION_MONITOR, node_name,
NULL, PCMK_OCF_NOT_RUNNING, data_set);
return (monitor && pe__is_newer_op(monitor, xml_op, same_node) > 0);
}
/*!
* \brief Check whether any non-monitor operation on a node happened after some
* event
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] xml_op Event that non-monitor is being compared to
* \param[in] same_node Whether the operations are on the same node
* \param[in,out] data_set Cluster working set
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
non_monitor_after(const char *rsc_id, const char *node_name,
const xmlNode *xml_op, bool same_node,
pe_working_set_t *data_set)
{
xmlNode *lrm_resource = NULL;
lrm_resource = find_lrm_resource(rsc_id, node_name, data_set);
if (lrm_resource == NULL) {
return false;
}
for (xmlNode *op = first_named_child(lrm_resource, XML_LRM_TAG_RSC_OP);
op != NULL; op = crm_next_same_xml(op)) {
const char * task = NULL;
if (op == xml_op) {
continue;
}
task = crm_element_value(op, XML_LRM_ATTR_TASK);
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_STOP,
- PCMK_ACTION_MIGRATE_TO, CRMD_ACTION_MIGRATED, NULL)
+ PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
+ NULL)
&& pe__is_newer_op(op, xml_op, same_node) > 0) {
return true;
}
}
return false;
}
/*!
* \brief Check whether the resource has newer state on a node after a migration
* attempt
*
* \param[in] rsc_id Resource being checked
* \param[in] node_name Node being checked
* \param[in] migrate_to Any migrate_to event that is being compared to
* \param[in] migrate_from Any migrate_from event that is being compared to
* \param[in,out] data_set Cluster working set
*
* \return true if such a operation happened after event, false otherwise
*/
static bool
newer_state_after_migrate(const char *rsc_id, const char *node_name,
const xmlNode *migrate_to,
const xmlNode *migrate_from,
pe_working_set_t *data_set)
{
const xmlNode *xml_op = migrate_to;
const char *source = NULL;
const char *target = NULL;
bool same_node = false;
if (migrate_from) {
xml_op = migrate_from;
}
source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
/* It's preferred to compare to the migrate event on the same node if
* existing, since call ids are more reliable.
*/
if (pcmk__str_eq(node_name, target, pcmk__str_casei)) {
if (migrate_from) {
xml_op = migrate_from;
same_node = true;
} else {
xml_op = migrate_to;
}
} else if (pcmk__str_eq(node_name, source, pcmk__str_casei)) {
if (migrate_to) {
xml_op = migrate_to;
same_node = true;
} else {
xml_op = migrate_from;
}
}
/* If there's any newer non-monitor operation on the node, or any newer
* probe/monitor operation on the node indicating it was not running there,
* the migration events potentially no longer matter for the node.
*/
return non_monitor_after(rsc_id, node_name, xml_op, same_node, data_set)
|| monitor_not_running_after(rsc_id, node_name, xml_op, same_node,
data_set);
}
/*!
* \internal
* \brief Parse migration source and target node names from history entry
*
* \param[in] entry Resource history entry for a migration action
* \param[in] source_node If not NULL, source must match this node
* \param[in] target_node If not NULL, target must match this node
* \param[out] source_name Where to store migration source node name
* \param[out] target_name Where to store migration target node name
*
* \return Standard Pacemaker return code
*/
static int
get_migration_node_names(const xmlNode *entry, const pe_node_t *source_node,
const pe_node_t *target_node,
const char **source_name, const char **target_name)
{
*source_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_SOURCE);
*target_name = crm_element_value(entry, XML_LRM_ATTR_MIGRATE_TARGET);
if ((*source_name == NULL) || (*target_name == NULL)) {
crm_err("Ignoring resource history entry %s without "
XML_LRM_ATTR_MIGRATE_SOURCE " and " XML_LRM_ATTR_MIGRATE_TARGET,
ID(entry));
return pcmk_rc_unpack_error;
}
if ((source_node != NULL)
&& !pcmk__str_eq(*source_name, source_node->details->uname,
pcmk__str_casei|pcmk__str_null_matches)) {
crm_err("Ignoring resource history entry %s because "
XML_LRM_ATTR_MIGRATE_SOURCE "='%s' does not match %s",
ID(entry), *source_name, pe__node_name(source_node));
return pcmk_rc_unpack_error;
}
if ((target_node != NULL)
&& !pcmk__str_eq(*target_name, target_node->details->uname,
pcmk__str_casei|pcmk__str_null_matches)) {
crm_err("Ignoring resource history entry %s because "
XML_LRM_ATTR_MIGRATE_TARGET "='%s' does not match %s",
ID(entry), *target_name, pe__node_name(target_node));
return pcmk_rc_unpack_error;
}
return pcmk_rc_ok;
}
/*
* \internal
* \brief Add a migration source to a resource's list of dangling migrations
*
* If the migrate_to and migrate_from actions in a live migration both
* succeeded, but there is no stop on the source, the migration is considered
* "dangling." Add the source to the resource's dangling migration list, which
* will be used to schedule a stop on the source without affecting the target.
*
* \param[in,out] rsc Resource involved in migration
* \param[in] node Migration source
*/
static void
add_dangling_migration(pe_resource_t *rsc, const pe_node_t *node)
{
pe_rsc_trace(rsc, "Dangling migration of %s requires stop on %s",
rsc->id, pe__node_name(node));
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations,
(gpointer) node);
}
/*!
* \internal
* \brief Update resource role etc. after a successful migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_success(struct action_history *history)
{
/* A complete migration sequence is:
* 1. migrate_to on source node (which succeeded if we get to this function)
* 2. migrate_from on target node
* 3. stop on source node
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from succeeded but no stop has happened, the
* migration is considered to be "dangling".
*
* If a successful migrate_to and stop have happened on the source node, we
* still need to check for a partial migration, due to scenarios (easier to
* produce with batch-limit=1) like:
*
* - A resource is migrating from node1 to node2, and a migrate_to is
* initiated for it on node1.
*
* - node2 goes into standby mode while the migrate_to is pending, which
* aborts the transition.
*
* - Upon completion of the migrate_to, a new transition schedules a stop
* on both nodes and a start on node1.
*
* - If the new transition is aborted for any reason while the resource is
* stopping on node1, the transition after that stop completes will see
* the migrate_to and stop on the source, but it's still a partial
* migration, and the resource must be stopped on node2 because it is
* potentially active there due to the migrate_to.
*
* We also need to take into account that either node's history may be
* cleared at any point in the migration process.
*/
int from_rc = PCMK_OCF_OK;
int from_status = PCMK_EXEC_PENDING;
pe_node_t *target_node = NULL;
xmlNode *migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
bool source_newer_op = false;
bool target_newer_state = false;
bool active_on_target = false;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
// Check for newer state on the source
source_newer_op = non_monitor_after(history->rsc->id, source, history->xml,
true, history->rsc->cluster);
// Check for a migrate_from action from this source on the target
- migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED, target,
- source, -1, history->rsc->cluster);
+ migrate_from = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_FROM,
+ target, source, -1, history->rsc->cluster);
if (migrate_from != NULL) {
if (source_newer_op) {
/* There's a newer non-monitor operation on the source and a
* migrate_from on the target, so this migrate_to is irrelevant to
* the resource's state.
*/
return;
}
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS,
&from_status);
}
/* If the resource has newer state on both the source and target after the
* migration events, this migrate_to is irrelevant to the resource's state.
*/
target_newer_state = newer_state_after_migrate(history->rsc->id, target,
history->xml, migrate_from,
history->rsc->cluster);
if (source_newer_op && target_newer_state) {
return;
}
/* Check for dangling migration (migrate_from succeeded but stop not done).
* We know there's no stop because we already returned if the target has a
* migrate_from and the source has any newer non-monitor operation.
*/
if ((from_rc == PCMK_OCF_OK) && (from_status == PCMK_EXEC_DONE)) {
add_dangling_migration(history->rsc, history->node);
return;
}
/* Without newer state, this migrate_to implies the resource is active.
* (Clones are not allowed to migrate, so role can't be promoted.)
*/
history->rsc->role = RSC_ROLE_STARTED;
target_node = pe_find_node(history->rsc->cluster->nodes, target);
active_on_target = !target_newer_state && (target_node != NULL)
&& target_node->details->online;
if (from_status != PCMK_EXEC_PENDING) { // migrate_from failed on target
if (active_on_target) {
native_add_running(history->rsc, target_node, history->rsc->cluster,
TRUE);
} else {
// Mark resource as failed, require recovery, and prevent migration
pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop);
pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate);
}
return;
}
// The migrate_from is pending, complete but erased, or to be scheduled
/* If there is no history at all for the resource on an online target, then
* it was likely cleaned. Just return, and we'll schedule a probe. Once we
* have the probe result, it will be reflected in target_newer_state.
*/
if ((target_node != NULL) && target_node->details->online
&& unknown_on_node(history->rsc, target)) {
return;
}
if (active_on_target) {
pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
source);
native_add_running(history->rsc, target_node, history->rsc->cluster,
FALSE);
if ((source_node != NULL) && source_node->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
history->rsc->partial_migration_target = target_node;
history->rsc->partial_migration_source = source_node;
}
} else if (!source_newer_op) {
// Mark resource as failed, require recovery, and prevent migration
pe__set_resource_flags(history->rsc, pe_rsc_failed|pe_rsc_stop);
pe__clear_resource_flags(history->rsc, pe_rsc_allow_migrate);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_to action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_to_failure(struct action_history *history)
{
xmlNode *target_migrate_from = NULL;
const char *source = NULL;
const char *target = NULL;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, history->node, NULL, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->role = RSC_ROLE_STARTED;
// Check for migrate_from on the target
- target_migrate_from = find_lrm_op(history->rsc->id, CRMD_ACTION_MIGRATED,
- target, source, PCMK_OCF_OK,
- history->rsc->cluster);
+ target_migrate_from = find_lrm_op(history->rsc->id,
+ PCMK_ACTION_MIGRATE_FROM, target, source,
+ PCMK_OCF_OK, history->rsc->cluster);
if (/* If the resource state is unknown on the target, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(history->rsc, target)
/* If the resource has newer state on the target after the migration
* events, this migrate_to no longer matters for the target.
*/
&& !newer_state_after_migrate(history->rsc->id, target, history->xml,
target_migrate_from,
history->rsc->cluster)) {
/* The resource has no newer state on the target, so assume it's still
* active there.
* (if it is up).
*/
pe_node_t *target_node = pe_find_node(history->rsc->cluster->nodes,
target);
if (target_node && target_node->details->online) {
native_add_running(history->rsc, target_node, history->rsc->cluster,
FALSE);
}
} else if (!non_monitor_after(history->rsc->id, source, history->xml, true,
history->rsc->cluster)) {
/* We know the resource has newer state on the target, but this
* migrate_to still matters for the source as long as there's no newer
* non-monitor operation there.
*/
// Mark node as having dangling migration so we can force a stop later
history->rsc->dangling_migrations =
g_list_prepend(history->rsc->dangling_migrations,
(gpointer) history->node);
}
}
/*!
* \internal
* \brief Update resource role etc. after a failed migrate_from action
*
* \param[in,out] history Parsed action result history
*/
static void
unpack_migrate_from_failure(struct action_history *history)
{
xmlNode *source_migrate_to = NULL;
const char *source = NULL;
const char *target = NULL;
// Get source and target node names from XML
if (get_migration_node_names(history->xml, NULL, history->node, &source,
&target) != pcmk_rc_ok) {
return;
}
/* If a migration failed, we have to assume the resource is active. Clones
* are not allowed to migrate, so role can't be promoted.
*/
history->rsc->role = RSC_ROLE_STARTED;
// Check for a migrate_to on the source
source_migrate_to = find_lrm_op(history->rsc->id, PCMK_ACTION_MIGRATE_TO,
source, target, PCMK_OCF_OK,
history->rsc->cluster);
if (/* If the resource state is unknown on the source, it will likely be
* probed there.
* Don't just consider it running there. We will get back here anyway in
* case the probe detects it's running there.
*/
!unknown_on_node(history->rsc, source)
/* If the resource has newer state on the source after the migration
* events, this migrate_from no longer matters for the source.
*/
&& !newer_state_after_migrate(history->rsc->id, source,
source_migrate_to, history->xml,
history->rsc->cluster)) {
/* The resource has no newer state on the source, so assume it's still
* active there (if it is up).
*/
pe_node_t *source_node = pe_find_node(history->rsc->cluster->nodes,
source);
if (source_node && source_node->details->online) {
native_add_running(history->rsc, source_node, history->rsc->cluster,
TRUE);
}
}
}
/*!
* \internal
* \brief Add an action to cluster's list of failed actions
*
* \param[in,out] history Parsed action result history
*/
static void
record_failed_op(struct action_history *history)
{
if (!(history->node->details->online)) {
return;
}
for (const xmlNode *xIter = history->rsc->cluster->failed->children;
xIter != NULL; xIter = xIter->next) {
const char *key = pe__xe_history_key(xIter);
const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
if (pcmk__str_eq(history->key, key, pcmk__str_none)
&& pcmk__str_eq(uname, history->node->details->uname,
pcmk__str_casei)) {
crm_trace("Skipping duplicate entry %s on %s",
history->key, pe__node_name(history->node));
return;
}
}
crm_trace("Adding entry for %s on %s to failed action list",
history->key, pe__node_name(history->node));
crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
crm_xml_add(history->xml, XML_LRM_ATTR_RSCID, history->rsc->id);
add_node_copy(history->rsc->cluster->failed, history->xml);
}
static char *
last_change_str(const xmlNode *xml_op)
{
time_t when;
char *result = NULL;
if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
&when) == pcmk_ok) {
char *when_s = pcmk__epoch2str(&when, 0);
const char *p = strchr(when_s, ' ');
// Skip day of week to make message shorter
if ((p != NULL) && (*(++p) != '\0')) {
result = strdup(p);
CRM_ASSERT(result != NULL);
}
free(when_s);
}
if (result == NULL) {
result = strdup("unknown time");
CRM_ASSERT(result != NULL);
}
return result;
}
/*!
* \internal
* \brief Compare two on-fail values
*
* \param[in] first One on-fail value to compare
* \param[in] second The other on-fail value to compare
*
* \return A negative number if second is more severe than first, zero if they
* are equal, or a positive number if first is more severe than second.
* \note This is only needed until the action_fail_response values can be
* renumbered at the next API compatibility break.
*/
static int
cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
{
switch (first) {
case action_fail_demote:
switch (second) {
case action_fail_ignore:
return 1;
case action_fail_demote:
return 0;
default:
return -1;
}
break;
case action_fail_reset_remote:
switch (second) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
return 1;
case action_fail_reset_remote:
return 0;
default:
return -1;
}
break;
case action_fail_restart_container:
switch (second) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_reset_remote:
return 1;
case action_fail_restart_container:
return 0;
default:
return -1;
}
break;
default:
break;
}
switch (second) {
case action_fail_demote:
return (first == action_fail_ignore)? -1 : 1;
case action_fail_reset_remote:
switch (first) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
return -1;
default:
return 1;
}
break;
case action_fail_restart_container:
switch (first) {
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_reset_remote:
return -1;
default:
return 1;
}
break;
default:
break;
}
return first - second;
}
/*!
* \internal
* \brief Ban a resource (or its clone if an anonymous instance) from all nodes
*
* \param[in,out] rsc Resource to ban
*/
static void
ban_from_all_nodes(pe_resource_t *rsc)
{
int score = -INFINITY;
pe_resource_t *fail_rsc = rsc;
if (fail_rsc->parent != NULL) {
pe_resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_anon_clone(parent)) {
/* For anonymous clones, if an operation with on-fail=stop fails for
* any instance, the entire clone must stop.
*/
fail_rsc = parent;
}
}
// Ban the resource from all nodes
crm_notice("%s will not be started under current conditions", fail_rsc->id);
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = pe__node_list2table(rsc->cluster->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
/*!
* \internal
* \brief Update resource role, failure handling, etc., after a failed action
*
* \param[in,out] history Parsed action result history
* \param[out] last_failure Set this to action XML
* \param[in,out] on_fail What should be done about the result
*/
static void
unpack_rsc_op_failure(struct action_history *history, xmlNode **last_failure,
enum action_fail_response *on_fail)
{
bool is_probe = false;
pe_action_t *action = NULL;
char *last_change_s = NULL;
*last_failure = history->xml;
is_probe = pcmk_xe_is_probe(history->xml);
last_change_s = last_change_str(history->xml);
if (!pcmk_is_set(history->rsc->cluster->flags, pe_flag_symmetric_cluster)
&& (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
crm_trace("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pe__node_name(history->node), last_change_s,
history->exit_status, history->id);
} else {
crm_warn("Unexpected result (%s%s%s) was recorded for "
"%s of %s on %s at %s " CRM_XS " exit-status=%d id=%s",
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
(is_probe? "probe" : history->task), history->rsc->id,
pe__node_name(history->node), last_change_s,
history->exit_status, history->id);
if (is_probe && (history->exit_status != PCMK_OCF_OK)
&& (history->exit_status != PCMK_OCF_NOT_RUNNING)
&& (history->exit_status != PCMK_OCF_RUNNING_PROMOTED)) {
/* A failed (not just unexpected) probe result could mean the user
* didn't know resources will be probed even where they can't run.
*/
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
history->rsc->id, pe__node_name(history->node));
}
record_failed_op(history);
}
free(last_change_s);
action = custom_action(history->rsc, strdup(history->key), history->task,
NULL, TRUE, FALSE, history->rsc->cluster);
if (cmp_on_fail(*on_fail, action->on_fail) < 0) {
pe_rsc_trace(history->rsc, "on-fail %s -> %s for %s (%s)",
fail2text(*on_fail), fail2text(action->on_fail),
action->uuid, history->key);
*on_fail = action->on_fail;
}
if (strcmp(history->task, PCMK_ACTION_STOP) == 0) {
resource_location(history->rsc, history->node, -INFINITY,
"__stop_fail__", history->rsc->cluster);
} else if (strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0) {
unpack_migrate_to_failure(history);
- } else if (strcmp(history->task, CRMD_ACTION_MIGRATED) == 0) {
+ } else if (strcmp(history->task, PCMK_ACTION_MIGRATE_FROM) == 0) {
unpack_migrate_from_failure(history);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->role = RSC_ROLE_PROMOTED;
} else if (strcmp(history->task, PCMK_ACTION_DEMOTE) == 0) {
if (action->on_fail == action_fail_block) {
history->rsc->role = RSC_ROLE_PROMOTED;
pe__set_next_role(history->rsc, RSC_ROLE_STOPPED,
"demote with on-fail=block");
} else if (history->exit_status == PCMK_OCF_NOT_RUNNING) {
history->rsc->role = RSC_ROLE_STOPPED;
} else {
/* Staying in the promoted role would put the scheduler and
* controller into a loop. Setting the role to unpromoted is not
* dangerous because the resource will be stopped as part of
* recovery, and any promotion will be ordered after that stop.
*/
history->rsc->role = RSC_ROLE_UNPROMOTED;
}
}
if (is_probe && (history->exit_status == PCMK_OCF_NOT_INSTALLED)) {
/* leave stopped */
pe_rsc_trace(history->rsc, "Leaving %s stopped", history->rsc->id);
history->rsc->role = RSC_ROLE_STOPPED;
} else if (history->rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(history->rsc, "Setting %s active", history->rsc->id);
set_active(history->rsc);
}
pe_rsc_trace(history->rsc,
"Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
history->rsc->id, role2text(history->rsc->role),
pcmk__btoa(history->node->details->unclean),
fail2text(action->on_fail), role2text(action->fail_role));
if ((action->fail_role != RSC_ROLE_STARTED)
&& (history->rsc->next_role < action->fail_role)) {
pe__set_next_role(history->rsc, action->fail_role, "failure");
}
if (action->fail_role == RSC_ROLE_STOPPED) {
ban_from_all_nodes(history->rsc);
}
pe_free_action(action);
}
/*!
* \internal
* \brief Block a resource with a failed action if it cannot be recovered
*
* If resource action is a failed stop and fencing is not possible, mark the
* resource as unmanaged and blocked, since recovery cannot be done.
*
* \param[in,out] history Parsed action history entry
*/
static void
block_if_unrecoverable(struct action_history *history)
{
char *last_change_s = NULL;
if (strcmp(history->task, PCMK_ACTION_STOP) != 0) {
return; // All actions besides stop are always recoverable
}
if (pe_can_fence(history->node->details->data_set, history->node)) {
return; // Failed stops are recoverable via fencing
}
last_change_s = last_change_str(history->xml);
pe_proc_err("No further recovery can be attempted for %s "
"because %s on %s failed (%s%s%s) at %s "
CRM_XS " rc=%d id=%s",
history->rsc->id, history->task, pe__node_name(history->node),
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""),
last_change_s, history->exit_status, history->id);
free(last_change_s);
pe__clear_resource_flags(history->rsc, pe_rsc_managed);
pe__set_resource_flags(history->rsc, pe_rsc_block);
}
/*!
* \internal
* \brief Update action history's execution status and why
*
* \param[in,out] history Parsed action history entry
* \param[out] why Where to store reason for update
* \param[in] value New value
* \param[in] reason Description of why value was changed
*/
static inline void
remap_because(struct action_history *history, const char **why, int value,
const char *reason)
{
if (history->execution_status != value) {
history->execution_status = value;
*why = reason;
}
}
/*!
* \internal
* \brief Remap informational monitor results and operation status
*
* For the monitor results, certain OCF codes are for providing extended information
* to the user about services that aren't yet failed but not entirely healthy either.
* These must be treated as the "normal" result by Pacemaker.
*
* For operation status, the action result can be used to determine an appropriate
* status for the purposes of responding to the action. The status provided by the
* executor is not directly usable since the executor does not know what was expected.
*
* \param[in,out] history Parsed action history entry
* \param[in,out] on_fail What should be done about the result
* \param[in] expired Whether result is expired
*
* \note If the result is remapped and the node is not shutting down or failed,
* the operation will be recorded in the data set's list of failed operations
* to highlight it for the user.
*
* \note This may update the resource's current and next role.
*/
static void
remap_operation(struct action_history *history,
enum action_fail_response *on_fail, bool expired)
{
bool is_probe = false;
int orig_exit_status = history->exit_status;
int orig_exec_status = history->execution_status;
const char *why = NULL;
const char *task = history->task;
// Remap degraded results to their successful counterparts
history->exit_status = pcmk__effective_rc(history->exit_status);
if (history->exit_status != orig_exit_status) {
why = "degraded result";
if (!expired && (!history->node->details->shutdown
|| history->node->details->online)) {
record_failed_op(history);
}
}
if (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& ((history->execution_status != PCMK_EXEC_DONE)
|| (history->exit_status != PCMK_OCF_NOT_RUNNING))) {
history->execution_status = PCMK_EXEC_DONE;
history->exit_status = PCMK_OCF_NOT_RUNNING;
why = "equivalent probe result";
}
/* If the executor reported an execution status of anything but done or
* error, consider that final. But for done or error, we know better whether
* it should be treated as a failure or not, because we know the expected
* result.
*/
switch (history->execution_status) {
case PCMK_EXEC_DONE:
case PCMK_EXEC_ERROR:
break;
// These should be treated as node-fatal
case PCMK_EXEC_NO_FENCE_DEVICE:
case PCMK_EXEC_NO_SECRETS:
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"node-fatal error");
goto remap_done;
default:
goto remap_done;
}
is_probe = pcmk_xe_is_probe(history->xml);
if (is_probe) {
task = "probe";
}
if (history->expected_exit_status < 0) {
/* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
* Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
* expected exit status in the transition key, which (along with the
* similar case of a corrupted transition key in the CIB) will be
* reported to this function as -1. Pacemaker 2.0+ does not support
* rolling upgrades from those versions or processing of saved CIB files
* from those versions, so we do not need to care much about this case.
*/
remap_because(history, &why, PCMK_EXEC_ERROR,
"obsolete history format");
crm_warn("Expected result not found for %s on %s "
"(corrupt or obsolete CIB?)",
history->key, pe__node_name(history->node));
} else if (history->exit_status == history->expected_exit_status) {
remap_because(history, &why, PCMK_EXEC_DONE, "expected result");
} else {
remap_because(history, &why, PCMK_EXEC_ERROR, "unexpected result");
pe_rsc_debug(history->rsc,
"%s on %s: expected %d (%s), got %d (%s%s%s)",
history->key, pe__node_name(history->node),
history->expected_exit_status,
services_ocf_exitcode_str(history->expected_exit_status),
history->exit_status,
services_ocf_exitcode_str(history->exit_status),
(pcmk__str_empty(history->exit_reason)? "" : ": "),
pcmk__s(history->exit_reason, ""));
}
switch (history->exit_status) {
case PCMK_OCF_OK:
if (is_probe
&& (history->expected_exit_status == PCMK_OCF_NOT_RUNNING)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pe_rsc_info(history->rsc, "Probe found %s active on %s at %s",
history->rsc->id, pe__node_name(history->node),
last_change_s);
free(last_change_s);
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe
|| (history->expected_exit_status == history->exit_status)
|| !pcmk_is_set(history->rsc->flags, pe_rsc_managed)) {
/* For probes, recurring monitors for the Stopped role, and
* unmanaged resources, "not running" is not considered a
* failure.
*/
remap_because(history, &why, PCMK_EXEC_DONE, "exit status");
history->rsc->role = RSC_ROLE_STOPPED;
*on_fail = action_fail_ignore;
pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN,
"not running");
}
break;
case PCMK_OCF_RUNNING_PROMOTED:
if (is_probe
&& (history->exit_status != history->expected_exit_status)) {
char *last_change_s = last_change_str(history->xml);
remap_because(history, &why, PCMK_EXEC_DONE, "probe");
pe_rsc_info(history->rsc,
"Probe found %s active and promoted on %s at %s",
history->rsc->id, pe__node_name(history->node),
last_change_s);
free(last_change_s);
}
if (!expired
|| (history->exit_status == history->expected_exit_status)) {
history->rsc->role = RSC_ROLE_PROMOTED;
}
break;
case PCMK_OCF_FAILED_PROMOTED:
if (!expired) {
history->rsc->role = RSC_ROLE_PROMOTED;
}
remap_because(history, &why, PCMK_EXEC_ERROR, "exit status");
break;
case PCMK_OCF_NOT_CONFIGURED:
remap_because(history, &why, PCMK_EXEC_ERROR_FATAL, "exit status");
break;
case PCMK_OCF_UNIMPLEMENT_FEATURE:
{
guint interval_ms = 0;
crm_element_value_ms(history->xml, XML_LRM_ATTR_INTERVAL_MS,
&interval_ms);
if (interval_ms == 0) {
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD,
"exit status");
} else {
remap_because(history, &why, PCMK_EXEC_NOT_SUPPORTED,
"exit status");
}
}
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
if (!expired) {
block_if_unrecoverable(history);
}
remap_because(history, &why, PCMK_EXEC_ERROR_HARD, "exit status");
break;
default:
if (history->execution_status == PCMK_EXEC_DONE) {
char *last_change_s = last_change_str(history->xml);
crm_info("Treating unknown exit status %d from %s of %s "
"on %s at %s as failure",
history->exit_status, task, history->rsc->id,
pe__node_name(history->node), last_change_s);
remap_because(history, &why, PCMK_EXEC_ERROR,
"unknown exit status");
free(last_change_s);
}
break;
}
remap_done:
if (why != NULL) {
pe_rsc_trace(history->rsc,
"Remapped %s result from [%s: %s] to [%s: %s] "
"because of %s",
history->key, pcmk_exec_status_str(orig_exec_status),
crm_exit_str(orig_exit_status),
pcmk_exec_status_str(history->execution_status),
crm_exit_str(history->exit_status), why);
}
}
// return TRUE if start or monitor last failure but parameters changed
static bool
should_clear_for_param_change(const xmlNode *xml_op, const char *task,
pe_resource_t *rsc, pe_node_t *node)
{
if (pcmk__str_any_of(task, PCMK_ACTION_START, PCMK_ACTION_MONITOR, NULL)) {
if (pe__bundle_needs_remote_name(rsc)) {
/* We haven't allocated resources yet, so we can't reliably
* substitute addr parameters for the REMOTE_CONTAINER_HACK.
* When that's needed, defer the check until later.
*/
pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
rsc->cluster);
} else {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node,
rsc->cluster);
switch (digest_data->rc) {
case RSC_DIGEST_UNKNOWN:
crm_trace("Resource %s history entry %s on %s"
" has no digest to compare",
rsc->id, pe__xe_history_key(xml_op),
node->details->id);
break;
case RSC_DIGEST_MATCH:
break;
default:
return TRUE;
}
}
}
return FALSE;
}
// Order action after fencing of remote node, given connection rsc
static void
order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn,
pe_working_set_t *data_set)
{
pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id);
if (remote_node) {
pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
FALSE, data_set);
order_actions(fence, action, pe_order_implies_then);
}
}
static bool
should_ignore_failure_timeout(const pe_resource_t *rsc, const char *task,
guint interval_ms, bool is_last_failure)
{
/* Clearing failures of recurring monitors has special concerns. The
* executor reports only changes in the monitor result, so if the
* monitor is still active and still getting the same failure result,
* that will go undetected after the failure is cleared.
*
* Also, the operation history will have the time when the recurring
* monitor result changed to the given code, not the time when the
* result last happened.
*
* @TODO We probably should clear such failures only when the failure
* timeout has passed since the last occurrence of the failed result.
* However we don't record that information. We could maybe approximate
* that by clearing only if there is a more recent successful monitor or
* stop result, but we don't even have that information at this point
* since we are still unpacking the resource's operation history.
*
* This is especially important for remote connection resources with a
* reconnect interval, so in that case, we skip clearing failures
* if the remote node hasn't been fenced.
*/
if (rsc->remote_reconnect_ms
&& pcmk_is_set(rsc->cluster->flags, pe_flag_stonith_enabled)
&& (interval_ms != 0)
&& pcmk__str_eq(task, PCMK_ACTION_MONITOR, pcmk__str_casei)) {
pe_node_t *remote_node = pe_find_node(rsc->cluster->nodes, rsc->id);
if (remote_node && !remote_node->details->remote_was_fenced) {
if (is_last_failure) {
crm_info("Waiting to clear monitor failure for remote node %s"
" until fencing has occurred", rsc->id);
}
return TRUE;
}
}
return FALSE;
}
/*!
* \internal
* \brief Check operation age and schedule failure clearing when appropriate
*
* This function has two distinct purposes. The first is to check whether an
* operation history entry is expired (i.e. the resource has a failure timeout,
* the entry is older than the timeout, and the resource either has no fail
* count or its fail count is entirely older than the timeout). The second is to
* schedule fail count clearing when appropriate (i.e. the operation is expired
* and either the resource has an expired fail count or the operation is a
* last_failure for a remote connection resource with a reconnect interval,
* or the operation is a last_failure for a start or monitor operation and the
* resource's parameters have changed since the operation).
*
* \param[in,out] history Parsed action result history
*
* \return true if operation history entry is expired, otherwise false
*/
static bool
check_operation_expiry(struct action_history *history)
{
bool expired = false;
bool is_last_failure = pcmk__ends_with(history->id, "_last_failure_0");
time_t last_run = 0;
int unexpired_fail_count = 0;
const char *clear_reason = NULL;
if (history->execution_status == PCMK_EXEC_NOT_INSTALLED) {
pe_rsc_trace(history->rsc,
"Resource history entry %s on %s is not expired: "
"Not Installed does not expire",
history->id, pe__node_name(history->node));
return false; // "Not installed" must always be cleared manually
}
if ((history->rsc->failure_timeout > 0)
&& (crm_element_value_epoch(history->xml, XML_RSC_OP_LAST_CHANGE,
&last_run) == 0)) {
// Resource has a failure-timeout, and history entry has a timestamp
time_t now = get_effective_time(history->rsc->cluster);
time_t last_failure = 0;
// Is this particular operation history older than the failure timeout?
if ((now >= (last_run + history->rsc->failure_timeout))
&& !should_ignore_failure_timeout(history->rsc, history->task,
history->interval_ms,
is_last_failure)) {
expired = true;
}
// Does the resource as a whole have an unexpired fail count?
unexpired_fail_count = pe_get_failcount(history->node, history->rsc,
&last_failure, pe_fc_effective,
history->xml);
// Update scheduler recheck time according to *last* failure
crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
" last-failure@%lld",
history->id, (long long) last_run, (expired? "" : "not "),
(long long) now, unexpired_fail_count,
history->rsc->failure_timeout, (long long) last_failure);
last_failure += history->rsc->failure_timeout + 1;
if (unexpired_fail_count && (now < last_failure)) {
pe__update_recheck_time(last_failure, history->rsc->cluster);
}
}
if (expired) {
if (pe_get_failcount(history->node, history->rsc, NULL, pe_fc_default,
history->xml)) {
// There is a fail count ignoring timeout
if (unexpired_fail_count == 0) {
// There is no fail count considering timeout
clear_reason = "it expired";
} else {
/* This operation is old, but there is an unexpired fail count.
* In a properly functioning cluster, this should only be
* possible if this operation is not a failure (otherwise the
* fail count should be expired too), so this is really just a
* failsafe.
*/
pe_rsc_trace(history->rsc,
"Resource history entry %s on %s is not expired: "
"Unexpired fail count",
history->id, pe__node_name(history->node));
expired = false;
}
} else if (is_last_failure
&& (history->rsc->remote_reconnect_ms != 0)) {
/* Clear any expired last failure when reconnect interval is set,
* even if there is no fail count.
*/
clear_reason = "reconnect interval is set";
}
}
if (!expired && is_last_failure
&& should_clear_for_param_change(history->xml, history->task,
history->rsc, history->node)) {
clear_reason = "resource parameters have changed";
}
if (clear_reason != NULL) {
// Schedule clearing of the fail count
pe_action_t *clear_op = pe__clear_failcount(history->rsc, history->node,
clear_reason,
history->rsc->cluster);
if (pcmk_is_set(history->rsc->cluster->flags, pe_flag_stonith_enabled)
&& (history->rsc->remote_reconnect_ms != 0)) {
/* If we're clearing a remote connection due to a reconnect
* interval, we want to wait until any scheduled fencing
* completes.
*
* We could limit this to remote_node->details->unclean, but at
* this point, that's always true (it won't be reliable until
* after unpack_node_history() is done).
*/
crm_info("Clearing %s failure will wait until any scheduled "
"fencing of %s completes",
history->task, history->rsc->id);
order_after_remote_fencing(clear_op, history->rsc,
history->rsc->cluster);
}
}
if (expired && (history->interval_ms == 0)
&& pcmk__str_eq(history->task, PCMK_ACTION_MONITOR, pcmk__str_none)) {
switch (history->exit_status) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_PROMOTED:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_PROMOTED:
// Don't expire probes that return these values
pe_rsc_trace(history->rsc,
"Resource history entry %s on %s is not expired: "
"Probe result",
history->id, pe__node_name(history->node));
expired = false;
break;
}
}
return expired;
}
int
pe__target_rc_from_xml(const xmlNode *xml_op)
{
int target_rc = 0;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
decode_transition_key(key, NULL, NULL, NULL, &target_rc);
return target_rc;
}
/*!
* \internal
* \brief Get the failure handling for an action
*
* \param[in,out] history Parsed action history entry
*
* \return Failure handling appropriate to action
*/
static enum action_fail_response
get_action_on_fail(struct action_history *history)
{
enum action_fail_response result = action_fail_recover;
pe_action_t *action = custom_action(history->rsc, strdup(history->key),
history->task, NULL, TRUE, FALSE,
history->rsc->cluster);
result = action->on_fail;
pe_free_action(action);
return result;
}
/*!
* \internal
* \brief Update a resource's state for an action result
*
* \param[in,out] history Parsed action history entry
* \param[in] exit_status Exit status to base new state on
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
update_resource_state(struct action_history *history, int exit_status,
const xmlNode *last_failure,
enum action_fail_response *on_fail)
{
bool clear_past_failure = false;
if ((exit_status == PCMK_OCF_NOT_INSTALLED)
|| (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml))) {
history->rsc->role = RSC_ROLE_STOPPED;
} else if (exit_status == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MONITOR,
pcmk__str_none)) {
if ((last_failure != NULL)
&& pcmk__str_eq(history->key, pe__xe_history_key(last_failure),
pcmk__str_none)) {
clear_past_failure = true;
}
if (history->rsc->role < RSC_ROLE_STARTED) {
set_active(history->rsc);
}
} else if (pcmk__str_eq(history->task, PCMK_ACTION_START, pcmk__str_none)) {
history->rsc->role = RSC_ROLE_STARTED;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_STOP, pcmk__str_none)) {
history->rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_PROMOTE,
pcmk__str_none)) {
history->rsc->role = RSC_ROLE_PROMOTED;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_DEMOTE,
pcmk__str_none)) {
if (*on_fail == action_fail_demote) {
// Demote clears an error only if on-fail=demote
clear_past_failure = true;
}
history->rsc->role = RSC_ROLE_UNPROMOTED;
- } else if (pcmk__str_eq(history->task, CRMD_ACTION_MIGRATED,
+ } else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_FROM,
pcmk__str_none)) {
history->rsc->role = RSC_ROLE_STARTED;
clear_past_failure = true;
} else if (pcmk__str_eq(history->task, PCMK_ACTION_MIGRATE_TO,
pcmk__str_none)) {
unpack_migrate_to_success(history);
} else if (history->rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(history->rsc, "%s active on %s",
history->rsc->id, pe__node_name(history->node));
set_active(history->rsc);
}
if (!clear_past_failure) {
return;
}
switch (*on_fail) {
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(history->rsc,
"%s (%s) is not cleared by a completed %s",
history->rsc->id, fail2text(*on_fail), history->task);
break;
case action_fail_block:
case action_fail_ignore:
case action_fail_demote:
case action_fail_recover:
case action_fail_restart_container:
*on_fail = action_fail_ignore;
pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN,
"clear past failures");
break;
case action_fail_reset_remote:
if (history->rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = action_fail_ignore;
pe__set_next_role(history->rsc, RSC_ROLE_UNKNOWN,
"clear past failures and reset remote");
}
break;
}
}
/*!
* \internal
* \brief Check whether a given history entry matters for resource state
*
* \param[in] history Parsed action history entry
*
* \return true if action can affect resource state, otherwise false
*/
static inline bool
can_affect_state(struct action_history *history)
{
#if 0
/* @COMPAT It might be better to parse only actions we know we're interested
* in, rather than exclude a couple we don't. However that would be a
* behavioral change that should be done at a major or minor series release.
* Currently, unknown operations can affect whether a resource is considered
* active and/or failed.
*/
return pcmk__str_any_of(history->task, PCMK_ACTION_MONITOR,
PCMK_ACTION_START, PCMK_ACTION_STOP,
PCMK_ACTION_PROMOTE, PCMK_ACTION_DEMOTE,
- PCMK_ACTION_MIGRATE_TO, CRMD_ACTION_MIGRATED,
+ PCMK_ACTION_MIGRATE_TO, PCMK_ACTION_MIGRATE_FROM,
"asyncmon", NULL);
#else
return !pcmk__str_any_of(history->task, PCMK_ACTION_NOTIFY,
PCMK_ACTION_META_DATA, NULL);
#endif
}
/*!
* \internal
* \brief Unpack execution/exit status and exit reason from a history entry
*
* \param[in,out] history Action history entry to unpack
*
* \return Standard Pacemaker return code
*/
static int
unpack_action_result(struct action_history *history)
{
if ((crm_element_value_int(history->xml, XML_LRM_ATTR_OPSTATUS,
&(history->execution_status)) < 0)
|| (history->execution_status < PCMK_EXEC_PENDING)
|| (history->execution_status > PCMK_EXEC_MAX)
|| (history->execution_status == PCMK_EXEC_CANCELLED)) {
crm_err("Ignoring resource history entry %s for %s on %s "
"with invalid " XML_LRM_ATTR_OPSTATUS " '%s'",
history->id, history->rsc->id, pe__node_name(history->node),
pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_OPSTATUS),
""));
return pcmk_rc_unpack_error;
}
if ((crm_element_value_int(history->xml, XML_LRM_ATTR_RC,
&(history->exit_status)) < 0)
|| (history->exit_status < 0) || (history->exit_status > CRM_EX_MAX)) {
#if 0
/* @COMPAT We should ignore malformed entries, but since that would
* change behavior, it should be done at a major or minor series
* release.
*/
crm_err("Ignoring resource history entry %s for %s on %s "
"with invalid " XML_LRM_ATTR_RC " '%s'",
history->id, history->rsc->id, pe__node_name(history->node),
pcmk__s(crm_element_value(history->xml, XML_LRM_ATTR_RC),
""));
return pcmk_rc_unpack_error;
#else
history->exit_status = CRM_EX_ERROR;
#endif
}
history->exit_reason = crm_element_value(history->xml,
XML_LRM_ATTR_EXIT_REASON);
return pcmk_rc_ok;
}
/*!
* \internal
* \brief Process an action history entry whose result expired
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
*
* \return Standard Pacemaker return code (in particular, pcmk_rc_ok means the
* entry needs no further processing)
*/
static int
process_expired_result(struct action_history *history, int orig_exit_status)
{
if (!pe_rsc_is_bundled(history->rsc)
&& pcmk_xe_mask_probe_failure(history->xml)
&& (orig_exit_status != history->expected_exit_status)) {
if (history->rsc->role <= RSC_ROLE_STOPPED) {
history->rsc->role = RSC_ROLE_UNKNOWN;
}
crm_trace("Ignoring resource history entry %s for probe of %s on %s: "
"Masked failure expired",
history->id, history->rsc->id,
pe__node_name(history->node));
return pcmk_rc_ok;
}
if (history->exit_status == history->expected_exit_status) {
return pcmk_rc_undetermined; // Only failures expire
}
if (history->interval_ms == 0) {
crm_notice("Ignoring resource history entry %s for %s of %s on %s: "
"Expired failure",
history->id, history->task, history->rsc->id,
pe__node_name(history->node));
return pcmk_rc_ok;
}
if (history->node->details->online && !history->node->details->unclean) {
/* Reschedule the recurring action. schedule_cancel() won't work at
* this stage, so as a hacky workaround, forcibly change the restart
* digest so pcmk__check_action_config() does what we want later.
*
* @TODO We should skip this if there is a newer successful monitor.
* Also, this causes rescheduling only if the history entry
* has an op-digest (which the expire-non-blocked-failure
* scheduler regression test doesn't, but that may not be a
* realistic scenario in production).
*/
crm_notice("Rescheduling %s-interval %s of %s on %s "
"after failure expired",
pcmk__readable_interval(history->interval_ms), history->task,
history->rsc->id, pe__node_name(history->node));
crm_xml_add(history->xml, XML_LRM_ATTR_RESTART_DIGEST,
"calculated-failure-timeout");
return pcmk_rc_ok;
}
return pcmk_rc_undetermined;
}
/*!
* \internal
* \brief Process a masked probe failure
*
* \param[in,out] history Parsed action history entry
* \param[in] orig_exit_status Action exit status before remapping
* \param[in] last_failure Resource's last_failure entry, if known
* \param[in,out] on_fail Resource's current failure handling
*/
static void
mask_probe_failure(struct action_history *history, int orig_exit_status,
const xmlNode *last_failure,
enum action_fail_response *on_fail)
{
pe_resource_t *ban_rsc = history->rsc;
if (!pcmk_is_set(history->rsc->flags, pe_rsc_unique)) {
ban_rsc = uber_parent(history->rsc);
}
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
services_ocf_exitcode_str(orig_exit_status), history->rsc->id,
pe__node_name(history->node));
update_resource_state(history, history->expected_exit_status, last_failure,
on_fail);
crm_xml_add(history->xml, XML_ATTR_UNAME, history->node->details->uname);
record_failed_op(history);
resource_location(ban_rsc, history->node, -INFINITY, "masked-probe-failure",
history->rsc->cluster);
}
/*!
* \internal Check whether a given failure is for a given pending action
*
* \param[in] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*
* \return true if \p last_failure is failure of pending action in \p history,
* otherwise false
* \note Both \p history and \p last_failure must come from the same
* lrm_resource block, as node and resource are assumed to be the same.
*/
static bool
failure_is_newer(const struct action_history *history,
const xmlNode *last_failure)
{
guint failure_interval_ms = 0U;
long long failure_change = 0LL;
long long this_change = 0LL;
if (last_failure == NULL) {
return false; // Resource has no last_failure entry
}
if (!pcmk__str_eq(history->task,
crm_element_value(last_failure, XML_LRM_ATTR_TASK),
pcmk__str_none)) {
return false; // last_failure is for different action
}
if ((crm_element_value_ms(last_failure, XML_LRM_ATTR_INTERVAL_MS,
&failure_interval_ms) != pcmk_ok)
|| (history->interval_ms != failure_interval_ms)) {
return false; // last_failure is for action with different interval
}
if ((pcmk__scan_ll(crm_element_value(history->xml, XML_RSC_OP_LAST_CHANGE),
&this_change, 0LL) != pcmk_rc_ok)
|| (pcmk__scan_ll(crm_element_value(last_failure,
XML_RSC_OP_LAST_CHANGE),
&failure_change, 0LL) != pcmk_rc_ok)
|| (failure_change < this_change)) {
return false; // Failure is not known to be newer
}
return true;
}
/*!
* \internal
* \brief Update a resource's role etc. for a pending action
*
* \param[in,out] history Parsed history entry for pending action
* \param[in] last_failure Resource's last_failure entry, if known
*/
static void
process_pending_action(struct action_history *history,
const xmlNode *last_failure)
{
/* For recurring monitors, a failure is recorded only in RSC_last_failure_0,
* and there might be a RSC_monitor_INTERVAL entry with the last successful
* or pending result.
*
* If last_failure contains the failure of the pending recurring monitor
* we're processing here, and is newer, the action is no longer pending.
* (Pending results have call ID -1, which sorts last, so the last failure
* if any should be known.)
*/
if (failure_is_newer(history, last_failure)) {
return;
}
if (strcmp(history->task, PCMK_ACTION_START) == 0) {
pe__set_resource_flags(history->rsc, pe_rsc_start_pending);
set_active(history->rsc);
} else if (strcmp(history->task, PCMK_ACTION_PROMOTE) == 0) {
history->rsc->role = RSC_ROLE_PROMOTED;
} else if ((strcmp(history->task, PCMK_ACTION_MIGRATE_TO) == 0)
&& history->node->details->unclean) {
/* A migrate_to action is pending on a unclean source, so force a stop
* on the target.
*/
const char *migrate_target = NULL;
pe_node_t *target = NULL;
migrate_target = crm_element_value(history->xml,
XML_LRM_ATTR_MIGRATE_TARGET);
target = pe_find_node(history->rsc->cluster->nodes, migrate_target);
if (target != NULL) {
stop_action(history->rsc, target, FALSE);
}
}
if (history->rsc->pending_task != NULL) {
/* There should never be multiple pending actions, but as a failsafe,
* just remember the first one processed for display purposes.
*/
return;
}
if (pcmk_is_probe(history->task, history->interval_ms)) {
/* Pending probes are currently never displayed, even if pending
* operations are requested. If we ever want to change that,
* enable the below and the corresponding part of
* native.c:native_pending_task().
*/
#if 0
history->rsc->pending_task = strdup("probe");
history->rsc->pending_node = history->node;
#endif
} else {
history->rsc->pending_task = strdup(history->task);
history->rsc->pending_node = history->node;
}
}
static void
unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
xmlNode **last_failure, enum action_fail_response *on_fail)
{
int old_rc = 0;
bool expired = false;
pe_resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
struct action_history history = {
.rsc = rsc,
.node = node,
.xml = xml_op,
.execution_status = PCMK_EXEC_UNKNOWN,
};
CRM_CHECK(rsc && node && xml_op, return);
history.id = ID(xml_op);
if (history.id == NULL) {
crm_err("Ignoring resource history entry for %s on %s without ID",
rsc->id, pe__node_name(node));
return;
}
// Task and interval
history.task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
if (history.task == NULL) {
crm_err("Ignoring resource history entry %s for %s on %s without "
XML_LRM_ATTR_TASK, history.id, rsc->id, pe__node_name(node));
return;
}
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS,
&(history.interval_ms));
if (!can_affect_state(&history)) {
pe_rsc_trace(rsc,
"Ignoring resource history entry %s for %s on %s "
"with irrelevant action '%s'",
history.id, rsc->id, pe__node_name(node), history.task);
return;
}
if (unpack_action_result(&history) != pcmk_rc_ok) {
return; // Error already logged
}
history.expected_exit_status = pe__target_rc_from_xml(xml_op);
history.key = pe__xe_history_key(xml_op);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &(history.call_id));
pe_rsc_trace(rsc, "Unpacking %s (%s call %d on %s): %s (%s)",
history.id, history.task, history.call_id, pe__node_name(node),
pcmk_exec_status_str(history.execution_status),
crm_exit_str(history.exit_status));
if (node->details->unclean) {
pe_rsc_trace(rsc,
"%s is running on %s, which is unclean (further action "
"depends on value of stop's on-fail attribute)",
rsc->id, pe__node_name(node));
}
expired = check_operation_expiry(&history);
old_rc = history.exit_status;
remap_operation(&history, on_fail, expired);
if (expired && (process_expired_result(&history, old_rc) == pcmk_rc_ok)) {
goto done;
}
if (!pe_rsc_is_bundled(rsc) && pcmk_xe_mask_probe_failure(xml_op)) {
mask_probe_failure(&history, old_rc, *last_failure, on_fail);
goto done;
}
if (!pcmk_is_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
switch (history.execution_status) {
case PCMK_EXEC_PENDING:
process_pending_action(&history, *last_failure);
goto done;
case PCMK_EXEC_DONE:
update_resource_state(&history, history.exit_status, *last_failure,
on_fail);
goto done;
case PCMK_EXEC_NOT_INSTALLED:
failure_strategy = get_action_on_fail(&history);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s of %s on %s: "
"Resource agent doesn't exist "
CRM_XS " status=%d rc=%d id=%s",
history.task, rsc->id, pe__node_name(node),
history.execution_status, history.exit_status,
history.id);
/* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error",
rsc->cluster);
unpack_rsc_op_failure(&history, last_failure, on_fail);
goto done;
case PCMK_EXEC_NOT_CONNECTED:
if (pe__is_guest_or_remote_node(node)
&& pcmk_is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
/* We should never get into a situation where a managed remote
* connection resource is considered OK but a resource action
* behind the connection gets a "not connected" status. But as a
* fail-safe in case a bug or unusual circumstances do lead to
* that, ensure the remote connection is considered failed.
*/
pe__set_resource_flags(node->details->remote_rsc,
pe_rsc_failed|pe_rsc_stop);
}
break; // Not done, do error handling
case PCMK_EXEC_ERROR:
case PCMK_EXEC_ERROR_HARD:
case PCMK_EXEC_ERROR_FATAL:
case PCMK_EXEC_TIMEOUT:
case PCMK_EXEC_NOT_SUPPORTED:
case PCMK_EXEC_INVALID:
break; // Not done, do error handling
default: // No other value should be possible at this point
break;
}
failure_strategy = get_action_on_fail(&history);
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& (strcmp(history.task, PCMK_ACTION_STOP) == 0))) {
char *last_change_s = last_change_str(xml_op);
crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s succeeded "
CRM_XS " %s",
history.task, services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), rsc->id, pe__node_name(node),
last_change_s, history.id);
free(last_change_s);
update_resource_state(&history, history.expected_exit_status,
*last_failure, on_fail);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
pe__set_resource_flags(rsc, pe_rsc_failure_ignored);
record_failed_op(&history);
if ((failure_strategy == action_fail_restart_container)
&& cmp_on_fail(*on_fail, action_fail_recover) <= 0) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(&history, last_failure, on_fail);
if (history.execution_status == PCMK_EXEC_ERROR_HARD) {
uint8_t log_level = LOG_ERR;
if (history.exit_status == PCMK_OCF_NOT_INSTALLED) {
log_level = LOG_NOTICE;
}
do_crm_log(log_level,
"Preventing %s from restarting on %s because "
"of hard failure (%s%s%s) " CRM_XS " %s",
parent->id, pe__node_name(node),
services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, node, -INFINITY, "hard-error",
rsc->cluster);
} else if (history.execution_status == PCMK_EXEC_ERROR_FATAL) {
crm_err("Preventing %s from restarting anywhere because "
"of fatal failure (%s%s%s) " CRM_XS " %s",
parent->id, services_ocf_exitcode_str(history.exit_status),
(pcmk__str_empty(history.exit_reason)? "" : ": "),
pcmk__s(history.exit_reason, ""), history.id);
resource_location(parent, NULL, -INFINITY, "fatal-error",
rsc->cluster);
}
}
done:
pe_rsc_trace(rsc, "%s role on %s after %s is %s (next %s)",
rsc->id, pe__node_name(node), history.id,
role2text(rsc->role), role2text(rsc->next_role));
}
static void
add_node_attrs(const xmlNode *xml_obj, pe_node_t *node, bool overwrite,
pe_working_set_t *data_set)
{
const char *cluster_name = NULL;
pe_rule_eval_data_t rule_data = {
.node_hash = NULL,
.role = RSC_ROLE_UNKNOWN,
.now = data_set->now,
.match_data = NULL,
.rsc_data = NULL,
.op_data = NULL
};
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
strdup(node->details->id));
if (pcmk__str_eq(node->details->id, data_set->dc_uuid, pcmk__str_casei)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
if (cluster_name) {
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
strdup(cluster_name));
}
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, &rule_data,
node->details->attrs, NULL, overwrite, data_set);
pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, &rule_data,
node->details->utilization, NULL,
FALSE, data_set);
if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
const char *site_name = pe_node_attribute_raw(node, "site-name");
if (site_name) {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(site_name));
} else if (cluster_name) {
/* Default to cluster-name if unset */
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(cluster_name));
}
}
}
static GList *
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GList *gIter = NULL;
GList *op_list = NULL;
GList *sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = pcmk__xe_first_child(rsc_entry);
rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op)) {
if (pcmk__str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP,
pcmk__str_none)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GList *
find_operations(const char *rsc, const char *node, gboolean active_filter,
pe_working_set_t * data_set)
{
GList *output = NULL;
GList *intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
pe_node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = pcmk__xe_first_child(status); node_state != NULL;
node_state = pcmk__xe_next(node_state)) {
if (pcmk__str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, pcmk__str_none)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && !pcmk__str_eq(uname, node, pcmk__str_casei)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (pe__is_guest_or_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
} else {
determine_online_status(node_state, this_node, data_set);
}
if (this_node->details->online
|| pcmk_is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = pcmk__xe_first_child(tmp); lrm_rsc != NULL;
lrm_rsc = pcmk__xe_next(lrm_rsc)) {
if (pcmk__str_eq((const char *)lrm_rsc->name,
XML_LRM_TAG_RESOURCE, pcmk__str_none)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && !pcmk__str_eq(rsc_id, rsc, pcmk__str_casei)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 8, 6:05 PM (1 d, 2 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2002466
Default Alt Text
(693 KB)

Event Timeline