Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3687573
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
246 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 74baebc80c..32cc5f7c1a 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1476 +1,1476 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <limits.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <pacemaker-controld.h>
/* Agent name of the internal remote connection resource agent (compared
 * against the agent argument in is_remote_lrmd_ra() and reported as the
 * resource type by remote_ra_get_rsc_info())
 */
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry: handle_remote_ra_start() caps the
 * timeout (in ms) of a single connection attempt to this value
 */
#define MAX_START_TIMEOUT_MS 10000
/*!
 * \internal
 * \brief Set flags in a remote command's status
 *
 * The value returned by pcmk__set_flags_as() is stored back into
 * (cmd)->status. LOG_TRACE, the "Remote command" label, and the command's
 * rsc_id are handed to that helper (presumably for logging the change).
 *
 * \param[in,out] cmd           Command to update; evaluated more than once, so
 *                              the argument must not have side effects
 * \param[in]     flags_to_set  Group of enum remote_cmd_status flags to set
 */
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
/*!
 * \internal
 * \brief Clear flags in a remote command's status
 *
 * The value returned by pcmk__clear_flags_as() is stored back into
 * (cmd)->status. LOG_TRACE, the "Remote command" label, and the command's
 * rsc_id are handed to that helper (presumably for logging the change).
 *
 * \param[in,out] cmd             Command to update; evaluated more than once,
 *                                so the argument must not have side effects
 * \param[in]     flags_to_clear  Group of enum remote_cmd_status flags to clear
 */
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
/* Bit flags stored in remote_ra_cmd_t:status */
enum remote_cmd_status {
/* A successful result has already been reported for this command (set after
 * the first successful monitor poke; later results only matter if they fail)
 */
cmd_reported_success = (1 << 0),
/* The command was cancelled while it was the in-flight command (set by
 * remote_ra_cancel()); it is not rescheduled and its failures are not reported
 */
cmd_cancel = (1 << 1),
};
/*!
 * \internal
 * \brief A single action requested of a remote connection resource
 */
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
/*! time in ms left of the timeout, as computed by update_remaining_timeout() */
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
/*! timer id for the wait on a monitor's poke response (0 when not pending) */
int monitor_timeout_id;
/*! timer id for the wait on a takeover during stop (0 when not pending) */
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
/*! result that is reported for the action */
pcmk__action_result_t result;
/*! call id reported together with the result */
int call_id;
/*! reference time for the remaining-timeout calculation; also reported as the
 * result's run time
 */
time_t start_time;
/*! group of enum remote_cmd_status flags */
uint32_t status;
} remote_ra_cmd_t;
/*!
 * \internal
 * \brief Set flags in the remote RA data of an executor state object
 *
 * The macro declares block-local variables named \c lrm and \c ra, so the
 * arguments must not be expressions that refer to identifiers with those
 * names. The remote_ra_data member is dereferenced without a NULL check.
 *
 * \param[in,out] lrm_state     Executor state object (lrm_state_t *) to update
 * \param[in]     flags_to_set  Group of enum remote_status flags to set
 */
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
/*!
 * \internal
 * \brief Clear flags in the remote RA data of an executor state object
 *
 * The macro declares block-local variables named \c lrm and \c ra, so the
 * arguments must not be expressions that refer to identifiers with those
 * names. The remote_ra_data member is dereferenced without a NULL check.
 *
 * \param[in,out] lrm_state       Executor state object (lrm_state_t *) to update
 * \param[in]     flags_to_clear  Group of enum remote_status flags to clear
 */
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
/* Bit flags stored in remote_ra_data_t:status */
enum remote_status {
/* Another client is expected to take over the connection (set when a
 * migrate_to action is processed)
 */
expect_takeover = (1 << 0),
/* The expected takeover happened (set when a new client connects while
 * expect_takeover is set)
 */
takeover_complete = (1 << 1),
/* The connection was established successfully and has not been stopped since */
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
 * so we have it signalled back with the transition from the scheduler.
 */
remote_in_maint = (1 << 3),
/* Similar for whether we are controlling a guest node or remote node.
 * Fortunately there is a meta-attribute in the transition already and
 * as the situation doesn't change over time we can use the
 * resource start for noting down the information for later use when
 * the attributes aren't at hand.
 */
controlling_guest = (1 << 4),
};
/*!
 * \internal
 * \brief Per-connection state of the internal remote resource agent
 */
typedef struct remote_ra_data_s {
/*! mainloop trigger that runs handle_remote_ra_exec() for this connection */
crm_trigger_t *work;
/*! command waiting for an asynchronous event to complete it, if any */
remote_ra_cmd_t *cur_cmd;
/*! commands queued for execution */
GList *cmds;
/*! recurring commands waiting for their next interval */
GList *recurring_cmds;
/*! group of enum remote_status flags */
uint32_t status;
} remote_ra_data_t;
/* Forward declarations of helpers that are used before they are defined */
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
/*!
 * \internal
 * \brief Release a remote RA command and everything it owns
 *
 * Any timer that is still registered for the command (start delay, recurring
 * interval, monitor timeout, takeover timeout) is removed before the memory
 * is released.
 *
 * \param[in,out] user_data  Command (remote_ra_cmd_t *) to free; NULL is ignored
 */
static void
free_cmd(gpointer user_data)
{
    remote_ra_cmd_t *doomed = user_data;

    if (doomed == NULL) {
        return;
    }

    /* A nonzero ID means the corresponding timer is still registered */
    if (doomed->delay_id != 0) {
        g_source_remove(doomed->delay_id);
    }
    if (doomed->interval_id != 0) {
        g_source_remove(doomed->interval_id);
    }
    if (doomed->monitor_timeout_id != 0) {
        g_source_remove(doomed->monitor_timeout_id);
    }
    if (doomed->takeover_timeout_id != 0) {
        g_source_remove(doomed->takeover_timeout_id);
    }

    /* Owned strings */
    free(doomed->owner);
    free(doomed->rsc_id);
    free(doomed->action);
    free(doomed->userdata);

    /* Owned result and parameter list */
    pcmk__reset_result(&(doomed->result));
    lrmd_key_value_freeall(doomed->params);

    free(doomed);
}
/*!
 * \internal
 * \brief Generate the next call ID for a remote RA command
 *
 * IDs are handed out sequentially starting at 1. Once INT_MAX has been handed
 * out, the sequence starts over at 1. The wrap is done explicitly before
 * incrementing, because overflowing a signed int is undefined behavior.
 *
 * \return Positive call ID
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    if (remote_ra_callid == INT_MAX) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }
    return remote_ra_callid;
}
/*!
 * \internal
 * \brief Timer callback: move a recurring command back onto the execution queue
 *
 * \param[in,out] data  Recurring command (remote_ra_cmd_t *) whose interval expired
 *
 * \return FALSE always (so the timer is not rescheduled)
 */
static gboolean
recurring_helper(gpointer data)
{
    remote_ra_cmd_t *cmd = data;
    lrm_state_t *conn_state = NULL;
    remote_ra_data_t *ra_state = NULL;

    /* The interval timer is no longer pending */
    cmd->interval_id = 0;

    conn_state = lrm_state_find(cmd->rsc_id);
    if ((conn_state == NULL) || (conn_state->remote_ra_data == NULL)) {
        return FALSE;
    }
    ra_state = conn_state->remote_ra_data;

    /* Switch the command from the waiting list to the work queue, and ask the
     * main loop to process the queue
     */
    ra_state->recurring_cmds = g_list_remove(ra_state->recurring_cmds, cmd);
    ra_state->cmds = g_list_append(ra_state->cmds, cmd);
    mainloop_set_trigger(ra_state->work);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback: a command's start delay has expired
 *
 * \param[in,out] data  Command (remote_ra_cmd_t *) whose start delay expired
 *
 * \return FALSE always (so the timer is not rescheduled)
 */
static gboolean
start_delay_helper(gpointer data)
{
    remote_ra_cmd_t *cmd = data;
    lrm_state_t *conn_state = NULL;
    remote_ra_data_t *ra_state = NULL;

    /* The delay timer is no longer pending, so the command may be executed */
    cmd->delay_id = 0;

    conn_state = lrm_state_find(cmd->rsc_id);
    if ((conn_state == NULL) || (conn_state->remote_ra_data == NULL)) {
        return FALSE;
    }
    ra_state = conn_state->remote_ra_data;
    mainloop_set_trigger(ra_state->work);
    return FALSE;
}
/*!
 * \internal
 * \brief Decide whether a remote node's attributes should be purged
 *
 * Purging is the default. It is skipped only when the node that hosted the
 * connection is known, the remote executor reports a positive uptime, the
 * host has a peer_lost timestamp, and the uptime (plus 20s of slack) covers
 * the time that has passed since peer_lost.
 *
 * \param[in] node  Remote node being brought up
 *
 * \return true if attributes should be purged, otherwise false
 */
static bool
should_purge_attributes(crm_node_t *node)
{
    crm_node_t *conn_node = NULL;
    lrm_state_t *conn_state = NULL;
    time_t uptime = 0;
    time_t now = 0;

    if (node->conn_host == NULL) {
        return true;
    }

    /* The node that was hosting the remote connection resource (taken from
     * the peer cache) is the one whose peer_lost timestamp matters here
     */
    conn_node = crm_get_peer(0, node->conn_host);
    if (conn_node == NULL) {
        return true;
    }

    conn_state = lrm_state_find(node->uname);
    if (conn_state == NULL) {
        return true;
    }

    uptime = lrmd__uptime(conn_state->conn);
    now = time(NULL);

    /* The 20s of fuzziness give corosync a while to notice that the remote
     * host is gone. A failure to get the uptime or an unset peer_lost falls
     * through to the default of purging.
     */
    if ((uptime > 0)
        && (conn_node->peer_lost > 0)
        && ((uptime + 20) >= (now - conn_node->peer_lost))) {
        return false;
    }
    return true;
}
/*!
 * \internal
 * \brief Choose which part of a node's state to delete from the CIB
 *
 * \param[in] purge  Whether everything (true) or only the resource history
 *                   (false) is to be deleted
 *
 * \return Section to delete; the "unlocked" variants are chosen when the
 *         controld_shutdown_lock_enabled flag is set
 */
static enum controld_section_e
section_to_delete(bool purge)
{
    const bool lock_enabled = pcmk_is_set(controld_globals.flags,
                                          controld_shutdown_lock_enabled);

    if (purge) {
        return lock_enabled? controld_section_all_unlocked
                           : controld_section_all;
    }
    return lock_enabled? controld_section_lrm_unlocked : controld_section_lrm;
}
/*!
 * \internal
 * \brief Clear stale state of a remote node that is coming up
 *
 * \param[in] call_opt  CIB call options to use for the deletion
 * \param[in] node      Remote node whose state should be cleared
 */
static void
purge_remote_node_attrs(int call_opt, crm_node_t *node)
{
    const bool purge = should_purge_attributes(node);
    const enum controld_section_e section = section_to_delete(purge);

    if (purge) {
        /* Purge node from attrd's memory */
        update_attrd_remote_node_removed(node->uname, NULL);
    }

    controld_delete_node_state(node->uname, section, call_opt);
}
/*!
 * \internal
 * \brief Handle cluster communication related to pacemaker_remote node joining
 *
 * \param[in] node_name  Name of newly integrated pacemaker_remote node
 */
static void
remote_node_up(const char *node_name)
{
    int call_opt = 0;
    xmlNode *status_xml = NULL;
    xmlNode *state_xml = NULL;
    crm_node_t *node = NULL;
    lrm_state_t *conn_state = NULL;

    CRM_CHECK(node_name != NULL, return);
    crm_info("Announcing Pacemaker Remote node %s", node_name);

    call_opt = crmd_cib_smart_opt();

    /* Delete node's probe_complete attribute. This serves two purposes:
     *
     * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
     * - deleting it (or any attribute for that matter) here ensures the
     *   attribute manager learns the node is remote
     */
    update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);

    /* Ensure node is in the remote peer cache with member status */
    node = crm_remote_peer_get(node_name);
    CRM_CHECK(node != NULL, return);

    purge_remote_node_attrs(call_opt, node);
    pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);

    /* Apply any start state that we were given from the environment on the
     * remote node.
     */
    conn_state = lrm_state_find(node->uname);
    if (conn_state != NULL) {
        const char *start_state = lrmd__node_start_state(conn_state->conn);

        if (start_state != NULL) {
            set_join_state(start_state, node->uname, node->uuid, true);
        }
    }

    /* pacemaker_remote nodes don't participate in the membership layer,
     * so cluster nodes don't automatically get notified when they come and go.
     * We send a cluster message to the DC, and update the CIB node state entry,
     * so the DC will get it sooner (via message) or later (via CIB refresh),
     * and any other interested parties can query the CIB.
     */
    broadcast_remote_state_message(node_name, true);

    status_xml = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    state_xml = create_node_state_update(node, node_update_cluster, status_xml,
                                         __func__);

    /* Clear the XML_NODE_IS_FENCED flag in the node state. If the node ever
     * needs to be fenced, this flag will allow various actions to determine
     * whether the fencing has happened yet.
     */
    crm_xml_add(state_xml, XML_NODE_IS_FENCED, "0");

    /* TODO: If the remote connection drops, and this (async) CIB update either
     * failed or has not yet completed, later actions could mistakenly think the
     * node has already been fenced (if the XML_NODE_IS_FENCED attribute was
     * previously set, because it won't have been cleared). This could prevent
     * actual fencing or allow recurring monitor failures to be cleared too
     * soon. Ideally, we wouldn't rely on the CIB for the fenced status.
     */
    controld_update_cib(XML_CIB_TAG_STATUS, status_xml, call_opt, NULL);
    free_xml(status_xml);
}
/* Options for remote_node_down(): what to do with the node's resource history */
enum down_opts {
/* Erase only the node attributes, keeping the resource history */
DOWN_KEEP_LRM,
/* Erase the whole node state, including the resource history */
DOWN_ERASE_LRM
};
/*!
 * \internal
 * \brief Handle cluster communication related to pacemaker_remote node leaving
 *
 * \param[in] node_name  Name of lost node
 * \param[in] opts       Whether to keep or erase LRM history
 */
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
    const int call_opt = crmd_cib_smart_opt();
    xmlNode *status_xml = NULL;
    crm_node_t *node = NULL;

    /* Purge node from attrd's memory */
    update_attrd_remote_node_removed(node_name, NULL);

    /* Normally, only node attributes should be erased, and the resource history
     * should be kept until the node comes back up. However, after a successful
     * fence, we want to clear the history as well, so we don't think resources
     * are still running on the node.
     */
    controld_delete_node_state(node_name,
                               (opts == DOWN_ERASE_LRM)? controld_section_all
                                                       : controld_section_attrs,
                               call_opt);

    /* Ensure node is in the remote peer cache with lost state */
    node = crm_remote_peer_get(node_name);
    CRM_CHECK(node != NULL, return);
    pcmk__update_peer_state(__func__, node, CRM_NODE_LOST, 0);

    /* Notify DC */
    broadcast_remote_state_message(node_name, false);

    /* Update CIB node state */
    status_xml = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    create_node_state_update(node, node_update_cluster, status_xml, __func__);
    controld_update_cib(XML_CIB_TAG_STATUS, status_xml, call_opt, NULL);
    free_xml(status_xml);
}
/*!
 * \internal
 * \brief Handle effects of a remote RA command on node state
 *
 * \param[in] cmd  Completed remote RA command
 */
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
    /* Only successful actions can change node state */
    if (!pcmk__result_ok(&(cmd->result))) {
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
        remote_node_up(cmd->rsc_id);
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
                     pcmk__str_casei)) {
        /* After a successful migration, we don't need to do remote_node_up()
         * because the DC already knows the node is up, and we don't want to
         * clear LRM history etc. We do need to add the remote node to this
         * host's remote peer cache, because (unless it happens to be DC)
         * it hasn't been tracking the remote node, and other code relies on
         * the cache to distinguish remote nodes from unseen cluster nodes.
         */
        crm_node_t *node = crm_remote_peer_get(cmd->rsc_id);

        CRM_CHECK(node != NULL, return);
        pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
        lrm_state_t *conn_state = lrm_state_find(cmd->rsc_id);
        remote_ra_data_t *ra_state = NULL;

        if (conn_state != NULL) {
            ra_state = conn_state->remote_ra_data;
        }
        if (ra_state == NULL) {
            return;
        }

        if (!pcmk_is_set(ra_state->status, takeover_complete)) {
            /* Stop means down if we didn't successfully migrate elsewhere */
            remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);

        } else if (AM_I_DC == FALSE) {
            /* Only the connection host and DC track node state,
             * so if the connection migrated elsewhere and we aren't DC,
             * un-cache the node, so we don't have stale info
             */
            crm_remote_peer_cache_remove(cmd->rsc_id);
        }
    }

    /* We don't do anything for successful monitors, which is correct for
     * routine recurring monitors, and for monitors on nodes where the
     * connection isn't supposed to be (the cluster will stop the connection in
     * that case). However, if the initial probe finds the connection already
     * active on the node where we want it, we probably should do
     * remote_node_up(). Unfortunately, we can't distinguish that case here.
     * Given that connections have to be initiated by the cluster, the chance of
     * that should be close to zero.
     */
}
/*!
 * \internal
 * \brief Report the result of a remote RA command as an executor event
 *
 * The command's effect on node state is handled first, then an
 * exec-complete event is built from the command and passed to
 * lrm_op_callback().
 *
 * \param[in,out] cmd  Command whose result should be reported
 */
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
    lrmd_event_data_t event = { 0, };
    lrmd_key_value_t *param = NULL;

    check_remote_node_state(cmd);

    event.type = lrmd_event_exec_complete;
    event.call_id = cmd->call_id;
    event.rsc_id = cmd->rsc_id;
    event.remote_nodename = cmd->owner;
    event.op_type = cmd->action;
    event.user_data = cmd->userdata;
    event.timeout = cmd->timeout;
    event.interval_ms = cmd->interval_ms;
    event.t_run = (unsigned int) cmd->start_time;
    event.t_rcchange = (unsigned int) cmd->start_time;

    lrmd__set_result(&event, cmd->result.exit_status,
                     cmd->result.execution_status, cmd->result.exit_reason);

    if (pcmk_is_set(cmd->status, cmd_reported_success)
        && !pcmk__result_ok(&(cmd->result))) {

        event.t_rcchange = (unsigned int) time(NULL);

        /* This edge case will likely never occur, but if it did, a failure
         * would not be processed correctly. It is only remotely possible
         * because a failure of the connection resource's TCP connection can be
         * detected at any moment after start has completed, and the recurring
         * operation is just a connectivity ping.
         *
         * Nothing guarantees that the first successful monitor and a later
         * failed monitor fall into different timestamps, but they have to look
         * as if they occurred at separate times.
         */
        if (event.t_rcchange == event.t_run) {
            event.t_rcchange++;
        }
    }

    /* The event gets its own copy of the parameters */
    if (cmd->params != NULL) {
        event.params = pcmk__strkey_table(free, free);
        for (param = cmd->params; param != NULL; param = param->next) {
            g_hash_table_insert(event.params, strdup(param->key),
                                strdup(param->value));
        }
    }

    lrm_op_callback(&event);

    if (event.params != NULL) {
        g_hash_table_destroy(event.params);
    }
    lrmd__reset_result(&event);
}
/*!
 * \internal
 * \brief Recalculate how much of a command's timeout is left
 *
 * The calculation is done in whole seconds (the timeout is truncated to
 * seconds first) and the result is stored in milliseconds.
 *
 * \param[in,out] cmd  Command whose remaining_timeout should be updated
 */
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
    const time_t elapsed_s = time(NULL) - cmd->start_time;

    cmd->remaining_timeout = ((cmd->timeout / 1000) - elapsed_s) * 1000;
}
/*!
 * \internal
 * \brief Timer callback: retry the connection attempt of a pending start
 *
 * Nothing is done unless the current command is a start or migrate_from. If
 * the retry cannot be initiated (or no time is left for it), the command's
 * result is reported and the command is freed.
 *
 * \param[in,out] data  Executor state object (lrm_state_t *) of the connection
 *
 * \return FALSE always (so the timer is not rescheduled)
 */
static gboolean
retry_start_cmd_cb(gpointer data)
{
    lrm_state_t *lrm_state = data;
    remote_ra_data_t *ra_state = lrm_state->remote_ra_data;
    remote_ra_cmd_t *pending = NULL;
    int rc = ETIME;

    if ((ra_state == NULL) || (ra_state->cur_cmd == NULL)) {
        return FALSE;
    }
    pending = ra_state->cur_cmd;

    if (!pcmk__strcase_any_of(pending->action, PCMK_ACTION_START,
                              PCMK_ACTION_MIGRATE_FROM, NULL)) {
        return FALSE;
    }

    update_remaining_timeout(pending);
    if (pending->remaining_timeout > 0) {
        rc = handle_remote_ra_start(lrm_state, pending,
                                    pending->remaining_timeout);
    } else {
        pcmk__set_result(&(pending->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_TIMEOUT,
                         "Not enough time remains to retry remote connection");
    }

    if (rc == pcmk_rc_ok) {
        /* The attempt is under way; wait for the connection event */
        return FALSE;
    }

    /* The command is finished (unsuccessfully) */
    report_remote_ra_result(pending);
    if (ra_state->cmds != NULL) {
        mainloop_set_trigger(ra_state->work);
    }
    ra_state->cur_cmd = NULL;
    free_cmd(pending);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback: stop waiting for a takeover and complete the stop
 *
 * \param[in,out] data  Stop command (remote_ra_cmd_t *) that was waiting; it is
 *                      freed here
 *
 * \return FALSE always (so the timer is not rescheduled)
 */
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
    remote_ra_cmd_t *stop_cmd = data;

    crm_info("takeover event timed out for node %s", stop_cmd->rsc_id);

    /* The timer is no longer pending */
    stop_cmd->takeover_timeout_id = 0;

    handle_remote_ra_stop(lrm_state_find(stop_cmd->rsc_id), stop_cmd);
    free_cmd(stop_cmd);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback: a monitor's poke was not answered in time
 *
 * The monitor is reported as timed out and freed, and the connection (if its
 * executor state object still exists) is disconnected.
 *
 * \param[in,out] data  Monitor command (remote_ra_cmd_t *) that timed out
 *
 * \return FALSE always (so the timer is not rescheduled)
 */
static gboolean
monitor_timeout_cb(gpointer data)
{
    remote_ra_cmd_t *monitor = data;
    lrm_state_t *conn_state = lrm_state_find(monitor->rsc_id);

    crm_info("Timed out waiting for remote poke response from %s%s",
             monitor->rsc_id, ((conn_state != NULL)? "" : " (no LRM state)"));

    /* The timer is no longer pending */
    monitor->monitor_timeout_id = 0;

    pcmk__set_result(&(monitor->result), PCMK_OCF_UNKNOWN_ERROR,
                     PCMK_EXEC_TIMEOUT, "Remote executor did not respond");

    if ((conn_state != NULL) && (conn_state->remote_ra_data != NULL)) {
        remote_ra_data_t *ra_state = conn_state->remote_ra_data;

        /* Let the next queued command (if any) run */
        if (ra_state->cur_cmd == monitor) {
            ra_state->cur_cmd = NULL;
        }
        if (ra_state->cmds != NULL) {
            mainloop_set_trigger(ra_state->work);
        }
    }

    report_remote_ra_result(monitor);
    free_cmd(monitor);

    if (conn_state != NULL) {
        lrm_state_disconnect(conn_state);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Process a made-up successful result for a resource action
 *
 * \param[in,out] lrm_state  Executor state object to process the event for (if
 *                           NULL, the one of the local node is used)
 * \param[in]     rsc_id     ID of the resource the result is for
 * \param[in]     op_type    Name of the action the result is for
 */
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
    const unsigned int now = (unsigned int) time(NULL);
    lrmd_event_data_t fake_event = { 0, };

    if (lrm_state == NULL) {
        /* if lrm_state not given assume local */
        lrm_state = lrm_state_find(controld_globals.our_nodename);
    }
    CRM_ASSERT(lrm_state != NULL);

    fake_event.type = lrmd_event_exec_complete;
    fake_event.rsc_id = rsc_id;
    fake_event.op_type = op_type;
    fake_event.t_run = now;
    fake_event.t_rcchange = fake_event.t_run;
    fake_event.call_id = generate_callid();
    lrmd__set_result(&fake_event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);

    process_lrm_event(lrm_state, &fake_event, NULL, NULL);
}
/*!
 * \internal
 * \brief Handle an event received on the connection to a remote executor
 *
 * Depending on the event type, this tracks connection takeovers, passes
 * action results on to lrm_op_callback(), reacts to a lost connection, or
 * completes the command that is currently waiting for an asynchronous event
 * (ra_data->cur_cmd).
 *
 * \param[in,out] op  Event to handle; its remote_nodename is used to look up
 *                    the connection's executor state object
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
/* Every new-client event is fully handled (and returned from) here */
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
/* Do not free lrm_state->conn yet. */
/* It'll be freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
/* A disconnect while no command is waiting for an event */
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
/* Everything below tries to match the event to the waiting command */
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start actions and migrate from actions complete after connection
* comes back to us. */
if ((op->type == lrmd_event_connect)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
/* The command stays the current command while the retry is pending.
 * NOTE(review): the timer ID returned by g_timeout_add() is discarded,
 * so nothing in this function can cancel the retry later.
 */
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
/* The poke was answered, so the response timeout is no longer needed */
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time, after that only worry about failures.
* For this function, if we get the poke pack, it is always a success. Pokes
* only fail if the send fails, or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
/* A failure is reported only if the connection was active and the monitor
 * was not cancelled; either way the monitor is finished
 */
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
/* NOTE(review): this branch looks unreachable, because every
 * lrmd_event_new_client event already returned near the top of this
 * function. Confirm whether a waiting stop was meant to complete here.
 */
} else if ((op->type == lrmd_event_new_client)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
/* A handled command is no longer current; cmd is NULL here if it was
 * rescheduled as a recurring command, in which case free_cmd() does nothing
 */
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
free_cmd(cmd);
}
}
/*!
 * \internal
 * \brief Stop a remote connection and drop all commands queued for it
 *
 * \param[in,out] lrm_state  Executor state object of the connection (must not
 *                           be NULL)
 * \param[in,out] cmd        Stop command to report a successful result for, or
 *                           NULL if there is nothing to report
 *
 * \note The commands in the queues are freed here, but \p cmd itself is not.
 */
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
    remote_ra_data_t *ra_state = NULL;

    CRM_ASSERT(lrm_state);
    ra_state = lrm_state->remote_ra_data;

    if (pcmk_is_set(ra_state->status, takeover_complete)) {
        /* we no longer hold the history if this connection has been migrated,
         * however, we keep metadata cache for future use */
        lrm_state_reset_tables(lrm_state, FALSE);
    } else {
        /* delete pending ops when ever the remote connection is intentionally stopped */
        g_hash_table_remove_all(lrm_state->active_ops);
    }

    lrm_remote_clear_flags(lrm_state, remote_active);
    lrm_state_disconnect(lrm_state);

    /* Nothing queued can be executed any more (g_list_free_full() accepts an
     * empty list, so no guard is needed)
     */
    g_list_free_full(ra_state->cmds, free_cmd);
    g_list_free_full(ra_state->recurring_cmds, free_cmd);
    ra_state->cmds = NULL;
    ra_state->recurring_cmds = NULL;
    ra_state->cur_cmd = NULL;

    if (cmd != NULL) {
        pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
        report_remote_ra_result(cmd);
    }
}
/*!
 * \internal
 * \brief Initiate the connection to a remote executor for a start command
 *
 * The server and port are taken from the command's parameters, and the
 * controlling_guest flag is set if the container meta-attribute is among
 * them. The timeout of the attempt is capped at MAX_START_TIMEOUT_MS.
 *
 * \param[in,out] lrm_state   Executor state object of the connection
 * \param[in,out] cmd         Start or migrate_from command (its result is set
 *                            if the connection could not be initiated)
 * \param[in]     timeout_ms  Time in ms available for the attempt
 *
 * \return Standard Pacemaker return code
 */
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
const char *server = NULL;
lrmd_key_value_t *tmp = NULL;
int port = 0;
int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
int rc = pcmk_rc_ok;
/* If several parameters match, the last one in the list wins */
for (tmp = cmd->params; tmp; tmp = tmp->next) {
- if (pcmk__strcase_any_of(tmp->key, XML_RSC_ATTR_REMOTE_RA_ADDR,
+ if (pcmk__strcase_any_of(tmp->key, PCMK_REMOTE_RA_ADDR,
XML_RSC_ATTR_REMOTE_RA_SERVER, NULL)) {
server = tmp->value;
} else if (pcmk__str_eq(tmp->key, XML_RSC_ATTR_REMOTE_RA_PORT, pcmk__str_casei)) {
/* atoi() yields 0 for a value that is not a number */
port = atoi(tmp->value);
} else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
pcmk__str_none)) {
lrm_remote_set_flags(lrm_state, controlling_guest);
}
}
/* server stays NULL and port stays 0 if the parameters did not give them */
rc = controld_connect_remote_executor(lrm_state, server, port,
timeout_used);
if (rc != pcmk_rc_ok) {
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Could not connect to Pacemaker Remote node %s: %s",
lrm_state->node_name, pcmk_rc_str(rc));
}
return rc;
}
/*!
 * \internal
 * \brief Trigger callback: execute the commands queued for a remote connection
 *
 * Commands are taken from the front of the queue one at a time. Processing
 * stops as soon as a command has to wait for an asynchronous event (it then
 * becomes ra_data->cur_cmd) or for its start delay. Commands that complete
 * immediately are reported and freed.
 *
 * \param[in,out] user_data  Executor state object (lrm_state_t *) of the
 *                           connection, as registered in remote_ra_data_init()
 *
 * \return TRUE always
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
/* Take the command off the queue; from here on it is either made the
 * current command or freed at the end of this iteration
 */
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
/* The attempt could not be initiated; handle_remote_ra_start() has set
 * the result
 */
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
/* monitor_timeout_cb() fails the monitor if no response arrives in time */
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine.
* It just means that the remote-node's lrm_status section is going to get
* cleared which will require all the resources running in the remote-node
* to be explicitly re-detected via probe actions. If the takeover does occur
* successfully, then we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
/* This also frees everything still queued, which ends the loop */
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is reconnect_interval,
* which is only used by the scheduler via the CIB, so reloads are a
* no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
/* NOTE(review): a command whose action matched none of the branches above
 * is freed here without any result having been reported for it
 */
free_cmd(cmd);
}
return TRUE;
}
/*!
 * \internal
 * \brief Create the remote RA data of an executor state object if needed
 *
 * \param[in,out] lrm_state  Executor state object to initialize
 *
 * \note NOTE(review): the result of calloc() is used without a NULL check.
 */
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
    remote_ra_data_t *fresh = NULL;

    if (lrm_state->remote_ra_data != NULL) {
        /* Already initialized */
        return;
    }

    fresh = calloc(1, sizeof(remote_ra_data_t));

    /* Queued commands are executed by handle_remote_ra_exec() whenever this
     * trigger is set
     */
    fresh->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec,
                                       lrm_state);
    lrm_state->remote_ra_data = fresh;
}
/*!
 * \internal
 * \brief Free the remote RA data of an executor state object
 *
 * The queued and recurring commands are freed along with the data, and the
 * work trigger is destroyed.
 *
 * \param[in,out] lrm_state  Executor state object to clean up
 */
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
    remote_ra_data_t *doomed = lrm_state->remote_ra_data;

    if (doomed == NULL) {
        return;
    }

    /* g_list_free_full() accepts an empty list, so no guards are needed */
    g_list_free_full(doomed->cmds, free_cmd);
    g_list_free_full(doomed->recurring_cmds, free_cmd);

    mainloop_destroy_trigger(doomed->work);
    free(doomed);
    lrm_state->remote_ra_data = NULL;
}
/*!
 * \internal
 * \brief Check whether a resource is a remote connection resource
 *
 * A resource qualifies either by its agent ("remote" from provider
 * "pacemaker"), or by its ID being the name of a known executor state object
 * other than the local node's.
 *
 * \param[in] agent     Agent name to check (may be NULL)
 * \param[in] provider  Agent provider to check (may be NULL)
 * \param[in] id        Resource ID to check (may be NULL)
 *
 * \return TRUE if the resource is a remote connection resource, else FALSE
 */
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
    if ((agent != NULL) && (provider != NULL)
        && (strcmp(agent, REMOTE_LRMD_RA) == 0)
        && (strcmp(provider, "pacemaker") == 0)) {
        return TRUE;
    }

    if (id == NULL) {
        return FALSE;
    }
    if (lrm_state_find(id) == NULL) {
        return FALSE;
    }
    if (pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
        return FALSE;
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Get resource information for a remote connection resource
 *
 * \param[in] lrm_state  Ignored
 * \param[in] rsc_id     ID of the remote connection resource
 *
 * \return Newly allocated resource information describing the internal
 *         ocf:pacemaker:remote agent, or NULL if no executor state object is
 *         known by the name \p rsc_id or memory could not be allocated
 *
 * \note The caller is responsible for freeing the result.
 */
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
    lrmd_rsc_info_t *info = NULL;

    if (!lrm_state_find(rsc_id)) {
        return NULL;
    }

    info = calloc(1, sizeof(lrmd_rsc_info_t));
    if (info == NULL) {
        /* Out of memory: report it like an unknown resource rather than
         * dereferencing a NULL pointer below
         */
        return NULL;
    }

    info->id = strdup(rsc_id);
    info->type = strdup(REMOTE_LRMD_RA);
    info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
    info->provider = strdup("pacemaker");
    return info;
}
/*!
 * \internal
 * \brief Check whether the internal remote agent implements an action
 *
 * \param[in] action  Name of the action to check
 *
 * \return Nonzero if \p action is one of start, stop, monitor, migrate_to,
 *         migrate_from, reload-agent, or reload; otherwise zero
 */
static gboolean
is_remote_ra_supported_action(const char *action)
{
    const gboolean supported = pcmk__str_any_of(action,
                                                PCMK_ACTION_START,
                                                PCMK_ACTION_STOP,
                                                PCMK_ACTION_MONITOR,
                                                PCMK_ACTION_MIGRATE_TO,
                                                PCMK_ACTION_MIGRATE_FROM,
                                                PCMK_ACTION_RELOAD_AGENT,
                                                PCMK_ACTION_RELOAD,
                                                NULL);

    return supported;
}
/*!
 * \internal
 * \brief Fail and remove all recurring monitors in a list of commands
 *
 * \param[in,out] list  List of commands (remote_ra_cmd_t *) to check
 *
 * \return New head of \p list after the failed monitors have been removed
 *
 * \note The removed commands are freed after their failure has been reported.
 */
static GList *
fail_all_monitor_cmds(GList * list)
{
    GList *doomed = NULL;
    GList *iter = NULL;

    /* First pass: collect the recurring monitors, so that the list is not
     * modified while it is being walked
     */
    iter = list;
    while (iter != NULL) {
        remote_ra_cmd_t *candidate = iter->data;

        if ((candidate->interval_ms > 0)
            && pcmk__str_eq(candidate->action, PCMK_ACTION_MONITOR,
                            pcmk__str_casei)) {
            doomed = g_list_append(doomed, candidate);
        }
        iter = iter->next;
    }

    /* Second pass: report each one as failed, then drop and free it */
    iter = doomed;
    while (iter != NULL) {
        remote_ra_cmd_t *monitor = iter->data;

        pcmk__set_result(&(monitor->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_ERROR, "Lost connection to remote executor");
        crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
                  monitor->action, monitor->rsc_id, monitor->interval_ms,
                  monitor->userdata);
        report_remote_ra_result(monitor);

        list = g_list_remove(list, monitor);
        free_cmd(monitor);
        iter = iter->next;
    }

    /* The commands are gone already; only the list nodes are left to free */
    g_list_free(doomed);
    return list;
}
/*!
 * \internal
 * \brief Remove and free the first matching command in a list
 *
 * \param[in,out] list         List of commands (remote_ra_cmd_t *) to search
 * \param[in]     action       Action name to match (case-insensitively)
 * \param[in]     interval_ms  Interval to match
 *
 * \return New head of \p list
 */
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
    for (GList *iter = list; iter != NULL; iter = iter->next) {
        remote_ra_cmd_t *candidate = iter->data;

        if ((candidate->interval_ms != interval_ms)
            || !pcmk__str_eq(candidate->action, action, pcmk__str_casei)) {
            continue;
        }

        /* Only the first match is removed; iter is not used again */
        list = g_list_remove(list, candidate);
        free_cmd(candidate);
        break;
    }
    return list;
}
/*!
 * \internal
 * \brief Cancel an action of a remote connection resource
 *
 * A matching command that is queued or waiting for its next interval is
 * removed and freed. A matching command that is currently in flight is only
 * flagged as cancelled.
 *
 * \param[in]     lrm_state    Ignored
 * \param[in]     rsc_id       ID of the remote connection resource
 * \param[in]     action       Name of the action to cancel
 * \param[in]     interval_ms  Interval of the action to cancel
 *
 * \return 0 on success, or -EINVAL if \p rsc_id has no remote RA data
 */
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
                 const char *action, guint interval_ms)
{
    lrm_state_t *conn_state = lrm_state_find(rsc_id);
    remote_ra_data_t *ra_state = NULL;
    remote_ra_cmd_t *in_flight = NULL;

    if ((conn_state == NULL) || (conn_state->remote_ra_data == NULL)) {
        return -EINVAL;
    }
    ra_state = conn_state->remote_ra_data;

    ra_state->cmds = remove_cmd(ra_state->cmds, action, interval_ms);
    ra_state->recurring_cmds = remove_cmd(ra_state->recurring_cmds, action,
                                          interval_ms);

    /* The in-flight command can't be freed while an event is awaited for it */
    in_flight = ra_state->cur_cmd;
    if ((in_flight != NULL)
        && (in_flight->interval_ms == interval_ms)
        && pcmk__str_eq(in_flight->action, action, pcmk__str_casei)) {
        cmd_set_flags(in_flight, cmd_cancel);
    }
    return 0;
}
/*!
 * \internal
 * \brief Merge a recurring monitor request into an existing duplicate, if any
 *
 * A duplicate is looked for, in this order, in:
 *   1. the in-flight command (unless it has been flagged as cancelled),
 *   2. the recurring list, where a command waits for its next interval,
 *   3. the pending list, where a command is queued for execution.
 *
 * \param[in,out] ra_data      Remote connection data holding the commands
 * \param[in]     interval_ms  Interval of the requested monitor
 * \param[in]     userdata     If not NULL, replaces the duplicate's user data
 *
 * \return Existing command the request was merged into, or NULL if
 *         \p interval_ms is 0 or no duplicate exists
 */
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
                   const char *userdata)
{
    remote_ra_cmd_t *dup = NULL;
    remote_ra_cmd_t *in_flight = NULL;
    GList *iter = NULL;

    // Only recurring monitors can have duplicates
    if (interval_ms == 0) {
        return NULL;
    }

    in_flight = ra_data->cur_cmd;
    if ((in_flight != NULL)
        && !pcmk_is_set(in_flight->status, cmd_cancel)
        && (in_flight->interval_ms == interval_ms)
        && pcmk__str_eq(in_flight->action, PCMK_ACTION_MONITOR,
                        pcmk__str_casei)) {
        dup = in_flight;
    }

    for (iter = ra_data->recurring_cmds; (dup == NULL) && (iter != NULL);
         iter = iter->next) {
        remote_ra_cmd_t *candidate = iter->data;

        if ((candidate->interval_ms == interval_ms)
            && pcmk__str_eq(candidate->action, PCMK_ACTION_MONITOR,
                            pcmk__str_casei)) {
            dup = candidate;
        }
    }

    for (iter = ra_data->cmds; (dup == NULL) && (iter != NULL);
         iter = iter->next) {
        remote_ra_cmd_t *candidate = iter->data;

        if ((candidate->interval_ms == interval_ms)
            && pcmk__str_eq(candidate->action, PCMK_ACTION_MONITOR,
                            pcmk__str_casei)) {
            dup = candidate;
        }
    }

    if (dup == NULL) {
        return NULL;
    }

    crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
              dup->rsc_id, PCMK_ACTION_MONITOR, interval_ms);

    // The newest request's user data wins
    if (userdata != NULL) {
        free(dup->userdata);
        dup->userdata = strdup(userdata);
    }

    // A command that already reported success needs a fresh call ID
    if (pcmk_is_set(dup->status, cmd_reported_success)) {
        dup->start_time = time(NULL);
        dup->call_id = generate_callid();
        cmd_clear_flags(dup, cmd_reported_success);
    }

    /* A set interval_id means the command is waiting for its next interval.
     * Rather than keep waiting, drop the timer and run the action right away.
     */
    if (dup->interval_id) {
        g_source_remove(dup->interval_id);
        dup->interval_id = 0;
        recurring_helper(dup);
    }

    return dup;
}
/*!
 * \internal
 * \brief Execute an action using the (internal) ocf:pacemaker:remote agent
 *
 * A recurring monitor that duplicates an existing one is merged into it
 * instead of being queued again; otherwise a new command is created, added to
 * the connection's pending list, and the connection's work trigger is set.
 *
 * \param[in]     lrm_state       Executor state object for remote connection
 * \param[in]     rsc_id          Connection resource ID
 * \param[in]     action          Action to execute
 * \param[in]     userdata        String to copy and pass to execution callback
 * \param[in]     interval_ms     Action interval (in milliseconds)
 * \param[in]     timeout_ms      Action timeout (in milliseconds)
 * \param[in]     start_delay_ms  Delay (in milliseconds) before executing action
 * \param[in,out] params          Connection resource parameters
 * \param[out]    call_id         Where to store call ID on success (set to 0
 *                                on failure)
 *
 * \return Standard Pacemaker return code (pcmk_rc_ok on success, EINVAL for a
 *         NULL required argument, EOPNOTSUPP for an unsupported action,
 *         ENOTCONN if no executor state exists for \p rsc_id, or ENOMEM)
 * \note This takes ownership of \p params, which should not be used or freed
 *       after calling this function.
 */
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
                              const char *action, const char *userdata,
                              guint interval_ms, int timeout_ms,
                              int start_delay_ms, lrmd_key_value_t *params,
                              int *call_id)
{
    lrm_state_t *connection_rsc = NULL;
    remote_ra_cmd_t *cmd = NULL;
    remote_ra_data_t *ra_data = NULL;

    /* Reset the output before validating the arguments, but never dereference
     * a NULL pointer to do so (the check below rejects a NULL call_id)
     */
    if (call_id != NULL) {
        *call_id = 0;
    }

    CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
              && (userdata != NULL) && (call_id != NULL),
              lrmd_key_value_freeall(params); return EINVAL);

    if (!is_remote_ra_supported_action(action)) {
        lrmd_key_value_freeall(params);
        return EOPNOTSUPP;
    }

    connection_rsc = lrm_state_find(rsc_id);
    if (connection_rsc == NULL) {
        lrmd_key_value_freeall(params);
        return ENOTCONN;
    }

    remote_ra_data_init(connection_rsc);
    ra_data = connection_rsc->remote_ra_data;

    // Reuse an equivalent recurring monitor if there is one
    cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
    if (cmd) {
        *call_id = cmd->call_id;
        lrmd_key_value_freeall(params);
        return pcmk_rc_ok;
    }

    cmd = calloc(1, sizeof(remote_ra_cmd_t));
    if (cmd == NULL) {
        lrmd_key_value_freeall(params);
        return ENOMEM;
    }

    cmd->owner = strdup(lrm_state->node_name);
    cmd->rsc_id = strdup(rsc_id);
    cmd->action = strdup(action);
    cmd->userdata = strdup(userdata);
    if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
        || (cmd->userdata == NULL)) {
        free_cmd(cmd);
        lrmd_key_value_freeall(params);
        return ENOMEM;
    }

    cmd->interval_ms = interval_ms;
    cmd->timeout = timeout_ms;
    cmd->start_delay = start_delay_ms;
    cmd->params = params;   // the command now owns the parameters
    cmd->start_time = time(NULL);

    cmd->call_id = generate_callid();

    if (cmd->start_delay) {
        cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
    }

    ra_data->cmds = g_list_append(ra_data->cmds, cmd);
    mainloop_set_trigger(ra_data->work);

    *call_id = cmd->call_id;
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Immediately fail all monitors of a remote node, if proxied here
 *
 * Nothing is done unless an executor state exists for \p node_name, it is
 * connected, and it carries remote connection data.
 *
 * \param[in] node_name  Name of pacemaker_remote node
 */
void
remote_ra_fail(const char *node_name)
{
    lrm_state_t *lrm_state = lrm_state_find(node_name);
    remote_ra_data_t *ra_data = NULL;

    if ((lrm_state == NULL) || !lrm_state_is_connected(lrm_state)) {
        return;
    }

    /* An executor state without remote connection data has no monitors to
     * fail; guard against it like the other lookups in this file do
     */
    ra_data = lrm_state->remote_ra_data;
    if (ra_data == NULL) {
        return;
    }

    crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
    ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
    ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*/
/* XPath, anchored at a root pseudo-event element, that selects each node
* element listed under the downed element when the pseudo-event's operation
* attribute is 'stonith'
*/
#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
 * \internal
 * \brief Check a pseudo-action for Pacemaker Remote node side effects
 *
 * If the pseudo-action is a fencing event that lists exactly one downed node,
 * that node is processed as a remote node that went down.
 *
 * \param[in,out] xml  XML of pseudo-action to check
 */
void
remote_ra_process_pseudo(xmlNode *xml)
{
    xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_FENCE);

    if (numXpathResults(search) == 1) {
        xmlNode *result = getXpathResult(search, 0);
        const char *remote = NULL;

        if (result != NULL) {
            remote = ID(result);
        }

        /* A guest node stop is normally handled when the remote agent's
         * result is reported. When the stop is merely implied by fencing,
         * though, all we get is a fencing pseudo-event and no result is ever
         * reported, so that case is handled here.
         *
         * If the guest stop was real rather than implied, this leads to a
         * second call to remote_node_down(), which shouldn't hurt.
         *
         * One corner case remains unhandled: a guest node that runs no
         * resources when its host is fenced appears cleanly stopped, so no
         * pseudo-fence is generated, and our peer cache state stays incorrect
         * unless and until the guest is recovered.
         */
        if (remote != NULL) {
            remote_node_down(remote, DOWN_ERASE_LRM);
        }
    }
    freeXpathObject(search);
}
/*!
 * \internal
 * \brief Record a remote node's maintenance state in the CIB and locally
 *
 * \param[in,out] lrm_state    Executor state of the remote node
 * \param[in]     maintenance  Whether the node is entering maintenance
 */
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
    xmlNode *update = NULL;
    xmlNode *state = NULL;
    const char *maint_value = "0";
    int call_opt = crmd_cib_smart_opt();
    crm_node_t *node = crm_remote_peer_get(lrm_state->node_name);

    CRM_CHECK(node != NULL, return);

    if (maintenance) {
        maint_value = "1";
    }

    update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    state = create_node_state_update(node, node_update_none, update,
                                     __func__);
    crm_xml_add(state, XML_NODE_IS_MAINTENANCE, maint_value);

    // Mirror the new state in the local flags only if the update was accepted
    if (controld_update_cib(XML_CIB_TAG_STATUS, update, call_opt,
                            NULL) == pcmk_rc_ok) {
        /* TODO: still not 100% sure that async update will succeed ... */
        if (!maintenance) {
            lrm_remote_clear_flags(lrm_state, remote_in_maint);
        } else {
            lrm_remote_set_flags(lrm_state, remote_in_maint);
        }
    }
    free_xml(update);
}
/* XPath selecting the maintenance child of a pseudo-event found anywhere in
* the searched XML (hence the leading "//") whose operation attribute equals
* PCMK_ACTION_MAINTENANCE_NODES
*/
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
 * \internal
 * \brief Check a pseudo-action holding updates for maintenance state
 *
 * For every node listed in the pseudo-action that has an active remote
 * connection here, the node's maintenance state is updated accordingly.
 *
 * \param[in,out] xml  XML of pseudo-action to check
 */
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
    xmlXPathObjectPtr search = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);

    if (numXpathResults(search) == 1) {
        int cnt = 0;
        int cnt_remote = 0;
        xmlNode *node = first_named_child(getXpathResult(search, 0),
                                          XML_CIB_TAG_NODE);

        while (node != NULL) {
            lrm_state_t *lrm_state = lrm_state_find(ID(node));
            remote_ra_data_t *ra_data = NULL;

            cnt++;
            if (lrm_state != NULL) {
                ra_data = lrm_state->remote_ra_data;
            }

            // Only nodes with an active remote connection are of interest
            if ((ra_data != NULL)
                && pcmk_is_set(ra_data->status, remote_active)) {
                int is_maint = 0;

                cnt_remote++;
                pcmk__scan_min_int(crm_element_value(node,
                                                     XML_NODE_IS_MAINTENANCE),
                                   &is_maint, 0);
                remote_ra_maintenance(lrm_state, is_maint);
            }
            node = crm_next_same_xml(node);
        }
        crm_trace("Action holds %d nodes (%d remotes found) adjusting "
                  PCMK_OPT_MAINTENANCE_MODE,
                  cnt, cnt_remote);
    }
    freeXpathObject(search);
}
/*!
 * \brief Check whether a remote connection is flagged as in maintenance
 *
 * \param[in] lrm_state  Executor state whose remote connection data is checked
 *                       (its remote_ra_data is dereferenced unconditionally)
 *
 * \return Whether the remote_in_maint flag is set in the connection's status
 */
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
    const remote_ra_data_t *connection = lrm_state->remote_ra_data;

    return pcmk_is_set(connection->status, remote_in_maint);
}
/*!
 * \brief Check whether a remote connection is flagged as controlling a guest
 *
 * \param[in] lrm_state  Executor state whose remote connection data is checked
 *                       (its remote_ra_data is dereferenced unconditionally)
 *
 * \return Whether the controlling_guest flag is set in the connection's status
 */
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
    const remote_ra_data_t *connection = lrm_state->remote_ra_data;

    return pcmk_is_set(connection->status, controlling_guest);
}
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index b47980513d..3c80d87cf9 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -1,457 +1,464 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_MSG_XML__H
# define PCMK__CRM_MSG_XML__H
# include <crm/common/xml.h>
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/msg_xml_compat.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* This file defines constants for various XML syntax (mainly element and
* attribute names).
*
* For consistency, new constants should start with "PCMK_", followed by "XE"
* for XML element names, "XA" for XML attribute names, and "META" for meta
* attribute names. Old names that don't follow this policy should eventually be
* deprecated and replaced with names that do.
*/
/*
* XML elements
*/
#define PCMK_XE_DATE_EXPRESSION "date_expression"
#define PCMK_XE_OP_EXPRESSION "op_expression"
/* This has been deprecated as a CIB element (an alias for <clone> with
* "promotable" set to "true") since 2.0.0.
*/
#define PCMK_XE_PROMOTABLE_LEGACY "master"
#define PCMK_XE_RSC_EXPRESSION "rsc_expression"
/*
* XML attributes
*/
#define PCMK_XA_ADMIN_EPOCH "admin_epoch"
#define PCMK_XA_CIB_LAST_WRITTEN "cib-last-written"
#define PCMK_XA_CRM_DEBUG_ORIGIN "crm-debug-origin"
#define PCMK_XA_CRM_FEATURE_SET "crm_feature_set"
#define PCMK_XA_CRM_TIMESTAMP "crm-timestamp"
#define PCMK_XA_DESCRIPTION "description"
#define PCMK_XA_EPOCH "epoch"
#define PCMK_XA_FORMAT "format"
#define PCMK_XA_HAVE_QUORUM "have-quorum"
#define PCMK_XA_ID "id"
#define PCMK_XA_NO_QUORUM_PANIC "no-quorum-panic"
#define PCMK_XA_NUM_UPDATES "num_updates"
#define PCMK_XA_VALIDATE_WITH "validate-with"
#define PCMK_XA_VERSION "version"
/*
* Meta attributes
*/
#define PCMK_META_ALLOW_MIGRATE "allow-migrate"
#define PCMK_META_CLONE_MAX "clone-max"
#define PCMK_META_CLONE_MIN "clone-min"
#define PCMK_META_CLONE_NODE_MAX "clone-node-max"
#define PCMK_META_CONTAINER_ATTR_TARGET "container-attribute-target"
#define PCMK_META_CRITICAL "critical"
#define PCMK_META_ENABLED "enabled"
#define PCMK_META_FAILURE_TIMEOUT "failure-timeout"
#define PCMK_META_GLOBALLY_UNIQUE "globally-unique"
#define PCMK_META_INTERLEAVE "interleave"
#define PCMK_META_IS_MANAGED "is-managed"
#define PCMK_META_MAINTENANCE "maintenance"
#define PCMK_META_MIGRATION_THRESHOLD "migration-threshold"
#define PCMK_META_MULTIPLE_ACTIVE "multiple-active"
#define PCMK_META_NOTIFY "notify"
#define PCMK_META_ORDERED "ordered"
#define PCMK_META_PRIORITY "priority"
#define PCMK_META_PROMOTABLE "promotable"
#define PCMK_META_PROMOTED_MAX "promoted-max"
#define PCMK_META_PROMOTED_NODE_MAX "promoted-node-max"
#define PCMK_META_REMOTE_ADDR "remote-addr"
#define PCMK_META_REMOTE_ALLOW_MIGRATE "remote-allow-migrate"
#define PCMK_META_REMOTE_CONNECT_TIMEOUT "remote-connect-timeout"
#define PCMK_META_REMOTE_NODE "remote-node"
#define PCMK_META_REMOTE_PORT "remote-port"
#define PCMK_META_REQUIRES "requires"
#define PCMK_META_RESOURCE_STICKINESS "resource-stickiness"
#define PCMK_META_TARGET_ROLE "target-role"
+/*
+ * Remote resource instance attributes
+ */
+
+#define PCMK_REMOTE_RA_ADDR "addr"
+
+
/*
* Older constants that don't follow current naming
*/
# ifndef F_ORIG
# define F_ORIG "src"
# endif
# ifndef F_SEQ
# define F_SEQ "seq"
# endif
# ifndef F_SUBTYPE
# define F_SUBTYPE "subt"
# endif
# ifndef F_TYPE
# define F_TYPE "t"
# endif
# ifndef F_CLIENTNAME
# define F_CLIENTNAME "cn"
# endif
# ifndef F_XML_TAGNAME
# define F_XML_TAGNAME "__name__"
# endif
# ifndef T_CRM
# define T_CRM "crmd"
# endif
# ifndef T_ATTRD
# define T_ATTRD "attrd"
# endif
# define CIB_OPTIONS_FIRST "cib-bootstrap-options"
# define F_CRM_DATA "crm_xml"
# define F_CRM_TASK "crm_task"
# define F_CRM_HOST_TO "crm_host_to"
# define F_CRM_MSG_TYPE F_SUBTYPE
# define F_CRM_SYS_TO "crm_sys_to"
# define F_CRM_SYS_FROM "crm_sys_from"
# define F_CRM_HOST_FROM F_ORIG
# define F_CRM_REFERENCE XML_ATTR_REFERENCE
# define F_CRM_VERSION PCMK_XA_VERSION
# define F_CRM_ORIGIN "origin"
# define F_CRM_USER "crm_user"
# define F_CRM_JOIN_ID "join_id"
# define F_CRM_DC_LEAVING "dc-leaving"
# define F_CRM_ELECTION_ID "election-id"
# define F_CRM_ELECTION_AGE_S "election-age-sec"
# define F_CRM_ELECTION_AGE_US "election-age-nano-sec"
# define F_CRM_ELECTION_OWNER "election-owner"
# define F_CRM_TGRAPH "crm-tgraph-file"
# define F_CRM_TGRAPH_INPUT "crm-tgraph-in"
# define F_CRM_THROTTLE_MODE "crm-limit-mode"
# define F_CRM_THROTTLE_MAX "crm-limit-max"
/*---- Common tags/attrs */
# define XML_DIFF_MARKER "__crm_diff_marker__"
# define XML_TAG_CIB "cib"
# define XML_TAG_FAILED "failed"
# define XML_ATTR_TIMEOUT "timeout"
# define XML_ATTR_NAME "name"
# define XML_ATTR_IDREF "id-ref"
# define XML_ATTR_ID_LONG "long-id"
# define XML_ATTR_TYPE "type"
# define XML_ATTR_OP "op"
# define XML_ATTR_DC_UUID "dc-uuid"
# define XML_ATTR_UPDATE_ORIG "update-origin"
# define XML_ATTR_UPDATE_CLIENT "update-client"
# define XML_ATTR_UPDATE_USER "update-user"
# define XML_BOOLEAN_TRUE "true"
# define XML_BOOLEAN_FALSE "false"
# define XML_TAG_OPTIONS "options"
/*---- top level tags/attrs */
# define XML_ATTR_REQUEST "request"
# define XML_ATTR_RESPONSE "response"
# define XML_ATTR_UNAME "uname"
# define XML_ATTR_REFERENCE "reference"
# define XML_CRM_TAG_PING "ping_response"
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
# define XML_PING_ATTR_PACEMAKERDSTATE_REMOTE "remote"
# define XML_FAIL_TAG_CIB "failed_update"
# define XML_FAILCIB_ATTR_OBJTYPE "object_type"
# define XML_FAILCIB_ATTR_OP "operation"
# define XML_FAILCIB_ATTR_REASON "reason"
/*---- CIB specific tags/attrs */
# define XML_CIB_TAG_SECTION_ALL "all"
# define XML_CIB_TAG_CONFIGURATION "configuration"
# define XML_CIB_TAG_STATUS "status"
# define XML_CIB_TAG_RESOURCES "resources"
# define XML_CIB_TAG_NODES "nodes"
# define XML_CIB_TAG_CONSTRAINTS "constraints"
# define XML_CIB_TAG_CRMCONFIG "crm_config"
# define XML_CIB_TAG_OPCONFIG "op_defaults"
# define XML_CIB_TAG_RSCCONFIG "rsc_defaults"
# define XML_CIB_TAG_ACLS "acls"
# define XML_CIB_TAG_ALERTS "alerts"
# define XML_CIB_TAG_ALERT "alert"
# define XML_CIB_TAG_ALERT_RECIPIENT "recipient"
# define XML_CIB_TAG_ALERT_SELECT "select"
# define XML_CIB_TAG_ALERT_ATTRIBUTES "select_attributes"
# define XML_CIB_TAG_ALERT_FENCING "select_fencing"
# define XML_CIB_TAG_ALERT_NODES "select_nodes"
# define XML_CIB_TAG_ALERT_RESOURCES "select_resources"
# define XML_CIB_TAG_ALERT_ATTR "attribute"
# define XML_CIB_TAG_STATE "node_state"
# define XML_CIB_TAG_NODE "node"
# define XML_CIB_TAG_NVPAIR "nvpair"
# define XML_CIB_TAG_PROPSET "cluster_property_set"
# define XML_TAG_ATTR_SETS "instance_attributes"
# define XML_TAG_META_SETS "meta_attributes"
# define XML_TAG_ATTRS "attributes"
# define XML_TAG_PARAMS "parameters"
# define XML_TAG_PARAM "param"
# define XML_TAG_UTILIZATION "utilization"
# define XML_TAG_RESOURCE_REF "resource_ref"
# define XML_CIB_TAG_RESOURCE "primitive"
# define XML_CIB_TAG_GROUP "group"
# define XML_CIB_TAG_INCARNATION "clone"
# define XML_CIB_TAG_CONTAINER "bundle"
# define XML_CIB_TAG_RSC_TEMPLATE "template"
/* NOTE(review): presumably parameter names of the remote connection resource;
* XML_RSC_ATTR_REMOTE_RA_ADDR has the same value ("addr") as
* PCMK_REMOTE_RA_ADDR, defined earlier in this header -- confirm whether one
* should be expressed in terms of the other
*/
# define XML_RSC_ATTR_REMOTE_RA_ADDR "addr"
# define XML_RSC_ATTR_REMOTE_RA_SERVER "server"
# define XML_RSC_ATTR_REMOTE_RA_PORT "port"
# define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval"
# define XML_OP_ATTR_ON_FAIL "on-fail"
# define XML_OP_ATTR_START_DELAY "start-delay"
# define XML_OP_ATTR_ORIGIN "interval-origin"
# define XML_OP_ATTR_PENDING "record-pending"
# define XML_OP_ATTR_DIGESTS_ALL "digests-all"
# define XML_OP_ATTR_DIGESTS_SECURE "digests-secure"
# define XML_CIB_TAG_LRM "lrm"
# define XML_LRM_TAG_RESOURCES "lrm_resources"
# define XML_LRM_TAG_RESOURCE "lrm_resource"
# define XML_LRM_TAG_RSC_OP "lrm_rsc_op"
# define XML_AGENT_ATTR_CLASS "class"
# define XML_AGENT_ATTR_PROVIDER "provider"
//! \deprecated Do not use (will be removed in a future release)
# define XML_CIB_ATTR_REPLACE "replace"
# define XML_CIB_ATTR_PRIORITY "priority"
# define XML_NODE_IS_REMOTE "remote_node"
# define XML_NODE_IS_FENCED "node_fenced"
# define XML_NODE_IS_MAINTENANCE "node_in_maintenance"
# define XML_CIB_ATTR_SHUTDOWN "shutdown"
/* Aside from being an old name for the executor, LRM is a misnomer here because
* the controller and scheduler use these to track actions, which are not always
* executor operations.
*/
// XML attribute that takes interval specification (user-facing configuration)
# define XML_LRM_ATTR_INTERVAL "interval"
// XML attribute that takes interval in milliseconds (daemon APIs)
// (identical value as above, but different constant allows clearer code intent)
# define XML_LRM_ATTR_INTERVAL_MS XML_LRM_ATTR_INTERVAL
# define XML_LRM_ATTR_TASK "operation"
# define XML_LRM_ATTR_TASK_KEY "operation_key"
# define XML_LRM_ATTR_TARGET "on_node"
# define XML_LRM_ATTR_TARGET_UUID "on_node_uuid"
/*! Actions to be executed on Pacemaker Remote nodes are routed through the
* controller on the cluster node hosting the remote connection. That cluster
* node is considered the router node for the action.
*/
# define XML_LRM_ATTR_ROUTER_NODE "router_node"
# define XML_LRM_ATTR_RSCID "rsc-id"
# define XML_LRM_ATTR_OPSTATUS "op-status"
# define XML_LRM_ATTR_RC "rc-code"
# define XML_LRM_ATTR_CALLID "call-id"
# define XML_LRM_ATTR_OP_DIGEST "op-digest"
# define XML_LRM_ATTR_OP_RESTART "op-force-restart"
# define XML_LRM_ATTR_OP_SECURE "op-secure-params"
# define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest"
# define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest"
# define XML_LRM_ATTR_EXIT_REASON "exit-reason"
# define XML_RSC_OP_LAST_CHANGE "last-rc-change"
# define XML_RSC_OP_T_EXEC "exec-time"
# define XML_RSC_OP_T_QUEUE "queue-time"
# define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source"
# define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target"
# define XML_TAG_GRAPH "transition_graph"
# define XML_GRAPH_TAG_RSC_OP "rsc_op"
# define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event"
# define XML_GRAPH_TAG_CRM_EVENT "crm_event"
# define XML_GRAPH_TAG_DOWNED "downed"
# define XML_GRAPH_TAG_MAINTENANCE "maintenance"
# define XML_TAG_RULE "rule"
# define XML_RULE_ATTR_SCORE "score"
# define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute"
# define XML_RULE_ATTR_ROLE "role"
# define XML_RULE_ATTR_BOOLEAN_OP "boolean-op"
# define XML_TAG_EXPRESSION "expression"
# define XML_EXPR_ATTR_ATTRIBUTE "attribute"
# define XML_EXPR_ATTR_OPERATION "operation"
# define XML_EXPR_ATTR_VALUE "value"
# define XML_EXPR_ATTR_TYPE "type"
# define XML_EXPR_ATTR_VALUE_SOURCE "value-source"
# define XML_CONS_TAG_RSC_DEPEND "rsc_colocation"
# define XML_CONS_TAG_RSC_ORDER "rsc_order"
# define XML_CONS_TAG_RSC_LOCATION "rsc_location"
# define XML_CONS_TAG_RSC_TICKET "rsc_ticket"
# define XML_CONS_TAG_RSC_SET "resource_set"
# define XML_CONS_ATTR_SYMMETRICAL "symmetrical"
# define XML_LOCATION_ATTR_DISCOVERY "resource-discovery"
# define XML_COLOC_ATTR_SOURCE "rsc"
# define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role"
# define XML_COLOC_ATTR_TARGET "with-rsc"
# define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role"
# define XML_COLOC_ATTR_NODE_ATTR "node-attribute"
# define XML_COLOC_ATTR_INFLUENCE "influence"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance"
# define XML_LOC_ATTR_SOURCE "rsc"
# define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern"
# define XML_ORDER_ATTR_FIRST "first"
# define XML_ORDER_ATTR_THEN "then"
# define XML_ORDER_ATTR_FIRST_ACTION "first-action"
# define XML_ORDER_ATTR_THEN_ACTION "then-action"
# define XML_ORDER_ATTR_KIND "kind"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_THEN_INSTANCE "then-instance"
# define XML_TICKET_ATTR_TICKET "ticket"
# define XML_TICKET_ATTR_LOSS_POLICY "loss-policy"
# define XML_NVPAIR_ATTR_NAME "name"
# define XML_NVPAIR_ATTR_VALUE "value"
# define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled"
# define XML_ALERT_ATTR_PATH "path"
# define XML_ALERT_ATTR_TIMEOUT "timeout"
# define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format"
# define XML_ALERT_ATTR_REC_VALUE "value"
# define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple"
# define XML_ATTR_TRANSITION_MAGIC "transition-magic"
# define XML_ATTR_TRANSITION_KEY "transition-key"
# define XML_ATTR_TE_NOWAIT "op_no_wait"
# define XML_ATTR_TE_TARGET_RC "op_target_rc"
# define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_ADDED "diff-added"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_REMOVED "diff-removed"
# define XML_ACL_TAG_USER "acl_target"
# define XML_ACL_TAG_USERv1 "acl_user"
# define XML_ACL_TAG_GROUP "acl_group"
# define XML_ACL_TAG_ROLE "acl_role"
# define XML_ACL_TAG_PERMISSION "acl_permission"
# define XML_ACL_TAG_ROLE_REF "role"
# define XML_ACL_TAG_ROLE_REFv1 "role_ref"
# define XML_ACL_ATTR_KIND "kind"
# define XML_ACL_TAG_READ "read"
# define XML_ACL_TAG_WRITE "write"
# define XML_ACL_TAG_DENY "deny"
# define XML_ACL_ATTR_REF "reference"
# define XML_ACL_ATTR_REFv1 "ref"
# define XML_ACL_ATTR_TAG "object-type"
# define XML_ACL_ATTR_TAGv1 "tag"
# define XML_ACL_ATTR_XPATH "xpath"
# define XML_ACL_ATTR_ATTRIBUTE "attribute"
# define XML_CIB_TAG_TICKETS "tickets"
# define XML_CIB_TAG_TICKET_STATE "ticket_state"
# define XML_CIB_TAG_TAGS "tags"
# define XML_CIB_TAG_TAG "tag"
# define XML_CIB_TAG_OBJ_REF "obj_ref"
# define XML_TAG_FENCING_TOPOLOGY "fencing-topology"
# define XML_TAG_FENCING_LEVEL "fencing-level"
# define XML_ATTR_STONITH_INDEX "index"
# define XML_ATTR_STONITH_TARGET "target"
# define XML_ATTR_STONITH_TARGET_VALUE "target-value"
# define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern"
# define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute"
# define XML_ATTR_STONITH_DEVICES "devices"
# define XML_TAG_DIFF "diff"
# define XML_DIFF_VERSION "version"
# define XML_DIFF_VSOURCE "source"
# define XML_DIFF_VTARGET "target"
# define XML_DIFF_CHANGE "change"
# define XML_DIFF_LIST "change-list"
# define XML_DIFF_ATTR "change-attr"
# define XML_DIFF_RESULT "change-result"
# define XML_DIFF_OP "operation"
# define XML_DIFF_PATH "path"
# define XML_DIFF_POSITION "position"
// Shorthand for looking up the PCMK_XA_ID ("id") attribute of XML element x
# define ID(x) crm_element_value(x, PCMK_XA_ID)
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pacemaker/pcmk_sched_bundle.c b/lib/pacemaker/pcmk_sched_bundle.c
index 0a9efe13f6..13f00653ac 100644
--- a/lib/pacemaker/pcmk_sched_bundle.c
+++ b/lib/pacemaker/pcmk_sched_bundle.c
@@ -1,1060 +1,1060 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <stdbool.h>
#include <crm/msg_xml.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Arguments handed to assign_replica() through its user_data pointer
struct assign_data {
// Preferred node passed on when assigning a replica's IP and remote connection
const pcmk_node_t *prefer;
// Passed on unchanged as the stop_if_fail argument of each assign method
bool stop_if_fail;
};
/*!
 * \internal
 * \brief Assign a single bundle replica's resources (other than container)
 *
 * \param[in,out] replica    Replica to assign
 * \param[in]     user_data  Assignment arguments (a struct assign_data with
 *                           the preferred node, if any, and stop_if_fail)
 *
 * \return true (to indicate that any further replicas should be processed)
 */
static bool
assign_replica(pcmk__bundle_replica_t *replica, void *user_data)
{
    const struct assign_data *args = user_data;
    const pcmk_node_t *prefer = args->prefer;
    bool stop_if_fail = args->stop_if_fail;
    const pcmk_resource_t *bundle = pe__const_top_resource(replica->container,
                                                           true);
    pcmk_node_t *container_host = NULL;

    if (replica->ip != NULL) {
        pcmk__rsc_trace(bundle, "Assigning bundle %s IP %s",
                        bundle->id, replica->ip->id);
        replica->ip->cmds->assign(replica->ip, prefer, stop_if_fail);
    }

    container_host = replica->container->allocated_to;

    if (replica->remote != NULL) {
        if (pe__is_guest_or_remote_node(container_host)) {
            /* REMOTE_CONTAINER_HACK: Pacemaker Remote supports only a single
             * active connection, so "nested" connection resources have to be
             * kept on the same host.
             */
            pcmk__new_colocation("#replica-remote-with-host-remote", NULL,
                                 INFINITY, replica->remote,
                                 container_host->details->remote_rsc, NULL,
                                 NULL, pcmk__coloc_influence);
        }

        pcmk__rsc_trace(bundle, "Assigning bundle %s connection %s",
                        bundle->id, replica->remote->id);
        replica->remote->cmds->assign(replica->remote, prefer, stop_if_fail);
    }

    if (replica->child != NULL) {
        GHashTableIter iter;
        pcmk_node_t *node = NULL;

        // Ban the child from every node except the replica's own node
        g_hash_table_iter_init(&iter, replica->child->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            if (pe__same_node(node, replica->node)) {
                if (!pcmk__threshold_reached(replica->child, node, NULL)) {
                    node->weight = INFINITY;
                }
            } else {
                node->weight = -INFINITY;
            }
        }

        pe__set_resource_flags(replica->child->parent, pcmk_rsc_assigning);
        pcmk__rsc_trace(bundle, "Assigning bundle %s replica child %s",
                        bundle->id, replica->child->id);
        replica->child->cmds->assign(replica->child, replica->node,
                                     stop_if_fail);
        pe__clear_resource_flags(replica->child->parent, pcmk_rsc_assigning);
    }
    return true;
}
/*!
 * \internal
 * \brief Assign a bundle resource to a node
 *
 * Containers are assigned first, then each replica's remaining resources, and
 * finally the bundled resource itself.
 *
 * \param[in,out] rsc           Resource to assign to a node
 * \param[in]     prefer        Node to prefer, if all else is equal
 * \param[in]     stop_if_fail  If \c true and a primitive descendant of \p rsc
 *                              can't be assigned to a node, set the
 *                              descendant's next role to stopped and update
 *                              existing actions
 *
 * \return Node that \p rsc is assigned to, if assigned entirely to one node
 *         (this implementation always returns NULL)
 *
 * \note If \p stop_if_fail is \c false, then \c pcmk__unassign_resource() can
 *       completely undo the assignment. A successful assignment can be either
 *       undone or left alone as final. A failed assignment has the same effect
 *       as calling pcmk__unassign_resource(); there are no side effects on
 *       roles or actions.
 */
pcmk_node_t *
pcmk__bundle_assign(pcmk_resource_t *rsc, const pcmk_node_t *prefer,
                    bool stop_if_fail)
{
    struct assign_data assign_data = {
        .prefer = prefer,
        .stop_if_fail = stop_if_fail,
    };
    GList *containers = NULL;
    pcmk_resource_t *bundled_resource = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    pcmk__rsc_trace(rsc, "Assigning bundle %s", rsc->id);
    pe__set_resource_flags(rsc, pcmk_rsc_assigning);

    pe__show_node_scores(!pcmk_is_set(rsc->cluster->flags,
                                      pcmk_sched_output_scores),
                         rsc, __func__, rsc->allowed_nodes, rsc->cluster);

    // Containers go first, which settles the nodes the bundle will be on
    containers = g_list_sort(pe__bundle_containers(rsc), pcmk__cmp_instance);
    pcmk__assign_instances(rsc, containers, pe__bundle_max(rsc),
                           rsc->fns->max_per_node(rsc));
    g_list_free(containers);

    // Next come the other resources of each replica
    pe__foreach_bundle_replica(rsc, assign_replica, (void *) &assign_data);

    // Last, the bundled resource is restricted to and assigned on bundle nodes
    bundled_resource = pe__bundled_resource(rsc);
    if (bundled_resource != NULL) {
        GHashTableIter iter;
        pcmk_node_t *node = NULL;

        g_hash_table_iter_init(&iter, bundled_resource->allowed_nodes);
        while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
            node->weight = pe__node_is_bundle_instance(rsc, node)? 0 : -INFINITY;
        }
        bundled_resource->cmds->assign(bundled_resource, prefer, stop_if_fail);
    }

    pe__clear_resource_flags(rsc, pcmk_rsc_assigning|pcmk_rsc_unassigned);
    return NULL;
}
/*!
 * \internal
 * \brief Create actions for a bundle replica's resources (other than child)
 *
 * Actions are created for the replica's IP, container, and remote connection,
 * in that order, skipping any that the replica does not have.
 *
 * \param[in,out] replica    Replica to create actions for
 * \param[in]     user_data  Unused
 *
 * \return true (to indicate that any further replicas should be processed)
 */
static bool
create_replica_actions(pcmk__bundle_replica_t *replica, void *user_data)
{
    pcmk_resource_t *members[] = {
        replica->ip,
        replica->container,
        replica->remote,
    };
    const int n_members = (int) (sizeof(members) / sizeof(members[0]));

    for (int i = 0; i < n_members; i++) {
        if (members[i] != NULL) {
            members[i]->cmds->create_actions(members[i]);
        }
    }
    return true;
}
/*!
 * \internal
 * \brief Create all actions needed for a given bundle resource
 *
 * Beyond the replicas' and containers' actions, a promotable bundled resource
 * also gets promote, promoted, demote, and demoted pseudo-actions on the
 * bundle itself.
 *
 * \param[in,out] rsc  Bundle resource to create actions for
 */
void
pcmk__bundle_create_actions(pcmk_resource_t *rsc)
{
    GList *containers = NULL;
    pcmk_resource_t *bundled_resource = NULL;
    pcmk_action_t *completed = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    pe__foreach_bundle_replica(rsc, create_replica_actions, NULL);

    containers = pe__bundle_containers(rsc);
    pcmk__create_instance_actions(rsc, containers);
    g_list_free(containers);

    bundled_resource = pe__bundled_resource(rsc);
    if (bundled_resource == NULL) {
        return;
    }
    bundled_resource->cmds->create_actions(bundled_resource);

    if (!pcmk_is_set(bundled_resource->flags, pcmk_rsc_promotable)) {
        return;
    }

    // The "completed" pseudo-actions get the highest priority
    pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTE, true, true);
    completed = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_PROMOTED,
                                          true, true);
    completed->priority = INFINITY;

    pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTE, true, true);
    completed = pe__new_rsc_pseudo_action(rsc, PCMK_ACTION_DEMOTED,
                                          true, true);
    completed->priority = INFINITY;
}
/*!
 * \internal
 * \brief Create internal constraints for a bundle replica's resources
 *
 * \param[in,out] replica    Replica to create internal constraints for
 * \param[in,out] user_data  Replica's parent bundle
 *
 * \return true (to indicate that any further replicas should be processed)
 */
static bool
replica_internal_constraints(pcmk__bundle_replica_t *replica, void *user_data)
{
    pcmk_resource_t *bundle = user_data;
    pcmk_resource_t *container = replica->container;

    container->cmds->internal_constraints(container);

    // Starting the bundle comes before starting the replica's container
    pcmk__order_starts(bundle, container,
                       pcmk__ar_unrunnable_first_blocks
                       |pcmk__ar_then_implies_first_graphed);

    // Stopping the bundle comes before stopping the child and the container
    if (replica->child != NULL) {
        pcmk__order_stops(bundle, replica->child,
                          pcmk__ar_then_implies_first_graphed);
    }
    pcmk__order_stops(bundle, container,
                      pcmk__ar_then_implies_first_graphed);

    // Once the container has started, the bundle counts as running
    pcmk__order_resource_actions(container, PCMK_ACTION_START, bundle,
                                 PCMK_ACTION_RUNNING,
                                 pcmk__ar_first_implies_then_graphed);

    // Once the container has stopped, the bundle counts as stopped
    pcmk__order_resource_actions(container, PCMK_ACTION_STOP, bundle,
                                 PCMK_ACTION_STOPPED,
                                 pcmk__ar_first_implies_then_graphed);

    if (replica->ip != NULL) {
        pcmk_resource_t *ip = replica->ip;

        ip->cmds->internal_constraints(ip);

        // IP address before container on start, after it on stop
        pcmk__order_starts(ip, container,
                           pcmk__ar_unrunnable_first_blocks
                           |pcmk__ar_guest_allowed);
        pcmk__order_stops(container, ip,
                          pcmk__ar_then_implies_first|pcmk__ar_guest_allowed);

        pcmk__new_colocation("#ip-with-container", NULL, INFINITY, ip,
                             container, NULL, NULL,
                             pcmk__coloc_influence);
    }

    if (replica->remote != NULL) {
        /* The remote connection's own internal constraints order and colocate
         * it relative to the container (via "#resource-with-container"). The
         * IP is ordered and colocated relative to the container as well, so
         * nothing explicit is needed here between remote and IP.
         */
        replica->remote->cmds->internal_constraints(replica->remote);
    }

    if (replica->child != NULL) {
        CRM_ASSERT(replica->remote != NULL);
        // "Start remote then child" is implicit in scheduler's remote logic
    }
    return true;
}
/*!
 * \internal
 * \brief Create the implicit constraints that a bundle resource needs
 *
 * Each replica gets its internal constraints first. If the bundle has a
 * bundled resource, the bundle's start/stop (and, when promotable,
 * promote/demote) actions are then ordered around the corresponding actions of
 * the bundled resource.
 *
 * \param[in,out] rsc  Bundle resource to create implicit constraints for
 */
void
pcmk__bundle_internal_constraints(pcmk_resource_t *rsc)
{
    pcmk_resource_t *inner = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    pe__foreach_bundle_replica(rsc, replica_internal_constraints, rsc);

    inner = pe__bundled_resource(rsc);
    if (inner == NULL) {
        return;
    }

    // Bundle start precedes the bundled resource's start
    pcmk__order_resource_actions(rsc, PCMK_ACTION_START, inner,
                                 PCMK_ACTION_START,
                                 pcmk__ar_then_implies_first_graphed);

    // Bundled resource running precedes the bundle running
    pcmk__order_resource_actions(inner, PCMK_ACTION_RUNNING, rsc,
                                 PCMK_ACTION_RUNNING,
                                 pcmk__ar_first_implies_then_graphed);

    // Bundle stop precedes the bundled resource's stop
    pcmk__order_resource_actions(rsc, PCMK_ACTION_STOP, inner,
                                 PCMK_ACTION_STOP,
                                 pcmk__ar_then_implies_first_graphed);

    // Bundled resource stopped precedes the bundle stopped
    pcmk__order_resource_actions(inner, PCMK_ACTION_STOPPED, rsc,
                                 PCMK_ACTION_STOPPED,
                                 pcmk__ar_first_implies_then_graphed);

    inner->cmds->internal_constraints(inner);

    if (pcmk_is_set(inner->flags, pcmk_rsc_promotable)) {
        pcmk__promotable_restart_ordering(rsc);

        // Bundle demote precedes the bundled resource's demote
        pcmk__order_resource_actions(rsc, PCMK_ACTION_DEMOTE, inner,
                                     PCMK_ACTION_DEMOTE,
                                     pcmk__ar_then_implies_first_graphed);

        // Bundled resource demoted precedes the bundle demoted
        pcmk__order_resource_actions(inner, PCMK_ACTION_DEMOTED, rsc,
                                     PCMK_ACTION_DEMOTED,
                                     pcmk__ar_first_implies_then_graphed);

        // Bundle promote precedes the bundled resource's promote
        pcmk__order_resource_actions(rsc, PCMK_ACTION_PROMOTE, inner,
                                     PCMK_ACTION_PROMOTE,
                                     pcmk__ar_then_implies_first_graphed);

        // Bundled resource promoted precedes the bundle promoted
        pcmk__order_resource_actions(inner, PCMK_ACTION_PROMOTED, rsc,
                                     PCMK_ACTION_PROMOTED,
                                     pcmk__ar_first_implies_then_graphed);
    }
}
/* Search state passed as user data to match_replica_container() while
* iterating over a bundle's replicas. Field order matters: the struct is
* initialized positionally by compatible_container().
*/
struct match_data {
const pcmk_node_t *node; // Input: node to compare against each replica
pcmk_resource_t *container; // Output: matching replica container (if found)
};
/*!
 * \internal
 * \brief Replica iterator callback: record the first replica whose container
 *        matches a given node
 *
 * \param[in]     replica    Replica whose container should be tested
 * \param[in,out] user_data  struct match_data: its node is what to match
 *                           against, and its container is set on a match
 *
 * \return false once a match has been recorded (no further replicas need to be
 *         processed), otherwise true (keep processing replicas)
 */
static bool
match_replica_container(const pcmk__bundle_replica_t *replica, void *user_data)
{
    struct match_data *search = user_data;

    if (!pcmk__instance_matches(replica->container, search->node,
                                pcmk_role_unknown, false)) {
        return true;    // Not this replica, keep looking
    }

    search->container = replica->container;
    return false;       // Found one, no need to look at further replicas
}
/*!
 * \internal
 * \brief Map a possible bundle node to the node hosting its container
 *
 * \param[in] node  Node to check (may or may not be a bundle node)
 *
 * \return \p node itself if it is not a bundle node, otherwise the location
 *         reported for the container of the node's remote connection resource
 */
static const pcmk_node_t *
get_bundle_node_host(const pcmk_node_t *node)
{
    const pcmk_resource_t *container = NULL;

    if (!pe__is_bundle_node(node)) {
        return node;
    }

    container = node->details->remote_rsc->container;
    return container->fns->location(container, NULL, 0);
}
/*!
* \internal
* \brief Find a bundle container compatible with a dependent resource
*
* \param[in] dependent Dependent resource in colocation with bundle
* \param[in] bundle Bundle that \p dependent is colocated with
*
* \return A container from \p bundle assigned to the same node as \p dependent
* if assigned, otherwise assigned to any of dependent's allowed nodes,
* otherwise NULL.
*/
static pcmk_resource_t *
compatible_container(const pcmk_resource_t *dependent,
const pcmk_resource_t *bundle)
{
GList *scratch = NULL;
struct match_data match_data = { NULL, NULL };
// If dependent is assigned, only check there
match_data.node = dependent->fns->location(dependent, NULL, 0);
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node != NULL) {
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
return match_data.container;
}
// Otherwise, check for any of the dependent's allowed nodes
scratch = g_hash_table_get_values(dependent->allowed_nodes);
scratch = pcmk__sort_nodes(scratch, NULL);
for (const GList *iter = scratch; iter != NULL; iter = iter->next) {
match_data.node = iter->data;
match_data.node = get_bundle_node_host(match_data.node);
if (match_data.node == NULL) {
continue;
}
pe__foreach_const_bundle_replica(bundle, match_replica_container,
&match_data);
if (match_data.container != NULL) {
break;
}
}
g_list_free(scratch);
return match_data.container;
}
/* State passed as user data to replica_apply_coloc_score() while iterating
* over the replicas of a colocation's primary bundle. Field order matters: the
* struct is initialized positionally by pcmk__bundle_apply_coloc_score().
*/
struct coloc_data {
const pcmk__colocation_t *colocation; // Colocation constraint being applied
pcmk_resource_t *dependent; // Dependent resource in the colocation
GList *container_hosts; // Nodes collected from eligible replica containers
};
/*!
* \internal
* \brief Apply a colocation score to replica node scores or resource priority
*
* \param[in] replica Replica of primary bundle resource in colocation
* \param[in,out] user_data struct coloc_data for colocation being applied
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
replica_apply_coloc_score(const pcmk__bundle_replica_t *replica,
void *user_data)
{
struct coloc_data *coloc_data = user_data;
pcmk_node_t *chosen = NULL;
if (coloc_data->colocation->score < INFINITY) {
replica->container->cmds->apply_coloc_score(coloc_data->dependent,
replica->container,
coloc_data->colocation,
false);
return true;
}
chosen = replica->container->fns->location(replica->container, NULL, 0);
if ((chosen == NULL)
|| is_set_recursive(replica->container, pcmk_rsc_blocked, true)) {
return true;
}
if ((coloc_data->colocation->primary_role >= pcmk_role_promoted)
&& ((replica->child == NULL)
|| (replica->child->next_role < pcmk_role_promoted))) {
return true;
}
pcmk__rsc_trace(pe__const_top_resource(replica->container, true),
"Allowing mandatory colocation %s using %s @%d",
coloc_data->colocation->id, pe__node_name(chosen),
chosen->weight);
coloc_data->container_hosts = g_list_prepend(coloc_data->container_hosts,
chosen);
return true;
}
/*!
 * \internal
 * \brief Apply a colocation whose primary is a bundle
 *
 * Given a colocation constraint, apply its score to the dependent's
 * allowed node scores (if we are still placing resources) or priority (if
 * we are choosing promotable clone instance roles).
 *
 * Nothing is applied while the primary bundle is still unassigned.
 *
 * \param[in,out] dependent      Dependent resource in colocation (a primitive)
 * \param[in]     primary        Primary resource in colocation (the bundle)
 * \param[in]     colocation     Colocation constraint to apply
 * \param[in]     for_dependent  true if called on behalf of dependent (which
 *                               is asserted never to be the case here)
 */
void
pcmk__bundle_apply_coloc_score(pcmk_resource_t *dependent,
                               const pcmk_resource_t *primary,
                               const pcmk__colocation_t *colocation,
                               bool for_dependent)
{
    struct coloc_data walk = {
        .colocation = colocation,
        .dependent = dependent,
        .container_hosts = NULL,
    };

    /* This should never be called for the bundle itself as a dependent.
     * Instead, we add its colocation constraints to its containers and bundled
     * primitive and call the apply_coloc_score() method for them as dependents.
     */
    CRM_ASSERT((primary != NULL)
               && (primary->variant == pcmk_rsc_variant_bundle)
               && (dependent != NULL)
               && (dependent->variant == pcmk_rsc_variant_primitive)
               && (colocation != NULL) && !for_dependent);

    if (pcmk_is_set(primary->flags, pcmk_rsc_unassigned)) {
        pcmk__rsc_trace(primary,
                        "Skipping applying colocation %s "
                        "because %s is still provisional",
                        colocation->id, primary->id);
        return;
    }

    pcmk__rsc_trace(primary, "Applying colocation %s (%s with %s at %s)",
                    colocation->id, dependent->id, primary->id,
                    pcmk_readable_score(colocation->score));

    if (colocation->dependent->variant <= pcmk_rsc_variant_group) {
        /* The constraint's dependent is not a clone or bundle, so apply the
         * colocation replica by replica, then (if mandatory) restrict the
         * dependent to the hosts collected along the way.
         */
        pe__foreach_const_bundle_replica(primary, replica_apply_coloc_score,
                                         &walk);
        if (colocation->score >= INFINITY) {
            pcmk__colocation_intersect_nodes(dependent, primary, colocation,
                                             walk.container_hosts, false);
        }
        g_list_free(walk.container_hosts);

    } else {
        /* The constraint's dependent is a clone or bundle, so "dependent" here
         * is one of its instances. Look for a compatible instance of this
         * bundle.
         */
        const pcmk_resource_t *partner = compatible_container(dependent,
                                                              primary);

        if (partner != NULL) { // Success, we found one
            pcmk__rsc_debug(primary, "Pairing %s with %s",
                            dependent->id, partner->id);
            dependent->cmds->apply_coloc_score(dependent, partner, colocation,
                                               true);

        } else if (colocation->score >= INFINITY) { // Failure, and it's fatal
            crm_notice("%s cannot run because there is no compatible "
                       "instance of %s to colocate with",
                       dependent->id, primary->id);
            pcmk__assign_resource(dependent, NULL, true, true);

        } else { // Failure, but we can ignore it
            pcmk__rsc_debug(primary,
                            "%s cannot be colocated with any instance of %s",
                            dependent->id, primary->id);
        }
    }
}
/* Bundle implementation of pcmk_assignment_methods_t:with_this_colocations()
 *
 * Adds the bundle's rsc_cons_lhs colocations to *list on behalf of orig_rsc
 * when orig_rsc is the bundle itself, one of its replica containers, or (once
 * roles are being chosen) the promotable bundled resource or one of its
 * instances.
 */
void
pcmk__with_bundle_colocations(const pcmk_resource_t *rsc,
                              const pcmk_resource_t *orig_rsc, GList **list)
{
    const pcmk_resource_t *inner = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
               && (orig_rsc != NULL) && (list != NULL));

    // The bundle itself and its containers always get its colocations
    if ((orig_rsc == rsc)
        || pcmk_is_set(orig_rsc->flags, pcmk_rsc_replica_container)) {
        pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
        return;
    }

    /* Beyond that, only a promotable bundled resource (or something below it)
     * can get the colocations, and only once role selection has begun
     */
    inner = pe__bundled_resource(rsc);
    if (inner == NULL) {
        return;
    }
    if (!pcmk_is_set(inner->flags, pcmk_rsc_promotable)) {
        return;
    }
    if (pe__const_top_resource(orig_rsc, false) != inner) {
        return;
    }

    if (orig_rsc != inner) {
        /* orig_rsc is an instance. If it is already assigned, any request for
         * its colocations now is for setting roles.
         */
        if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
            pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
        }
        return;
    }

    // orig_rsc is the clone: wanted only if roles are (or have been) set
    if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) {
        pcmk__add_with_this_list(list, rsc->rsc_cons_lhs, orig_rsc);
    }
}
/* Bundle implementation of pcmk_assignment_methods_t:this_with_colocations()
 *
 * Adds the bundle's rsc_cons colocations to *list on behalf of orig_rsc when
 * orig_rsc is the bundle itself, one of its replica containers, or (once roles
 * are being chosen) the promotable bundled resource or one of its instances.
 */
void
pcmk__bundle_with_colocations(const pcmk_resource_t *rsc,
                              const pcmk_resource_t *orig_rsc, GList **list)
{
    const pcmk_resource_t *inner = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
               && (orig_rsc != NULL) && (list != NULL));

    // The bundle itself and its containers always get its colocations
    if ((orig_rsc == rsc)
        || pcmk_is_set(orig_rsc->flags, pcmk_rsc_replica_container)) {
        pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
        return;
    }

    /* Beyond that, only a promotable bundled resource (or something below it)
     * can get the colocations, and only once role selection has begun
     */
    inner = pe__bundled_resource(rsc);
    if (inner == NULL) {
        return;
    }
    if (!pcmk_is_set(inner->flags, pcmk_rsc_promotable)) {
        return;
    }
    if (pe__const_top_resource(orig_rsc, false) != inner) {
        return;
    }

    if (orig_rsc != inner) {
        /* orig_rsc is an instance. If it is already assigned, any request for
         * its colocations now is for setting roles.
         */
        if (!pcmk_is_set(orig_rsc->flags, pcmk_rsc_unassigned)) {
            pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
        }
        return;
    }

    // orig_rsc is the clone: wanted only if roles are (or have been) set
    if (pe__clone_flag_is_set(orig_rsc, pcmk__clone_promotion_constrained)) {
        pcmk__add_this_with_list(list, rsc->rsc_cons, orig_rsc);
    }
}
/*!
 * \internal
 * \brief Compute the action flags for an action of a bundle resource
 *
 * Notify, promote, and demote style tasks (and unspecified ones) are evaluated
 * across the children of the bundled resource when there is one; every other
 * task is evaluated across the bundle's containers.
 *
 * \param[in,out] action  Bundle resource action to get flags for
 * \param[in]     node    If not NULL, limit effects to this node
 *
 * \return Flags appropriate to \p action on \p node
 */
uint32_t
pcmk__bundle_action_flags(pcmk_action_t *action, const pcmk_node_t *node)
{
    GList *container_list = NULL;
    uint32_t result = 0;
    bool use_clone_children = false;
    pcmk_resource_t *inner = NULL;

    CRM_ASSERT((action != NULL) && (action->rsc != NULL)
               && (action->rsc->variant == pcmk_rsc_variant_bundle));

    inner = pe__bundled_resource(action->rsc);
    if (inner != NULL) {
        // Clone actions are done on the bundled clone resource, not container
        switch (get_complex_task(inner, action->task)) {
            case pcmk_action_unspecified:
            case pcmk_action_notify:
            case pcmk_action_notified:
            case pcmk_action_promote:
            case pcmk_action_promoted:
            case pcmk_action_demote:
            case pcmk_action_demoted:
                use_clone_children = true;
                break;

            default:
                break;
        }
    }

    if (use_clone_children) {
        return pcmk__collective_action_flags(action, inner->children, node);
    }

    container_list = pe__bundle_containers(action->rsc);
    result = pcmk__collective_action_flags(action, container_list, node);
    g_list_free(container_list);
    return result;
}
/*!
 * \internal
 * \brief Replica iterator callback: apply a location constraint to a replica
 *
 * The constraint is applied to the replica's container and IP resources,
 * whichever of them exist.
 *
 * \param[in,out] replica    Replica to apply constraint to
 * \param[in,out] user_data  Location constraint to apply
 *
 * \return true (so that iteration continues with the next replica)
 */
static bool
apply_location_to_replica(pcmk__bundle_replica_t *replica, void *user_data)
{
    pcmk__location_t *constraint = user_data;
    pcmk_resource_t *target = NULL;

    target = replica->container;
    if (target != NULL) {
        target->cmds->apply_location(target, constraint);
    }

    target = replica->ip;
    if (target != NULL) {
        target->cmds->apply_location(target, constraint);
    }
    return true;
}
/*!
 * \internal
 * \brief Apply a location constraint to a bundle resource
 *
 * The constraint is applied to the bundle itself and to each replica. If the
 * constraint is filtered to the promoted or unpromoted role, it is also
 * applied to the bundled resource (if any) and added to the front of that
 * resource's list of location constraints.
 *
 * \param[in,out] rsc       Bundle resource to apply constraint to
 * \param[in,out] location  Location constraint to apply
 */
void
pcmk__bundle_apply_location(pcmk_resource_t *rsc, pcmk__location_t *location)
{
    pcmk_resource_t *inner = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle)
               && (location != NULL));

    pcmk__apply_location(rsc, location);
    pe__foreach_bundle_replica(rsc, apply_location_to_replica, location);

    inner = pe__bundled_resource(rsc);
    if (inner == NULL) {
        return;
    }
    if ((location->role_filter != pcmk_role_unpromoted)
        && (location->role_filter != pcmk_role_promoted)) {
        return; // Only role-specific constraints reach the bundled resource
    }

    inner->cmds->apply_location(inner, location);
    inner->rsc_location = g_list_prepend(inner->rsc_location, location);
}
-#define XPATH_REMOTE "//nvpair[@name='" XML_RSC_ATTR_REMOTE_RA_ADDR "']"
+#define XPATH_REMOTE "//nvpair[@name='" PCMK_REMOTE_RA_ADDR "']"
/*!
* \internal
* \brief Add a bundle replica's actions to transition graph
*
* If the replica's remote connection needs its address filled in (as reported
* by pe__bundle_needs_remote_name()), the address is calculated and stored
* first. The actions of the replica's IP, container, and remote connection
* resources (whichever exist) are then added to the graph, in that order.
*
* \param[in,out] replica Replica to add to graph
* \param[in] user_data Bundle that replica belongs to (for logging only)
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
add_replica_actions_to_graph(pcmk__bundle_replica_t *replica, void *user_data)
{
if ((replica->remote != NULL) && (replica->container != NULL)
&& pe__bundle_needs_remote_name(replica->remote)) {
/* REMOTE_CONTAINER_HACK: Allow remote nodes to run containers that
* run pacemaker-remoted inside, without needing a separate IP for
* the container. This is done by configuring the inner remote's
* connection host as the magic string "#uname", then
* replacing it with the underlying host when needed.
*/
// Find the nvpair in the remote resource's XML named by XPATH_REMOTE
xmlNode *nvpair = get_xpath_object(XPATH_REMOTE, replica->remote->xml,
LOG_ERR);
const char *calculated_addr = NULL;
// Replace the value in replica->remote->xml (if appropriate)
calculated_addr = pe__add_bundle_remote_name(replica->remote,
replica->remote->cluster,
nvpair, "value");
if (calculated_addr != NULL) {
/* Since this is for the bundle as a resource, and not any
* particular action, replace the value in the default
* parameters (not evaluated for node). create_graph_action()
* will grab it from there to replace it in node-evaluated
* parameters.
*/
GHashTable *params = pe_rsc_params(replica->remote,
NULL, replica->remote->cluster);
// Key and value are both freshly duplicated strings
g_hash_table_replace(params,
- strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
+ strdup(PCMK_REMOTE_RA_ADDR),
strdup(calculated_addr));
} else {
pcmk_resource_t *bundle = user_data;
/* The only way to get here is if the remote connection is
* neither currently running nor scheduled to run. That means we
* won't be doing any operations that require addr (only start
* requires it; we additionally use it to compare digests when
* unpacking status, promote, and migrate_from history, but
* that's already happened by this point).
*/
pcmk__rsc_info(bundle,
"Unable to determine address for bundle %s "
"remote connection", bundle->id);
}
}
// Each of the replica's resources adds its own actions, if it exists
if (replica->ip != NULL) {
replica->ip->cmds->add_actions_to_graph(replica->ip);
}
if (replica->container != NULL) {
replica->container->cmds->add_actions_to_graph(replica->container);
}
if (replica->remote != NULL) {
replica->remote->cmds->add_actions_to_graph(replica->remote);
}
return true;
}
/*!
 * \internal
 * \brief Add all of a bundle resource's actions to the transition graph
 *
 * The bundled resource (if any) adds its actions first, followed by each
 * replica in turn.
 *
 * \param[in,out] rsc  Bundle resource whose actions should be added
 */
void
pcmk__bundle_add_actions_to_graph(pcmk_resource_t *rsc)
{
    pcmk_resource_t *inner = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    inner = pe__bundled_resource(rsc);
    if (inner != NULL) {
        inner->cmds->add_actions_to_graph(inner);
    }

    // The bundle is passed along as user data for the per-replica callback
    pe__foreach_bundle_replica(rsc, add_replica_actions_to_graph, rsc);
}
/* State passed as user data to create_replica_probes() while iterating over a
* bundle's replicas. Field order matters: the struct is initialized
* positionally by pcmk__bundle_create_probe().
*/
struct probe_data {
pcmk_resource_t *bundle; // Bundle being probed
pcmk_node_t *node; // Node to create probes on
bool any_created; // Whether any probes have been created
};
/*!
 * \internal
 * \brief Replica iterator callback: order a replica container's start after
 *        another replica container's probe
 *
 * The ordering applies only if both actions are on the same node. The probed
 * replica itself, and any replica without a container, is left alone.
 *
 * \param[in,out] replica    Replica whose container start should be ordered
 * \param[in,out] user_data  Replica whose container probe comes first
 *
 * \return true (so that iteration continues with the next replica)
 */
static bool
order_replica_start_after(pcmk__bundle_replica_t *replica, void *user_data)
{
    pcmk__bundle_replica_t *probed = user_data;
    char *probe_op = NULL;
    char *start_op = NULL;

    if (replica == probed) {
        return true;
    }
    if (replica->container == NULL) {
        return true;
    }

    // Both operation keys are handed over to the new ordering
    probe_op = pcmk__op_key(probed->container->id, PCMK_ACTION_MONITOR, 0);
    start_op = pcmk__op_key(replica->container->id, PCMK_ACTION_START, 0);

    pcmk__new_ordering(probed->container, probe_op, NULL,
                       replica->container, start_op, NULL,
                       pcmk__ar_ordered|pcmk__ar_if_on_same_node,
                       replica->container->cluster);
    return true;
}
/*!
* \internal
* \brief Create probes for a bundle replica's resources
*
* Probes are attempted, in this order, for the replica's IP, its child (only
* when the node being probed is the replica's own node), its container, and
* its remote connection. Whenever one is created, the any_created member of
* the user data is set.
*
* \param[in,out] replica Replica to create probes for
* \param[in,out] user_data struct probe_data
*
* \return true (to indicate that any further replicas should be processed)
*/
static bool
create_replica_probes(pcmk__bundle_replica_t *replica, void *user_data)
{
struct probe_data *probe_data = user_data;
// Each create_probe() call below runs only if the resource exists
if ((replica->ip != NULL)
&& replica->ip->cmds->create_probe(replica->ip, probe_data->node)) {
probe_data->any_created = true;
}
// The child is probed only on the replica's own node
if ((replica->child != NULL)
&& pe__same_node(probe_data->node, replica->node)
&& replica->child->cmds->create_probe(replica->child,
probe_data->node)) {
probe_data->any_created = true;
}
if ((replica->container != NULL)
&& replica->container->cmds->create_probe(replica->container,
probe_data->node)) {
probe_data->any_created = true;
/* If we're limited to one replica per host (due to
* the lack of an IP range probably), then we don't
* want any of our peer containers starting until
* we've established that no other copies are already
* running.
*
* Partly this is to ensure that the maximum replicas per host is
* observed, but also to ensure that the containers
* don't fail to start because the necessary port
* mappings (which won't include an IP for uniqueness)
* are already taken
*/
if (probe_data->bundle->fns->max_per_node(probe_data->bundle) == 1) {
pe__foreach_bundle_replica(probe_data->bundle,
order_replica_start_after, replica);
}
}
if ((replica->container != NULL) && (replica->remote != NULL)
&& replica->remote->cmds->create_probe(replica->remote,
probe_data->node)) {
/* Do not probe the remote resource until we know where the container is
* running. This is required for REMOTE_CONTAINER_HACK to correctly
* probe remote resources.
*/
// Look up the remote's probe on this node by its operation key
char *probe_uuid = pcmk__op_key(replica->remote->id,
PCMK_ACTION_MONITOR, 0);
pcmk_action_t *probe = find_first_action(replica->remote->actions,
probe_uuid, NULL,
probe_data->node);
free(probe_uuid);
// any_created is set here only if the probe action was actually found
if (probe != NULL) {
probe_data->any_created = true;
pcmk__rsc_trace(probe_data->bundle, "Ordering %s probe on %s",
replica->remote->id,
pe__node_name(probe_data->node));
// Container start comes before the remote connection's probe
pcmk__new_ordering(replica->container,
pcmk__op_key(replica->container->id,
PCMK_ACTION_START, 0),
NULL, replica->remote, NULL, probe,
pcmk__ar_nested_remote_probe,
probe_data->bundle->cluster);
}
}
return true;
}
/*!
 * \internal
 * \brief Schedule any probes needed for a bundle resource on a node
 *
 * Every replica of the bundle is given the chance to create probes for its
 * resources on \p node.
 *
 * \param[in,out] rsc   Bundle resource to create probes for
 * \param[in,out] node  Node to create probe on
 *
 * \return true if any probe was created, otherwise false
 */
bool
pcmk__bundle_create_probe(pcmk_resource_t *rsc, pcmk_node_t *node)
{
    struct probe_data walk = {
        .bundle = rsc,
        .node = node,
        .any_created = false,
    };

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    pe__foreach_bundle_replica(rsc, create_replica_probes, &walk);
    return walk.any_created;
}
/*!
 * \internal
 * \brief Replica iterator callback: output the actions of one bundle replica
 *
 * The replica's IP, container, remote connection, and child resources each
 * output their actions, in that order, skipping any that do not exist.
 *
 * \param[in,out] replica    Replica to output actions for
 * \param[in]     user_data  Unused
 *
 * \return true (so that iteration continues with the next replica)
 */
static bool
output_replica_actions(pcmk__bundle_replica_t *replica, void *user_data)
{
    pcmk_resource_t *member = NULL;

    member = replica->ip;
    if (member != NULL) {
        member->cmds->output_actions(member);
    }

    member = replica->container;
    if (member != NULL) {
        member->cmds->output_actions(member);
    }

    member = replica->remote;
    if (member != NULL) {
        member->cmds->output_actions(member);
    }

    member = replica->child;
    if (member != NULL) {
        member->cmds->output_actions(member);
    }
    return true;
}
/*!
 * \internal
 * \brief Output a summary of the actions scheduled for a bundle resource
 *
 * The output is produced replica by replica.
 *
 * \param[in,out] rsc  Bundle resource to output actions for
 */
void
pcmk__output_bundle_actions(pcmk_resource_t *rsc)
{
    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    // No user data is needed by the per-replica callback
    pe__foreach_bundle_replica(rsc, output_replica_actions, NULL);
}
/* Bundle implementation of pcmk_assignment_methods_t:add_utilization()
 *
 * Does nothing unless the bundle is still unassigned, in which case the
 * request is passed on to the bundle's first container (if there is one).
 */
void
pcmk__bundle_add_utilization(const pcmk_resource_t *rsc,
                             const pcmk_resource_t *orig_rsc, GList *all_rscs,
                             GHashTable *utilization)
{
    pcmk_resource_t *first = NULL;

    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));

    if (pcmk_is_set(rsc->flags, pcmk_rsc_unassigned)) {
        /* All bundle replicas are identical, so using the utilization of the
         * first is sufficient for any. Only the implicit container resource
         * can have utilization values.
         */
        first = pe__first_container(rsc);
        if (first != NULL) {
            first->cmds->add_utilization(first, orig_rsc, all_rscs,
                                         utilization);
        }
    }
}
/* Bundle implementation of pcmk_assignment_methods_t:shutdown_lock()
 *
 * Only validates its argument: shutdown locks are currently not supported for
 * bundles, so there is nothing else to do.
 */
void
pcmk__bundle_shutdown_lock(pcmk_resource_t *rsc)
{
    CRM_ASSERT((rsc != NULL) && (rsc->variant == pcmk_rsc_variant_bundle));
}
diff --git a/lib/pacemaker/pcmk_sched_remote.c b/lib/pacemaker/pcmk_sched_remote.c
index d952dbcb24..c631807a45 100644
--- a/lib/pacemaker/pcmk_sched_remote.c
+++ b/lib/pacemaker/pcmk_sched_remote.c
@@ -1,735 +1,734 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <sys/param.h>
#include <crm/crm.h>
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <glib.h>
#include <crm/pengine/status.h>
#include <pacemaker-internal.h>
#include "libpacemaker_private.h"
// Classification of a remote node's connection (see get_remote_node_state())
enum remote_connection_state {
    remote_state_unknown = 0,
    remote_state_alive = 1,
    remote_state_resting = 2,
    remote_state_failed = 3,
    remote_state_stopped = 4
};

/*!
 * \internal
 * \brief Get a printable name for a remote connection state
 *
 * \param[in] state  Remote connection state to describe
 *
 * \return Constant string naming \p state, or "impossible" if \p state is not
 *         one of the enumerated values
 */
static const char *
state2text(enum remote_connection_state state)
{
    static const char *const names[] = {
        [remote_state_unknown] = "unknown",
        [remote_state_alive]   = "alive",
        [remote_state_resting] = "resting",
        [remote_state_failed]  = "failed",
        [remote_state_stopped] = "stopped",
    };
    const unsigned int slot = (unsigned int) state;

    // Anything outside the table is not a valid state
    if (slot >= (sizeof(names) / sizeof(names[0]))) {
        return "impossible";
    }
    return names[slot];
}
/* Both ordering helpers below always add pcmk__ar_guest_allowed, which exempts
 * these internally generated constraints from the prohibition of user
 * constraints involving remote connection resources.
 *
 * The start ordering additionally uses pcmk__ar_unrunnable_first_blocks so that
 * the specified action is not runnable if the start is not runnable.
 */

/*!
 * \internal
 * \brief Order a resource's start before a given action
 *
 * Nothing is done if either argument is NULL.
 *
 * \param[in,out] first_rsc    Resource whose start comes first
 * \param[in,out] then_action  Action to order after the start
 * \param[in]     extra        Additional ordering flags to set
 */
static inline void
order_start_then_action(pcmk_resource_t *first_rsc, pcmk_action_t *then_action,
                        uint32_t extra)
{
    if (first_rsc == NULL) {
        return;
    }
    if (then_action == NULL) {
        return;
    }

    pcmk__new_ordering(first_rsc, start_key(first_rsc), NULL,
                       then_action->rsc, NULL, then_action,
                       pcmk__ar_guest_allowed
                       |pcmk__ar_unrunnable_first_blocks
                       |extra,
                       first_rsc->cluster);
}
/*!
 * \internal
 * \brief Order a given action before a resource's stop
 *
 * The ordering always includes pcmk__ar_guest_allowed. Nothing is done if
 * either the action or the resource is NULL.
 *
 * \param[in,out] first_action  Action that comes first
 * \param[in,out] then_rsc      Resource whose stop is ordered after the action
 * \param[in]     extra         Additional ordering flags to set
 */
static inline void
order_action_then_stop(pcmk_action_t *first_action, pcmk_resource_t *then_rsc,
                       uint32_t extra)
{
    if (first_action == NULL) {
        return;
    }
    if (then_rsc == NULL) {
        return;
    }

    pcmk__new_ordering(first_action->rsc, NULL, first_action,
                       then_rsc, stop_key(then_rsc), NULL,
                       pcmk__ar_guest_allowed|extra, then_rsc->cluster);
}
/*!
* \internal
* \brief Classify the state of a remote node's connection
*
* The result depends on whether the node's connection resource is going to
* run anywhere, whether it has failed, and the condition of the cluster node
* where it is currently active (if any).
*
* \param[in] node Node to check (asserted to be non-NULL and to have a
* connection resource in node->details->remote_rsc)
*
* \return State of the connection to \p node
*/
static enum remote_connection_state
get_remote_node_state(const pcmk_node_t *node)
{
const pcmk_resource_t *remote_rsc = NULL;
const pcmk_node_t *cluster_node = NULL;
CRM_ASSERT(node != NULL);
remote_rsc = node->details->remote_rsc;
CRM_ASSERT(remote_rsc != NULL);
// Node where the connection resource is currently active (NULL if none)
cluster_node = pe__current_node(remote_rsc);
/* If the cluster node the remote connection resource resides on
* is unclean or went offline, we can't process any operations
* on that remote node until after it starts elsewhere.
*/
if ((remote_rsc->next_role == pcmk_role_stopped)
|| (remote_rsc->allocated_to == NULL)) {
// The connection resource is not going to run anywhere
if ((cluster_node != NULL) && cluster_node->details->unclean) {
/* The remote connection is failed because its resource is on a
* failed node and can't be recovered elsewhere, so we must fence.
*/
return remote_state_failed;
}
if (!pcmk_is_set(remote_rsc->flags, pcmk_rsc_failed)) {
/* Connection resource is cleanly stopped */
return remote_state_stopped;
}
/* Connection resource is failed */
if ((remote_rsc->next_role == pcmk_role_stopped)
&& remote_rsc->remote_reconnect_ms
&& node->details->remote_was_fenced
&& !pe__shutdown_requested(node)) {
/* We won't know whether the connection is recoverable until the
* reconnect interval expires and we reattempt connection.
*/
return remote_state_unknown;
}
/* The remote connection is in a failed state. If there are any
* resources known to be active on it (stop) or in an unknown state
* (probe), we must assume the worst and fence it.
*/
return remote_state_failed;
} else if (cluster_node == NULL) {
/* Connection is recoverable but not currently running anywhere, so see
* if we can recover it first
*/
return remote_state_unknown;
} else if (cluster_node->details->unclean
|| !(cluster_node->details->online)) {
// Connection is running on a dead node, see if we can recover it first
return remote_state_resting;
} else if (pcmk__list_of_multiple(remote_rsc->running_on)
&& (remote_rsc->partial_migration_source != NULL)
&& (remote_rsc->partial_migration_target != NULL)) {
/* We're in the middle of migrating a connection resource, so wait until
* after the migration completes before performing any actions.
*/
return remote_state_resting;
}
// None of the above applies, so the connection is treated as usable
return remote_state_alive;
}
/*!
* \internal
* \brief Order actions on remote node relative to actions for the connection
*
* The ordering created depends on the action's task and on the connection
* state reported by get_remote_node_state(). In some failed-connection cases,
* fencing of the node is requested instead of (or in addition to) an ordering.
* Actions without a resource are ignored.
*
* \param[in,out] action An action scheduled on a Pacemaker Remote node
*
* \note The connection state is computed before the check for a NULL
* resource, and get_remote_node_state() asserts that the node is
* non-NULL, so \p action must always have a node.
*/
static void
apply_remote_ordering(pcmk_action_t *action)
{
pcmk_resource_t *remote_rsc = NULL;
enum action_tasks task = text2task(action->task);
enum remote_connection_state state = get_remote_node_state(action->node);
uint32_t order_opts = pcmk__ar_none;
if (action->rsc == NULL) {
return;
}
CRM_ASSERT(pe__is_guest_or_remote_node(action->node));
remote_rsc = action->node->details->remote_rsc;
CRM_ASSERT(remote_rsc != NULL);
crm_trace("Order %s action %s relative to %s%s (state: %s)",
action->task, action->uuid,
pcmk_is_set(remote_rsc->flags, pcmk_rsc_failed)? "failed " : "",
remote_rsc->id, state2text(state));
if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
/* Migration ops map to pcmk_action_unspecified, but we need to apply
* the same ordering as for stop or demote (see get_router_node()).
*/
task = pcmk_action_stop;
}
switch (task) {
case pcmk_action_start:
case pcmk_action_promote:
order_opts = pcmk__ar_none;
if (state == remote_state_failed) {
/* Force recovery, by making this action required */
pe__set_order_flags(order_opts, pcmk__ar_first_implies_then);
}
/* Ensure connection is up before running this action */
order_start_then_action(remote_rsc, action, order_opts);
break;
case pcmk_action_stop:
if (state == remote_state_alive) {
// Connection is usable, so stop the resource before the connection
order_action_then_stop(action, remote_rsc,
pcmk__ar_then_implies_first);
} else if (state == remote_state_failed) {
/* The resource is active on the node, but since we don't have a
* valid connection, the only way to stop the resource is by
* fencing the node. There is no need to order the stop relative
* to the remote connection, since the stop will become implied
* by the fencing.
*/
pe_fence_node(remote_rsc->cluster, action->node,
"resources are active but "
"connection is unrecoverable",
FALSE);
} else if (remote_rsc->next_role == pcmk_role_stopped) {
/* State must be remote_state_unknown or remote_state_stopped.
* Since the connection is not coming back up in this
* transition, stop this resource first.
*/
order_action_then_stop(action, remote_rsc,
pcmk__ar_then_implies_first);
} else {
/* The connection is going to be started somewhere else, so
* stop this resource after that completes.
*/
order_start_then_action(remote_rsc, action, pcmk__ar_none);
}
break;
case pcmk_action_demote:
/* Only order this demote relative to the connection start if the
* connection isn't being torn down. Otherwise, the demote would be
* blocked because the connection start would not be allowed.
*/
if ((state == remote_state_resting)
|| (state == remote_state_unknown)) {
order_start_then_action(remote_rsc, action, pcmk__ar_none);
} /* Otherwise we can rely on the stop ordering */
break;
default:
/* Wait for the connection resource to be up */
if (pcmk__action_is_recurring(action)) {
/* In case we ever get the recovery logic wrong, force
* recurring monitors to be restarted, even if just
* the connection was re-established
*/
order_start_then_action(remote_rsc, action,
pcmk__ar_first_implies_then);
} else {
pcmk_node_t *cluster_node = pe__current_node(remote_rsc);
if ((task == pcmk_action_monitor) && (state == remote_state_failed)) {
/* We would only be here if we do not know the state of the
* resource on the remote node. Since we have no way to find
* out, it is necessary to fence the node.
*/
pe_fence_node(remote_rsc->cluster, action->node,
"resources are in unknown state "
"and connection is unrecoverable", FALSE);
}
if ((cluster_node != NULL) && (state == remote_state_stopped)) {
/* The connection is currently up, but is going down
* permanently. Make sure we check services are actually
* stopped _before_ we let the connection get closed.
*/
order_action_then_stop(action, remote_rsc,
pcmk__ar_unrunnable_first_blocks);
} else {
order_start_then_action(remote_rsc, action, pcmk__ar_none);
}
}
break;
}
}
/*!
 * \internal
 * \brief Order an action on a guest node relative to the guest's container
 *        and connection resources
 *
 * VMs are also classified as containers for these purposes, in that both
 * involve a "thing" running on a real or remote cluster node. Treating them
 * alike lets us be smarter about the type and extent of recovery actions
 * required in various scenarios.
 *
 * \param[in,out] action  Resource action assigned to a guest node
 */
static void
apply_container_ordering(pcmk_action_t *action)
{
    pcmk_resource_t *remote_rsc = NULL;
    pcmk_resource_t *container = NULL;
    enum action_tasks task = text2task(action->task);

    CRM_ASSERT(action->rsc != NULL);
    CRM_ASSERT(action->node != NULL);
    CRM_ASSERT(pe__is_guest_or_remote_node(action->node));

    remote_rsc = action->node->details->remote_rsc;
    CRM_ASSERT(remote_rsc != NULL);

    container = remote_rsc->container;
    CRM_ASSERT(container != NULL);

    // A failed container requires the guest node to be fenced
    if (pcmk_is_set(container->flags, pcmk_rsc_failed)) {
        pe_fence_node(action->rsc->cluster, action->node, "container failed",
                      FALSE);
    }

    crm_trace("Order %s action %s relative to %s%s for %s%s",
              action->task, action->uuid,
              pcmk_is_set(remote_rsc->flags, pcmk_rsc_failed)? "failed " : "",
              remote_rsc->id,
              pcmk_is_set(container->flags, pcmk_rsc_failed)? "failed " : "",
              container->id);

    /* Migration ops map to pcmk_action_unspecified, but they need the same
     * ordering as stop or demote (see get_router_node()).
     */
    if (pcmk__strcase_any_of(action->task, PCMK_ACTION_MIGRATE_TO,
                             PCMK_ACTION_MIGRATE_FROM, NULL)) {
        task = pcmk_action_stop;
    }

    switch (task) {
        case pcmk_action_start:
        case pcmk_action_promote:
            // Force resource recovery if the container is recovered
            order_start_then_action(container, action,
                                    pcmk__ar_first_implies_then);

            // Wait for the connection resource to be up, too
            order_start_then_action(remote_rsc, action, pcmk__ar_none);
            break;

        case pcmk_action_stop:
        case pcmk_action_demote:
            /* When the container representing a guest node has failed, any
             * stop or demote of resources running on the guest is implied by
             * the container stopping (similar to how fencing works for
             * cluster nodes and remote nodes), so no ordering is needed.
             *
             * Otherwise, ensure the operation happens before the connection
             * is brought down.
             *
             * If we really wanted to, we could order these after the
             * connection start, IFF the container's current role was stopped
             * (otherwise we re-introduce an ordering loop when the connection
             * is restarting).
             */
            if (!pcmk_is_set(container->flags, pcmk_rsc_failed)) {
                order_action_then_stop(action, remote_rsc, pcmk__ar_none);
            }
            break;

        default:
            // Wait for the connection resource to be up
            if (!pcmk__action_is_recurring(action)) {
                order_start_then_action(remote_rsc, action, pcmk__ar_none);

            } else if (task != pcmk_action_unspecified) {
                /* In case we ever get the recovery logic wrong, force
                 * recurring monitors to be restarted, even if just the
                 * connection was re-established
                 */
                order_start_then_action(remote_rsc, action,
                                        pcmk__ar_first_implies_then);
            }
            break;
    }
}
/*!
 * \internal
 * \brief Create orderings between resource actions and the remote connections
 *        they are executed through
 *
 * \param[in,out] scheduler  Scheduler data
 */
void
pcmk__order_remote_connection_actions(pcmk_scheduler_t *scheduler)
{
    // Nothing to do unless the cluster has Pacemaker Remote nodes
    if (!pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
        return;
    }

    crm_trace("Creating remote connection orderings");

    for (GList *iter = scheduler->actions; iter != NULL; iter = iter->next) {
        pcmk_action_t *action = iter->data;
        pcmk_resource_t *remote = NULL;

        // Only resource actions are of interest
        if (action->rsc == NULL) {
            continue;
        }

        /* Special case: clearing the fail count of an actual remote
         * connection resource must happen before any start of that resource
         * in this transition.
         */
        if (action->rsc->is_remote_node
            && pcmk__str_eq(action->task, PCMK_ACTION_CLEAR_FAILCOUNT,
                            pcmk__str_none)) {
            char *start = pcmk__op_key(action->rsc->id, PCMK_ACTION_START, 0);

            pcmk__new_ordering(action->rsc, NULL, action, action->rsc, start,
                               NULL, pcmk__ar_ordered, scheduler);
            continue;
        }

        /* Only real actions assigned to a guest or remote node are of
         * interest.
         *
         * @TODO Skipping pseudo-actions is probably wrong; pseudo-actions
         * might be converted to real actions and vice versa later in
         * update_actions() at the end of pcmk__apply_orderings().
         */
        if ((action->node == NULL)
            || !pe__is_guest_or_remote_node(action->node)
            || pcmk_is_set(action->flags, pcmk_action_pseudo)) {
            continue;
        }

        remote = action->node->details->remote_rsc;
        if (remote == NULL) {
            // Orphaned
            continue;
        }

        /* Another special case: if a resource is moving to a Pacemaker Remote
         * node, order the stop on the original node after any start of the
         * remote connection. This ensures that if the connection fails to
         * start, we leave the resource running on the original node.
         */
        if (pcmk__str_eq(action->task, PCMK_ACTION_START, pcmk__str_none)) {
            GList *item = action->rsc->actions;

            for (; item != NULL; item = item->next) {
                pcmk_action_t *rsc_action = item->data;

                if (pe__same_node(rsc_action->node, action->node)) {
                    continue;
                }
                if (pcmk__str_eq(rsc_action->task, PCMK_ACTION_STOP,
                                 pcmk__str_none)) {
                    pcmk__new_ordering(remote, start_key(remote), NULL,
                                       action->rsc, NULL, rsc_action,
                                       pcmk__ar_ordered, scheduler);
                }
            }
        }

        /* The action occurs across a remote connection, so create ordering
         * constraints that guarantee the action occurs while the node is
         * active (after start, before stop ... things like that).
         *
         * This is somewhat brittle in that we need to make sure the results of
         * this ordering are compatible with the result of get_router_node().
         * It would probably be better to add XML_LRM_ATTR_ROUTER_NODE as part
         * of this logic rather than create_graph_action().
         */
        if (remote->container == NULL) {
            crm_trace("Remote ordering for %s", action->uuid);
            apply_remote_ordering(action);
        } else {
            crm_trace("Container ordering for %s", action->uuid);
            apply_container_ordering(action);
        }
    }
}
/*!
 * \internal
 * \brief Check whether a node is a remote node whose connection has failed
 *
 * \param[in] node  Node to check
 *
 * \return true if \p node is a remote node with a connection resource and its
 *         remote node state is failed, otherwise false
 */
bool
pcmk__is_failed_remote_node(const pcmk_node_t *node)
{
    if (!pe__is_remote_node(node)) {
        return false;
    }
    if (node->details->remote_rsc == NULL) {
        return false;
    }
    return get_remote_node_state(node) == remote_state_failed;
}
/*!
 * \internal
 * \brief Check whether a resource is the container of a given guest node
 *
 * \param[in] rsc   Resource to check
 * \param[in] node  Node to check
 *
 * \return true if \p rsc has fillers, \p node has a connection resource, and
 *         that connection's container is \p rsc; otherwise false
 */
bool
pcmk__rsc_corresponds_to_guest(const pcmk_resource_t *rsc,
                               const pcmk_node_t *node)
{
    // A resource with nothing inside it cannot be a guest's container
    if ((rsc == NULL) || (rsc->fillers == NULL)) {
        return false;
    }

    // The node must have a connection resource to compare against
    if ((node == NULL) || (node->details->remote_rsc == NULL)) {
        return false;
    }

    return node->details->remote_rsc->container == rsc;
}
/*!
* \internal
* \brief Get proper connection host that a remote action must be routed through
*
* A remote connection resource might be starting, stopping, or migrating in the
* same transition that an action needs to be executed on its Pacemaker Remote
* node. Determine the proper node that the remote action should be routed
* through.
*
* \param[in] action (Potentially remote) action to route
*
* \return Connection host that action should be routed through if remote,
* otherwise NULL
*/
pcmk_node_t *
pcmk__connection_host_for_action(const pcmk_action_t *action)
{
pcmk_node_t *began_on = NULL;
pcmk_node_t *ended_on = NULL;
bool partial_migration = false;
const char *task = action->task;
if (pcmk__str_eq(task, PCMK_ACTION_STONITH, pcmk__str_none)
|| !pe__is_guest_or_remote_node(action->node)) {
return NULL;
}
CRM_ASSERT(action->node->details->remote_rsc != NULL);
began_on = pe__current_node(action->node->details->remote_rsc);
ended_on = action->node->details->remote_rsc->allocated_to;
if (action->node->details->remote_rsc
&& (action->node->details->remote_rsc->container == NULL)
&& action->node->details->remote_rsc->partial_migration_target) {
partial_migration = true;
}
if (began_on == NULL) {
crm_trace("Routing %s for %s through remote connection's "
"next node %s (starting)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(ended_on? ended_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return ended_on;
}
if (ended_on == NULL) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (stopping)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
if (pe__same_node(began_on, ended_on)) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (not moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
/* If we get here, the remote connection is moving during this transition.
* This means some actions for resources behind the connection will get
* routed through the cluster node the connection resource is currently on,
* and others are routed through the cluster node the connection will end up
* on.
*/
if (pcmk__str_eq(task, PCMK_ACTION_NOTIFY, pcmk__str_none)) {
task = g_hash_table_lookup(action->meta, "notify_operation");
}
/*
* Stop, demote, and migration actions must occur before the connection can
* move (these actions are required before the remote resource can stop). In
* this case, we know these actions have to be routed through the initial
* cluster node the connection resource lived on before the move takes
* place.
*
* The exception is a partial migration of a (non-guest) remote connection
* resource; in that case, all actions (even these) will be ordered after
* the connection's pseudo-start on the migration target, so the target is
* the router node.
*/
if (pcmk__strcase_any_of(task, PCMK_ACTION_CANCEL, PCMK_ACTION_STOP,
PCMK_ACTION_DEMOTE, PCMK_ACTION_MIGRATE_FROM,
PCMK_ACTION_MIGRATE_TO, NULL)
&& !partial_migration) {
crm_trace("Routing %s for %s through remote connection's "
"current node %s (moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(began_on? began_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return began_on;
}
/* Everything else (start, promote, monitor, probe, refresh,
* clear failcount, delete, ...) must occur after the connection starts on
* the node it is moving to.
*/
crm_trace("Routing %s for %s through remote connection's "
"next node %s (moving)%s",
action->task, (action->rsc? action->rsc->id : "no resource"),
(ended_on? ended_on->details->uname : "none"),
partial_migration? " (partial migration)" : "");
return ended_on;
}
/*!
* \internal
* \brief Replace remote connection's addr="#uname" with actual address
*
* REMOTE_CONTAINER_HACK: If a given resource is a remote connection resource
* with its "addr" parameter set to "#uname", pull the actual value from the
* parameters evaluated without a node (which was put there earlier in
* pcmk__create_graph() when the bundle's expand() method was called).
*
* \param[in,out] rsc Resource to check
* \param[in,out] params Resource parameters evaluated per node
*/
void
pcmk__substitute_remote_addr(pcmk_resource_t *rsc, GHashTable *params)
{
- const char *remote_addr = g_hash_table_lookup(params,
- XML_RSC_ATTR_REMOTE_RA_ADDR);
+ const char *remote_addr = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR);
// Only the "#uname" placeholder needs to be substituted
if (pcmk__str_eq(remote_addr, "#uname", pcmk__str_none)) {
// Parameters evaluated without a node
GHashTable *base = pe_rsc_params(rsc, NULL, rsc->cluster);
- remote_addr = g_hash_table_lookup(base, XML_RSC_ATTR_REMOTE_RA_ADDR);
+ remote_addr = g_hash_table_lookup(base, PCMK_REMOTE_RA_ADDR);
// If the node-less parameters have no address, leave params unchanged
if (remote_addr != NULL) {
// Insert newly allocated copies of both key and value
- g_hash_table_insert(params, strdup(XML_RSC_ATTR_REMOTE_RA_ADDR),
+ g_hash_table_insert(params, strdup(PCMK_REMOTE_RA_ADDR),
strdup(remote_addr));
}
}
}
/*!
 * \brief Add special guest node meta-attributes to XML
 *
 * If a given action will be executed on a guest node, add the following as XML
 * attributes (using meta-attribute naming):
 * * The resource's \c PCMK_META_CONTAINER_ATTR_TARGET meta-attribute (usually
 *   set only for bundles), as \c PCMK_META_CONTAINER_ATTR_TARGET
 * * The guest's physical host (current host for "down" actions, next host for
 *   "up" actions), as \c PCMK__META_PHYSICAL_HOST
 *
 * If the guest node has no physical host, then don't add either attribute.
 *
 * \param[in,out] args_xml  XML to add attributes to
 * \param[in]     action    Action to check
 */
void
pcmk__add_guest_meta_to_xml(xmlNode *args_xml, const pcmk_action_t *action)
{
    const pcmk_node_t *guest = action->node;
    const pcmk_node_t *host = NULL;
    gpointer target = NULL;
    enum action_tasks task;

    if (!pe__is_guest_node(guest)) {
        return;
    }

    // For notifications, what matters is the operation being notified about
    task = text2task(action->task);
    if ((task == pcmk_action_notify) || (task == pcmk_action_notified)) {
        task = text2task(g_hash_table_lookup(action->meta, "notify_operation"));
    }

    switch (task) {
        // "Down" actions take place on guest's current host
        case pcmk_action_stop:
        case pcmk_action_stopped:
        case pcmk_action_demote:
        case pcmk_action_demoted:
            host = pe__current_node(guest->details->remote_rsc->container);
            break;

        // "Up" actions take place on guest's next host
        case pcmk_action_start:
        case pcmk_action_started:
        case pcmk_action_monitor:
        case pcmk_action_promote:
        case pcmk_action_promoted:
            host = guest->details->remote_rsc->container->allocated_to;
            break;

        default:
            break;
    }

    // Without a physical host, neither attribute is added
    if (host == NULL) {
        return;
    }

    target = g_hash_table_lookup(action->rsc->meta,
                                 PCMK_META_CONTAINER_ATTR_TARGET);
    hash2metafield((gpointer) PCMK_META_CONTAINER_ATTR_TARGET, target,
                   (gpointer) args_xml);
    hash2metafield((gpointer) PCMK__META_PHYSICAL_HOST,
                   (gpointer) host->details->uname, (gpointer) args_xml);
}
diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c
index 2842d89c9f..c0de71d30f 100644
--- a/lib/pengine/bundle.c
+++ b/lib/pengine/bundle.c
@@ -1,2228 +1,2228 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <ctype.h>
#include <stdint.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/status.h>
#include <crm/pengine/internal.h>
#include <crm/msg_xml.h>
#include <crm/common/output.h>
#include <crm/common/xml_internal.h>
#include <pe_status_private.h>
// Flags stored in the flags member of pe__bundle_mount_t
enum pe__bundle_mount_flags {
// no flags set
pe__bundle_mount_none = 0x00,
// mount instance-specific subdirectory rather than source directly
pe__bundle_mount_subdir = 0x01
};
// A bundle storage mount; used to build each replica's container volume options
typedef struct {
char *source; // source path (gets a per-replica subdirectory if flagged)
char *target; // mount target passed to the container launcher
char *options; // optional mount options (may be NULL)
uint32_t flags; // bitmask of pe__bundle_mount_flags
} pe__bundle_mount_t;
// A bundle port mapping; used to build each replica's container port options
typedef struct {
char *source; // source side of the port mapping
char *target; // target side of the port mapping
} pe__bundle_port_t;
// Container technologies whose launcher option syntax is handled in this file
enum pe__container_agent {
PE__CONTAINER_AGENT_UNKNOWN, // no container resource can be created
PE__CONTAINER_AGENT_DOCKER,
PE__CONTAINER_AGENT_RKT,
PE__CONTAINER_AGENT_PODMAN,
};
// String names of enum pe__container_agent values (see container_agent_str())
#define PE__CONTAINER_AGENT_UNKNOWN_S "unknown"
#define PE__CONTAINER_AGENT_DOCKER_S "docker"
#define PE__CONTAINER_AGENT_RKT_S "rkt"
#define PE__CONTAINER_AGENT_PODMAN_S "podman"
// Variant-specific data for a bundle resource (stored as its variant_opaque)
typedef struct pe__bundle_variant_data_s {
int promoted_max;
int nreplicas; // maximum replicas (see pe__bundle_max())
int nreplicas_per_host; // forced to 1 if control port is used without IP range
char *prefix; // used to build IDs and hostnames of implicit resources
char *image; // "image" parameter of container resources
const char *ip_last; // most recently allocated replica IP (not owned here)
char *host_network; // "nic" parameter of implicit IP resources
char *host_netmask; // "cidr_netmask" of implicit IP resources ("32" if NULL)
char *control_port; // Pacemaker Remote port (default port used if NULL)
char *container_network; // passed to the launcher as --net=
char *ip_range_start; // first IP address to assign to a replica
gboolean add_host; // whether docker/podman get --add-host entries
gchar *container_host_options; // appended to container run options
char *container_command; // "run_cmd" parameter of container resources
char *launcher_options; // appended to container run options
const char *attribute_target;
pcmk_resource_t *child; // resource inside bundle (see pe__bundled_resource())
GList *replicas; // pcmk__bundle_replica_t *
GList *ports; // pe__bundle_port_t *
GList *mounts; // pe__bundle_mount_t *
enum pe__container_agent agent_type;
} pe__bundle_variant_data_t;
/*!
 * \internal
 * \brief Fetch a bundle's variant data, asserting that it really is a bundle
 *
 * \param[out] data  Variable to set to the bundle's variant data
 * \param[in]  rsc   Bundle resource (asserted to be non-NULL, of bundle
 *                   variant, and to have variant data)
 *
 * \note This expands to multiple statements (including the trailing
 *       semicolon) and evaluates \p rsc more than once, so it must be used as
 *       a full statement inside a braced block, with a side-effect-free
 *       argument. The parameters are parenthesized so that arbitrary
 *       expressions may be passed safely.
 */
#define get_bundle_variant_data(data, rsc)                          \
    CRM_ASSERT((rsc) != NULL);                                      \
    CRM_ASSERT((rsc)->variant == pcmk_rsc_variant_bundle);          \
    CRM_ASSERT((rsc)->variant_opaque != NULL);                      \
    (data) = (pe__bundle_variant_data_t *) (rsc)->variant_opaque;
/*!
 * \internal
 * \brief Get maximum number of bundle replicas allowed to run
 *
 * \param[in] rsc  Bundle or bundled resource to check
 *
 * \return Maximum replicas for bundle corresponding to \p rsc
 */
int
pe__bundle_max(const pcmk_resource_t *rsc)
{
    const pe__bundle_variant_data_t *bundle_data = NULL;

    /* Look up the top-level resource once; passing the call expression
     * directly to get_bundle_variant_data() would repeat it for every use of
     * the macro argument.
     */
    const pcmk_resource_t *top = pe__const_top_resource(rsc, true);

    get_bundle_variant_data(bundle_data, top);
    return bundle_data->nreplicas;
}
/*!
 * \internal
 * \brief Get the resource inside a bundle
 *
 * \param[in] rsc  Bundle or bundled resource to check
 *
 * \return Resource inside the bundle corresponding to \p rsc if any,
 *         otherwise NULL
 */
pcmk_resource_t *
pe__bundled_resource(const pcmk_resource_t *rsc)
{
    const pe__bundle_variant_data_t *bundle_data = NULL;

    /* Look up the top-level resource once; passing the call expression
     * directly to get_bundle_variant_data() would repeat it for every use of
     * the macro argument.
     */
    const pcmk_resource_t *top = pe__const_top_resource(rsc, true);

    get_bundle_variant_data(bundle_data, top);
    return bundle_data->child;
}
/*!
 * \internal
 * \brief Get containerized resource corresponding to a given bundle container
 *
 * \param[in] instance  Collective instance that might be a bundle container
 *
 * \return Bundled resource instance inside \p instance if it is a bundle
 *         container instance, otherwise NULL
 */
const pcmk_resource_t *
pe__get_rsc_in_container(const pcmk_resource_t *instance)
{
    const pe__bundle_variant_data_t *data = NULL;
    const pcmk_resource_t *top = pe__const_top_resource(instance, true);
    const GList *item = NULL;

    // Only instances belonging to a bundle can be bundle containers
    if (top == NULL) {
        return NULL;
    }
    if (top->variant != pcmk_rsc_variant_bundle) {
        return NULL;
    }

    get_bundle_variant_data(data, top);

    // Find the replica whose container is the given instance
    item = data->replicas;
    while (item != NULL) {
        const pcmk__bundle_replica_t *replica = item->data;

        if (replica->container == instance) {
            return replica->child;
        }
        item = item->next;
    }
    return NULL;
}
/*!
 * \internal
 * \brief Check whether a given node is created by a bundle
 *
 * \param[in] bundle  Bundle resource to check
 * \param[in] node    Node to check
 *
 * \return true if \p node is the node of one of the replicas of \p bundle,
 *         otherwise false
 */
bool
pe__node_is_bundle_instance(const pcmk_resource_t *bundle,
                            const pcmk_node_t *node)
{
    pe__bundle_variant_data_t *bundle_data = NULL;
    GList *item = NULL;

    get_bundle_variant_data(bundle_data, bundle);

    item = bundle_data->replicas;
    while (item != NULL) {
        pcmk__bundle_replica_t *replica = item->data;

        if (pe__same_node(node, replica->node)) {
            return true;
        }
        item = item->next;
    }
    return false;
}
/*!
* \internal
* \brief Get the container of a bundle's first replica
*
* \param[in] bundle Bundle resource to get container for
*
* \return Container resource from first replica of \p bundle if any,
* otherwise NULL
*/
pcmk_resource_t *
pe__first_container(const pcmk_resource_t *bundle)
{
const pe__bundle_variant_data_t *bundle_data = NULL;
const pcmk__bundle_replica_t *replica = NULL;
get_bundle_variant_data(bundle_data, bundle);
if (bundle_data->replicas == NULL) {
return NULL;
}
replica = bundle_data->replicas->data;
return replica->container;
}
/*!
 * \internal
 * \brief Iterate over bundle replicas
 *
 * \param[in,out] bundle     Bundle to iterate over
 * \param[in]     fn         Function to call for each replica (iteration
 *                           stops as soon as it returns false)
 * \param[in,out] user_data  Pointer to pass to \p fn
 */
void
pe__foreach_bundle_replica(pcmk_resource_t *bundle,
                           bool (*fn)(pcmk__bundle_replica_t *, void *),
                           void *user_data)
{
    const pe__bundle_variant_data_t *bundle_data = NULL;
    GList *item = NULL;

    get_bundle_variant_data(bundle_data, bundle);

    // Advance only while the callback asks to continue
    item = bundle_data->replicas;
    while ((item != NULL)
           && fn((pcmk__bundle_replica_t *) item->data, user_data)) {
        item = item->next;
    }
}
/*!
 * \internal
 * \brief Iterate over const bundle replicas
 *
 * \param[in]     bundle     Bundle to iterate over
 * \param[in]     fn         Function to call for each replica (iteration
 *                           stops as soon as it returns false)
 * \param[in,out] user_data  Pointer to pass to \p fn
 */
void
pe__foreach_const_bundle_replica(const pcmk_resource_t *bundle,
                                 bool (*fn)(const pcmk__bundle_replica_t *,
                                            void *),
                                 void *user_data)
{
    const pe__bundle_variant_data_t *bundle_data = NULL;
    const GList *item = NULL;

    get_bundle_variant_data(bundle_data, bundle);

    // Advance only while the callback asks to continue
    item = bundle_data->replicas;
    while ((item != NULL)
           && fn((const pcmk__bundle_replica_t *) item->data, user_data)) {
        item = item->next;
    }
}
/*!
 * \internal
 * \brief Get the IPv4 address following a given one
 *
 * The last octet is incremented; once it exceeds 253, the third octet is
 * incremented instead and the last octet restarts at 1.
 *
 * \param[in] last_ip  Dotted-quad IPv4 address to start from
 *
 * \return Newly allocated string with the next address, or NULL if \p last_ip
 *         cannot be parsed as four dot-separated numbers or its third octet
 *         is greater than 253
 */
static char *
next_ip(const char *last_ip)
{
    unsigned int octet[4] = { 0, 0, 0, 0 };
    int parsed = sscanf(last_ip, "%u.%u.%u.%u",
                        &octet[0], &octet[1], &octet[2], &octet[3]);

    if (parsed != 4) {
        /*@ TODO check for IPv6 */
        return NULL;
    }

    if (octet[2] > 253) {
        return NULL;
    }

    if (octet[3] > 253) {
        // Last octet is used up, so move on to the next value of the third
        octet[2]++;
        octet[3] = 1;
    } else {
        octet[3]++;
    }

    return crm_strdup_printf("%u.%u.%u.%u",
                             octet[0], octet[1], octet[2], octet[3]);
}
/*!
 * \internal
 * \brief Assign the next IP address in a bundle's range to a replica
 *
 * The first replica gets a copy of the range start; each later replica gets
 * the address following the most recently assigned one. On success, a host
 * name mapping for the replica is appended to the container options.
 *
 * If the bundle has no IP range, or no address can be assigned (the range
 * cannot be advanced any further, or memory allocation failed), the replica
 * is left without an address and nothing is appended to \p buffer.
 *
 * \param[in,out] data     Bundle variant data
 * \param[in,out] replica  Replica to assign an address to
 * \param[in,out] buffer   Container launcher options to append to
 */
static void
allocate_ip(pe__bundle_variant_data_t *data, pcmk__bundle_replica_t *replica,
            GString *buffer)
{
    if (data->ip_range_start == NULL) {
        return;
    }

    if (data->ip_last != NULL) {
        replica->ipaddr = next_ip(data->ip_last);
    } else {
        replica->ipaddr = strdup(data->ip_range_start);
    }

    if (replica->ipaddr == NULL) {
        /* Leave data->ip_last untouched: resetting it to NULL would make the
         * next replica start over from ip_range_start and be given an address
         * that is already assigned to another replica. Also avoid formatting
         * a NULL address into the host mapping below.
         */
        return;
    }
    data->ip_last = replica->ipaddr;

    switch (data->agent_type) {
        case PE__CONTAINER_AGENT_DOCKER:
        case PE__CONTAINER_AGENT_PODMAN:
            if (data->add_host) {
                g_string_append_printf(buffer, " --add-host=%s-%d:%s",
                                       data->prefix, replica->offset,
                                       replica->ipaddr);
            } else {
                g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
                                       replica->ipaddr, data->prefix,
                                       replica->offset);
            }
            break;

        case PE__CONTAINER_AGENT_RKT:
            g_string_append_printf(buffer, " --hosts-entry=%s=%s-%d",
                                   replica->ipaddr, data->prefix,
                                   replica->offset);
            break;

        default: // PE__CONTAINER_AGENT_UNKNOWN
            break;
    }
}
/*!
 * \internal
 * \brief Create XML for a primitive OCF resource
 *
 * \param[in] name      Resource ID
 * \param[in] provider  OCF provider of the resource agent
 * \param[in] kind      Resource agent type
 *
 * \return Newly created, parentless resource XML
 */
static xmlNode *
create_resource(const char *name, const char *provider, const char *kind)
{
    xmlNode *xml = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);

    // Attribute order is kept stable: ID, class, provider, type
    crm_xml_add(xml, PCMK_XA_ID, name);
    crm_xml_add(xml, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
    crm_xml_add(xml, XML_AGENT_ATTR_PROVIDER, provider);
    crm_xml_add(xml, XML_ATTR_TYPE, kind);

    return xml;
}
/*!
 * \internal
 * \brief Check whether cluster can manage resource inside container
 *
 * \param[in,out] data  Container variant data
 *
 * \return true if networking configuration is acceptable, false otherwise
 *
 * \note The resource is manageable if an IP range or control port has been
 *       specified. If a control port is used without an IP range, replicas per
 *       host must be 1 (a larger configured value is reported as a
 *       configuration error and reset to 1).
 */
static bool
valid_network(pe__bundle_variant_data_t *data)
{
    // An IP range is always sufficient
    if (data->ip_range_start != NULL) {
        return true;
    }

    // Without an IP range, a control port is required
    if (data->control_port == NULL) {
        return false;
    }

    if (data->nreplicas_per_host > 1) {
        pcmk__config_err("Specifying the 'control-port' for %s requires "
                         "'replicas-per-host=1'", data->prefix);
        data->nreplicas_per_host = 1;
        // @TODO to be sure:
        // pe__clear_resource_flags(rsc, pcmk_rsc_unique);
    }
    return true;
}
/*!
 * \internal
 * \brief Create the implicit IP address resource for a bundle replica
 *
 * Nothing is created if the bundle has no IP range.
 *
 * \param[in,out] parent   Bundle that the replica belongs to
 * \param[in,out] data     Bundle variant data
 * \param[in,out] replica  Replica to create the IP resource for
 *
 * \return pcmk_rc_ok on success (or if no IP resource is needed), or
 *         pcmk_rc_unpack_error if the new resource could not be unpacked
 */
static int
create_ip_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data,
                   pcmk__bundle_replica_t *replica)
{
    char *ip_rsc_id = NULL;
    const char *netmask = NULL;
    xmlNode *xml_ip = NULL;
    xmlNode *xml_attrs = NULL;
    xmlNode *xml_ops = NULL;

    if (data->ip_range_start == NULL) {
        return pcmk_rc_ok;
    }

    // The resource itself
    ip_rsc_id = crm_strdup_printf("%s-ip-%s", data->prefix, replica->ipaddr);
    crm_xml_sanitize_id(ip_rsc_id);
    xml_ip = create_resource(ip_rsc_id, "heartbeat", "IPaddr2");
    free(ip_rsc_id);

    // Its instance attributes
    xml_attrs = create_xml_node(xml_ip, XML_TAG_ATTR_SETS);
    crm_xml_set_id(xml_attrs, "%s-attributes-%d",
                   data->prefix, replica->offset);

    crm_create_nvpair_xml(xml_attrs, NULL, "ip", replica->ipaddr);
    if (data->host_network != NULL) {
        crm_create_nvpair_xml(xml_attrs, NULL, "nic", data->host_network);
    }

    netmask = (data->host_netmask != NULL)? data->host_netmask : "32";
    crm_create_nvpair_xml(xml_attrs, NULL, "cidr_netmask", netmask);

    // Its operations
    xml_ops = create_xml_node(xml_ip, "operations");
    crm_create_op_xml(xml_ops, ID(xml_ip), PCMK_ACTION_MONITOR, "60s", NULL);

    // TODO: Other ops? Timeouts and intervals from underlying resource?

    if (pe__unpack_resource(xml_ip, &replica->ip, parent,
                            parent->cluster) != pcmk_rc_ok) {
        return pcmk_rc_unpack_error;
    }

    parent->children = g_list_append(parent->children, replica->ip);
    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Get the name of a container agent type
 *
 * \param[in] t  Container agent type
 *
 * \return String name corresponding to \p t (the "unknown" name for any
 *         unrecognized value)
 */
static const char*
container_agent_str(enum pe__container_agent t)
{
    switch (t) {
        case PE__CONTAINER_AGENT_DOCKER:
            return PE__CONTAINER_AGENT_DOCKER_S;

        case PE__CONTAINER_AGENT_RKT:
            return PE__CONTAINER_AGENT_RKT_S;

        case PE__CONTAINER_AGENT_PODMAN:
            return PE__CONTAINER_AGENT_PODMAN_S;

        default: // PE__CONTAINER_AGENT_UNKNOWN
            return PE__CONTAINER_AGENT_UNKNOWN_S;
    }
}
/*!
* \internal
* \brief Create the implicit container resource for a bundle replica
*
* Build XML for an OCF "heartbeat" resource named after the bundle's container
* agent type, with instance attributes for the image, launcher run options,
* mount points, and run and monitor commands, plus a 60s recurring monitor.
* Unpack it into \c replica->container, flag it as a replica container, and
* append it to the parent's children.
*
* \param[in,out] parent Bundle that the replica belongs to
* \param[in] data Bundle variant data
* \param[in,out] replica Replica to create the container resource for
*
* \return pcmk_rc_ok on success, or pcmk_rc_unpack_error if the agent type is
* unknown or the new resource could not be unpacked
*/
static int
create_container_resource(pcmk_resource_t *parent,
const pe__bundle_variant_data_t *data,
pcmk__bundle_replica_t *replica)
{
char *id = NULL;
xmlNode *xml_container = NULL;
xmlNode *xml_obj = NULL;
// Agent-specific
const char *hostname_opt = NULL;
const char *env_opt = NULL;
const char *agent_str = NULL;
int volid = 0; // rkt-only
// Accumulates the launcher options that become the "run_opts" attribute
GString *buffer = NULL;
// Accumulates per-replica mount sources that become "mount_points"
GString *dbuffer = NULL;
// Where syntax differences are drop-in replacements, set them now
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
hostname_opt = "-h ";
env_opt = "-e ";
break;
case PE__CONTAINER_AGENT_RKT:
hostname_opt = "--hostname=";
env_opt = "--environment=";
break;
default: // PE__CONTAINER_AGENT_UNKNOWN
return pcmk_rc_unpack_error;
}
agent_str = container_agent_str(data->agent_type);
buffer = g_string_sized_new(4096);
// The resource ID has the form <prefix>-<agent>-<replica offset>
id = crm_strdup_printf("%s-%s-%d", data->prefix, agent_str,
replica->offset);
crm_xml_sanitize_id(id);
xml_container = create_resource(id, "heartbeat", agent_str);
free(id);
xml_obj = create_xml_node(xml_container, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_obj, "%s-attributes-%d", data->prefix, replica->offset);
crm_create_nvpair_xml(xml_obj, NULL, "image", data->image);
crm_create_nvpair_xml(xml_obj, NULL, "allow_pull", XML_BOOLEAN_TRUE);
crm_create_nvpair_xml(xml_obj, NULL, "force_kill", XML_BOOLEAN_FALSE);
crm_create_nvpair_xml(xml_obj, NULL, "reuse", XML_BOOLEAN_FALSE);
if (data->agent_type == PE__CONTAINER_AGENT_DOCKER) {
g_string_append(buffer, " --restart=no");
}
/* Set a container hostname only if we have an IP to map it to. The user can
* set -h or --uts=host themselves if they want a nicer name for logs, but
* this makes applications happy who need their hostname to match the IP
* they bind to.
*/
if (data->ip_range_start != NULL) {
g_string_append_printf(buffer, " %s%s-%d", hostname_opt, data->prefix,
replica->offset);
}
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_stderr=1", NULL);
if (data->container_network != NULL) {
pcmk__g_strcat(buffer, " --net=", data->container_network, NULL);
}
// Pass the Pacemaker Remote port (configured or default) via environment
if (data->control_port != NULL) {
pcmk__g_strcat(buffer, " ", env_opt, "PCMK_" PCMK__ENV_REMOTE_PORT "=",
data->control_port, NULL);
} else {
g_string_append_printf(buffer, " %sPCMK_" PCMK__ENV_REMOTE_PORT "=%d",
env_opt, DEFAULT_REMOTE_PORT);
}
// Add a volume option for each configured mount
for (GList *iter = data->mounts; iter != NULL; iter = iter->next) {
pe__bundle_mount_t *mount = (pe__bundle_mount_t *) iter->data;
// Per-replica subdirectory of the mount source, if requested (else NULL)
char *source = NULL;
if (pcmk_is_set(mount->flags, pe__bundle_mount_subdir)) {
source = crm_strdup_printf("%s/%s-%d", mount->source, data->prefix,
replica->offset);
pcmk__add_separated_word(&dbuffer, 1024, source, ",");
}
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
pcmk__g_strcat(buffer,
" -v ", pcmk__s(source, mount->source),
":", mount->target, NULL);
if (mount->options != NULL) {
pcmk__g_strcat(buffer, ":", mount->options, NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
g_string_append_printf(buffer,
" --volume vol%d,kind=host,"
"source=%s%s%s "
"--mount volume=vol%d,target=%s",
volid, pcmk__s(source, mount->source),
(mount->options != NULL)? "," : "",
pcmk__s(mount->options, ""),
volid, mount->target);
volid++;
break;
default:
break;
}
free(source);
}
// Add a port-mapping option for each configured port
for (GList *iter = data->ports; iter != NULL; iter = iter->next) {
pe__bundle_port_t *port = (pe__bundle_port_t *) iter->data;
switch (data->agent_type) {
case PE__CONTAINER_AGENT_DOCKER:
case PE__CONTAINER_AGENT_PODMAN:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" -p ", replica->ipaddr, ":", port->source,
":", port->target, NULL);
} else if (!pcmk__str_eq(data->container_network, "host",
pcmk__str_none)) {
// No need to do port mapping if net == host
pcmk__g_strcat(buffer,
" -p ", port->source, ":", port->target,
NULL);
}
break;
case PE__CONTAINER_AGENT_RKT:
if (replica->ipaddr != NULL) {
pcmk__g_strcat(buffer,
" --port=", port->target,
":", replica->ipaddr, ":", port->source,
NULL);
} else {
pcmk__g_strcat(buffer,
" --port=", port->target, ":", port->source,
NULL);
}
break;
default:
break;
}
}
/* @COMPAT: We should use pcmk__add_word() here, but we can't yet, because
* it would cause restarts during rolling upgrades.
*
* In a previous version of the container resource creation logic, if
* data->launcher_options is not NULL, we append
* (" %s", data->launcher_options) even if data->launcher_options is an
* empty string. Likewise for data->container_host_options. Using
*
* pcmk__add_word(buffer, 0, data->launcher_options)
*
* removes that extra trailing space, causing a resource definition change.
*/
if (data->launcher_options != NULL) {
pcmk__g_strcat(buffer, " ", data->launcher_options, NULL);
}
if (data->container_host_options != NULL) {
pcmk__g_strcat(buffer, " ", data->container_host_options, NULL);
}
crm_create_nvpair_xml(xml_obj, NULL, "run_opts",
(const char *) buffer->str);
g_string_free(buffer, TRUE);
// "mount_points" is always set, as an empty string if there are none
crm_create_nvpair_xml(xml_obj, NULL, "mount_points",
(dbuffer != NULL)? (const char *) dbuffer->str : "");
if (dbuffer != NULL) {
g_string_free(dbuffer, TRUE);
}
// With a bundled resource, the run command defaults to pacemaker-remoted
if (replica->child != NULL) {
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
} else {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
SBIN_DIR "/pacemaker-remoted");
}
/* TODO: Allow users to specify their own?
*
* We just want to know if the container is alive; we'll monitor the
* child independently.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
#if 0
/* @TODO Consider supporting the use case where we can start and stop
* resources, but not proxy local commands (such as setting node
* attributes), by running the local executor in stand-alone mode.
* However, this would probably be better done via ACLs as with other
* Pacemaker Remote nodes.
*/
} else if ((child != NULL) && data->untrusted) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
CRM_DAEMON_DIR "/pacemaker-execd");
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd",
CRM_DAEMON_DIR "/pacemaker/cts-exec-helper -c poke");
#endif
} else {
// Without a bundled resource, a run command is set only if configured
if (data->container_command != NULL) {
crm_create_nvpair_xml(xml_obj, NULL, "run_cmd",
data->container_command);
}
/* TODO: Allow users to specify their own?
*
* We don't know what's in the container, so we just want to know if it
* is alive.
*/
crm_create_nvpair_xml(xml_obj, NULL, "monitor_cmd", "/bin/true");
}
xml_obj = create_xml_node(xml_container, "operations");
crm_create_op_xml(xml_obj, ID(xml_container), PCMK_ACTION_MONITOR, "60s",
NULL);
// TODO: Other ops? Timeouts and intervals from underlying resource?
if (pe__unpack_resource(xml_container, &replica->container, parent,
parent->cluster) != pcmk_rc_ok) {
return pcmk_rc_unpack_error;
}
pe__set_resource_flags(replica->container, pcmk_rsc_replica_container);
parent->children = g_list_append(parent->children, replica->container);
return pcmk_rc_ok;
}
/*!
 * \brief Ban a node from a resource's (and its children's) allowed nodes list
 *
 * If the node is in the resource's allowed nodes table, its weight is set to
 * -INFINITY and probing of the resource on it is disabled. The same is then
 * done recursively for every child of the resource.
 *
 * \param[in,out] rsc    Resource to modify
 * \param[in]     uname  Name of node to ban
 */
static void
disallow_node(pcmk_resource_t *rsc, const char *uname)
{
    pcmk_node_t *match = g_hash_table_lookup(rsc->allowed_nodes, uname);

    if (match != NULL) {
        match->weight = -INFINITY;
        match->rsc_discover_mode = pcmk_probe_never;
    }

    /* Recurse with a direct call rather than via g_list_foreach(): this
     * function's signature is not compatible with GFunc, and calling a
     * function through a pointer to an incompatible type is undefined
     * behavior.
     */
    for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
        disallow_node((pcmk_resource_t *) iter->data, uname);
    }
}
/*!
 * \internal
 * \brief Create the implicit remote connection resource for a bundle replica
 *
 * Nothing is created unless the replica has a child resource and the bundle
 * data passes valid_network(). Otherwise, this builds the connection XML,
 * makes sure a node object exists for the bundle node, bans that node
 * everywhere except for the replica's child, unpacks the connection resource,
 * and appends it to the parent's children.
 *
 * \param[in,out] parent   Bundle resource that will own the new resource
 * \param[in,out] data     Bundle variant data
 * \param[in,out] replica  Replica to create the connection for
 *
 * \return pcmk_rc_ok on success (or if there was nothing to do), or
 *         pcmk_rc_unpack_error if the connection could not be unpacked
 */
static int
create_remote_resource(pcmk_resource_t *parent, pe__bundle_variant_data_t *data,
                       pcmk__bundle_replica_t *replica)
{
    GHashTableIter allowed_iter;
    pcmk_node_t *node = NULL;
    pcmk_node_t *allowed = NULL;
    pcmk_node_t *parent_copy = NULL;
    xmlNode *xml_remote = NULL;
    char *id = NULL;
    char *port_s = NULL;
    const char *port = NULL;
    const char *uname = NULL;
    const char *connect_name = NULL;

    if ((replica->child == NULL) || !valid_network(data)) {
        return pcmk_rc_ok;
    }

    id = crm_strdup_printf("%s-%d", data->prefix, replica->offset);
    if (pe_find_resource(parent->cluster->resources, id) != NULL) {
        free(id);

        // Fall back to a name that is very unlikely to collide
        id = crm_strdup_printf("pcmk-internal-%s-remote-%d",
                               replica->child->id, replica->offset);

        // @TODO return error instead of asserting?
        CRM_ASSERT(pe_find_resource(parent->cluster->resources,
                                    id) == NULL);
    }

    /* REMOTE_CONTAINER_HACK: When the connection has no IP of its own, the
     * magic server name "#uname" is used, which is how nested remotes (a
     * bundle running on a remote node) are supported.
     */
    connect_name = (replica->ipaddr? replica->ipaddr : "#uname");

    // Prefer the configured control port, otherwise use the default
    if (data->control_port != NULL) {
        port = data->control_port;
    } else {
        port_s = pcmk__itoa(DEFAULT_REMOTE_PORT);
        port = port_s;
    }

    /* Passing the container's ID makes replica->container the container of
     * replica->remote, similar to guest nodes. That is how the scheduler
     * knows the bundle node is fenced by recovering the container, and that
     * the connection must be ordered relative to the container.
     */
    xml_remote = pe_create_remote_xml(NULL, id, replica->container->id,
                                      NULL, NULL, NULL,
                                      connect_name, port);
    free(port_s);

    /* Drop our own copy of the ID and use the one stored in the XML instead,
     * because the node ID and uname need something that gets freed during
     * scheduler data cleanup.
     */
    free(id);
    id = NULL;
    uname = ID(xml_remote);

    /* Make sure a node object exists for the guest (one may already exist if
     * it has a permanent node attribute), and that its weight is -INFINITY so
     * no other resources can run on it.
     */
    node = pe_find_node(parent->cluster->nodes, uname);
    if (node == NULL) {
        node = pe_create_node(uname, uname, "remote", "-INFINITY",
                              parent->cluster);
    } else {
        node->weight = -INFINITY;
    }
    node->rsc_discover_mode = pcmk_probe_never;

    /* unpack_remote_nodes() ensures that each remote node and guest node has
     * a pcmk_node_t entry. Ideally it would do so for bundle nodes too, but a
     * bundle must be mostly unpacked before the needed nodes are known, hence
     * the creation just above.
     *
     * As a consequence, the node may already have been used while unpacking
     * other resources, before the weight was corrected. Most likely that
     * happens when pe__unpack_resource() calls resource_location() to set a
     * default score in symmetric clusters, which adds a node *copy* with the
     * wrong weight to each resource's allowed nodes.
     *
     * As a hacky workaround, fix those copies here.
     *
     * @TODO Possible alternative: ensure bundles are unpacked before other
     * resources, so the weight is correct before any copies are made.
     */
    g_list_foreach(parent->cluster->resources, (GFunc) disallow_node,
                   (gpointer) uname);

    replica->node = pe__copy_node(node);
    replica->node->weight = 500;
    replica->node->rsc_discover_mode = pcmk_probe_exclusive;

    // The child is allowed only on the bundle node, with the right discovery
    if (replica->child->allowed_nodes != NULL) {
        g_hash_table_destroy(replica->child->allowed_nodes);
    }
    replica->child->allowed_nodes = pcmk__strkey_table(NULL, free);
    g_hash_table_insert(replica->child->allowed_nodes,
                        (gpointer) replica->node->details->id,
                        pe__copy_node(replica->node));

    // The child's parent gets a banned (-INFINITY) copy of the same node
    parent_copy = pe__copy_node(replica->node);
    parent_copy->weight = -INFINITY;
    g_hash_table_insert(replica->child->parent->allowed_nodes,
                        (gpointer) replica->node->details->id, parent_copy);

    if (pe__unpack_resource(xml_remote, &replica->remote, parent,
                            parent->cluster) != pcmk_rc_ok) {
        return pcmk_rc_unpack_error;
    }

    // Remote connections may run only on 'normal' cluster nodes
    g_hash_table_iter_init(&allowed_iter, replica->remote->allowed_nodes);
    while (g_hash_table_iter_next(&allowed_iter, NULL, (void **) &allowed)) {
        if (pe__is_guest_or_remote_node(allowed)) {
            allowed->weight = -INFINITY;
        }
    }

    replica->node->details->remote_rsc = replica->remote;

    // Ensure pe__is_guest_node() functions correctly immediately
    replica->remote->container = replica->container;

    /* A bundle's #kind is closer to "container" (guest node) than the
     * "remote" set by pe_create_node().
     */
    g_hash_table_insert(replica->node->details->attrs,
                        strdup(CRM_ATTR_KIND), strdup("container"));

    /* One effect of this is that setup_container() will add replica->remote
     * to replica->container's fillers, which makes
     * pe__resource_contains_guest_node() true for replica->container.
     *
     * replica->child does NOT get added to replica->container's fillers. The
     * only noticeable effect if it did would be for its fail count to be
     * taken into account when checking replica->container's migration
     * threshold.
     */
    parent->children = g_list_append(parent->children, replica->remote);

    return pcmk_rc_ok;
}
/*!
 * \internal
 * \brief Create all implicit resources needed by one bundle replica
 *
 * The container, IP, and remote connection resources are created in that
 * order, stopping at the first failure.
 *
 * \param[in,out] parent   Bundle resource that owns the replica
 * \param[in,out] data     Bundle variant data
 * \param[in,out] replica  Replica to create resources for
 *
 * \return pcmk_rc_ok on success, otherwise the return code of the creation
 *         step that failed
 */
static int
create_replica_resources(pcmk_resource_t *parent,
                         pe__bundle_variant_data_t *data,
                         pcmk__bundle_replica_t *replica)
{
    int rc = create_container_resource(parent, data, replica);

    if (rc == pcmk_rc_ok) {
        rc = create_ip_resource(parent, data, replica);
    }
    if (rc == pcmk_rc_ok) {
        rc = create_remote_resource(parent, data, replica);
    }
    if (rc != pcmk_rc_ok) {
        return rc;
    }

    if ((replica->ipaddr != NULL) && (replica->child != NULL)) {
        add_hash_param(replica->child->meta, "external-ip", replica->ipaddr);
    }

    /* Allow the remote connection resource to be allocated to a different
     * node than the one on which the container is active.
     *
     * This makes it possible to have Pacemaker Remote nodes running
     * containers with pacemaker-remoted inside in order to start services
     * inside those containers.
     */
    if (replica->remote != NULL) {
        pe__set_resource_flags(replica->remote,
                               pcmk_rsc_remote_nesting_allowed);
    }
    return rc;
}
/*!
 * \internal
 * \brief Append a new mount entry to a bundle's list of mounts
 *
 * The source, target, and options strings are copied, so the caller keeps
 * ownership of its arguments.
 *
 * \param[in,out] bundle_data  Bundle variant data to add the mount to
 * \param[in]     source       Mount source (must not be NULL)
 * \param[in]     target       Mount target (must not be NULL)
 * \param[in]     options      Mount options (may be NULL)
 * \param[in]     flags        Mount flags to store with the entry
 */
static void
mount_add(pe__bundle_variant_data_t *bundle_data, const char *source,
          const char *target, const char *options, uint32_t flags)
{
    pe__bundle_mount_t *mount = NULL;

    // strdup(NULL) is undefined, so reject NULL up front
    CRM_ASSERT((source != NULL) && (target != NULL));

    mount = calloc(1, sizeof(pe__bundle_mount_t));
    CRM_ASSERT(mount != NULL);

    // Treat a failed copy like the failed allocation above, not a silent NULL
    mount->source = strdup(source);
    CRM_ASSERT(mount->source != NULL);

    mount->target = strdup(target);
    CRM_ASSERT(mount->target != NULL);

    pcmk__str_update(&mount->options, options);
    mount->flags = flags;
    bundle_data->mounts = g_list_append(bundle_data->mounts, mount);
}
/*!
 * \internal
 * \brief Free a bundle mount entry along with the strings it holds
 *
 * \param[in,out] mount  Mount entry to free (must not be NULL)
 */
static void
mount_free(pe__bundle_mount_t *mount)
{
    // Release the member strings first, then the entry itself
    free(mount->options);
    free(mount->target);
    free(mount->source);

    free(mount);
}
/*!
 * \internal
 * \brief Free a bundle port mapping along with the strings it holds
 *
 * \param[in,out] port  Port mapping to free (must not be NULL)
 */
static void
port_free(pe__bundle_port_t *port)
{
    // Release the member strings first, then the mapping itself
    free(port->target);
    free(port->source);

    free(port);
}
/*!
 * \internal
 * \brief Find the bundle replica that uses a given remote connection resource
 *
 * The topmost ancestor of \p remote is treated as the bundle whose replicas
 * are searched.
 *
 * \param[in] remote  Remote connection resource to look for
 *
 * \return Replica whose remote member is \p remote, or NULL if \p remote is
 *         NULL or no replica matches (the latter is also logged as an
 *         assertion failure)
 */
static pcmk__bundle_replica_t *
replica_for_remote(pcmk_resource_t *remote)
{
    pcmk_resource_t *top = NULL;
    pe__bundle_variant_data_t *bundle_data = NULL;
    GList *item = NULL;

    if (remote == NULL) {
        return NULL;
    }

    // Climb to the topmost ancestor
    for (top = remote; top->parent != NULL; top = top->parent) {
    }

    get_bundle_variant_data(bundle_data, top);

    item = bundle_data->replicas;
    while (item != NULL) {
        pcmk__bundle_replica_t *candidate = item->data;

        if (candidate->remote == remote) {
            return candidate;
        }
        item = item->next;
    }

    // Reaching this point means no replica owns the connection
    CRM_LOG_ASSERT(FALSE);
    return NULL;
}
/*!
 * \internal
 * \brief Check whether a resource's remote address is the "#uname" placeholder
 *
 * NOTE(review): the two lines below that start with "-" and "+" are the
 * removed and added sides of one diff hunk; they differ only in the constant
 * used as the parameter lookup key.
 *
 * \param[in,out] rsc  Resource to check (may be NULL)
 *
 * \return false if \p rsc is NULL; otherwise true only if the looked-up
 *         parameter value matches "#uname" (compared with pcmk__str_casei,
 *         presumably case-insensitively) and
 *         xml_contains_remote_node() accepts the resource's XML
 */
bool
pe__bundle_needs_remote_name(pcmk_resource_t *rsc)
{
const char *value;
GHashTable *params = NULL;
if (rsc == NULL) {
return false;
}
// Use NULL node since pcmk__bundle_expand() uses that to set value
params = pe_rsc_params(rsc, NULL, rsc->cluster);
- value = g_hash_table_lookup(params, XML_RSC_ATTR_REMOTE_RA_ADDR);
+ value = g_hash_table_lookup(params, PCMK_REMOTE_RA_ADDR);
return pcmk__str_eq(value, "#uname", pcmk__str_casei)
&& xml_contains_remote_node(rsc->xml);
}
/*!
 * \internal
 * \brief Determine the host name to use as a bundle connection's address
 *
 * REMOTE_CONTAINER_HACK: This allows remote nodes to start containers that
 * have Pacemaker Remote inside.
 *
 * The host is the node that the replica's container is allocated to, or
 * failing that, the container's current node.
 *
 * \param[in,out] rsc        Bundle connection resource
 * \param[in]     scheduler  Scheduler data (not used by this function)
 * \param[in,out] xml        If not NULL (along with \p field), XML element to
 *                           add the host name to
 * \param[in]     field      If not NULL (along with \p xml), name of the XML
 *                           attribute to set
 *
 * \return Name of the host node, or NULL if \p rsc does not need a remote
 *         name, has no matching replica, or no host could be determined
 */
const char *
pe__add_bundle_remote_name(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler,
                           xmlNode *xml, const char *field)
{
    pcmk_node_t *host = NULL;
    pcmk__bundle_replica_t *replica = NULL;

    if (!pe__bundle_needs_remote_name(rsc)) {
        return NULL;
    }

    replica = replica_for_remote(rsc);
    if (replica == NULL) {
        return NULL;
    }

    /* Prefer where the container will run after the transition; if it won't
     * be running anywhere, go with where it's running now.
     */
    host = (replica->container->allocated_to != NULL)?
           replica->container->allocated_to
           : pe__current_node(replica->container);

    if (host == NULL) {
        crm_trace("Cannot determine address for bundle connection %s", rsc->id);
        return NULL;
    }

    crm_trace("Setting address for bundle connection %s to bundle host %s",
              rsc->id, pe__node_name(host));

    if ((xml != NULL) && (field != NULL)) {
        crm_xml_add(xml, field, host->details->uname);
    }
    return host->details->uname;
}
/* Set flag bits in a bundle mount's flag group.
 *
 * mount_xml:    XML element whose ID is passed to pcmk__set_flags_as() as the
 *               target name
 * flags:        Flag group to update; it is both read and assigned, so it
 *               must be a modifiable lvalue without side effects
 * flags_to_set: Flag bits to set (also stringified and passed along)
 *
 * The call passes LOG_TRACE and the caller's function name and line number,
 * presumably so the change can be logged at trace level.
 */
#define pe__set_bundle_mount_flags(mount_xml, flags, flags_to_set) do { \
flags = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Bundle mount", ID(mount_xml), flags, \
(flags_to_set), #flags_to_set); \
} while (0)
/*!
 * \internal
 * \brief Unpack a bundle resource's configuration
 *
 * This reads the container agent element, the optional network, storage, and
 * primitive elements of the bundle XML, fills in the bundle variant data,
 * creates one replica object per instance, and then creates the implicit
 * resources for each replica.
 *
 * Memory allocation failures are treated as fatal (assertion).
 *
 * \param[in,out] rsc        Bundle resource to unpack
 * \param[in,out] scheduler  Scheduler data
 *
 * \return TRUE on success, or FALSE if the bundle has no supported container
 *         element, has a primitive but no usable network configuration, or
 *         any implicit resource could not be unpacked
 */
gboolean
pe__unpack_bundle(pcmk_resource_t *rsc, pcmk_scheduler_t *scheduler)
{
    const char *value = NULL;
    xmlNode *xml_obj = NULL;
    xmlNode *xml_resource = NULL;
    pe__bundle_variant_data_t *bundle_data = NULL;
    bool need_log_mount = TRUE;

    CRM_ASSERT(rsc != NULL);
    pcmk__rsc_trace(rsc, "Processing resource %s...", rsc->id);

    bundle_data = calloc(1, sizeof(pe__bundle_variant_data_t));
    CRM_ASSERT(bundle_data != NULL);
    rsc->variant_opaque = bundle_data;
    bundle_data->prefix = strdup(rsc->id);

    // The first container element found (in this order) sets the agent type
    xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_DOCKER_S);
    if (xml_obj != NULL) {
        bundle_data->agent_type = PE__CONTAINER_AGENT_DOCKER;
    } else {
        xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_RKT_S);
        if (xml_obj != NULL) {
            bundle_data->agent_type = PE__CONTAINER_AGENT_RKT;
        } else {
            xml_obj = first_named_child(rsc->xml, PE__CONTAINER_AGENT_PODMAN_S);
            if (xml_obj != NULL) {
                bundle_data->agent_type = PE__CONTAINER_AGENT_PODMAN;
            } else {
                return FALSE;
            }
        }
    }

    // Use 0 for default, minimum, and invalid promoted-max
    value = crm_element_value(xml_obj, PCMK_META_PROMOTED_MAX);
    if (value == NULL) {
        // @COMPAT deprecated since 2.0.0
        value = crm_element_value(xml_obj, "masters");
    }
    pcmk__scan_min_int(value, &bundle_data->promoted_max, 0);

    // Default replicas to promoted-max if it was specified and 1 otherwise
    value = crm_element_value(xml_obj, "replicas");
    if ((value == NULL) && (bundle_data->promoted_max > 0)) {
        bundle_data->nreplicas = bundle_data->promoted_max;
    } else {
        pcmk__scan_min_int(value, &bundle_data->nreplicas, 1);
    }

    /*
     * Communication between containers on the same host via the
     * floating IPs only works if the container is started with:
     *   --userland-proxy=false --ip-masq=false
     */
    value = crm_element_value(xml_obj, "replicas-per-host");
    pcmk__scan_min_int(value, &bundle_data->nreplicas_per_host, 1);
    if (bundle_data->nreplicas_per_host == 1) {
        pe__clear_resource_flags(rsc, pcmk_rsc_unique);
    }

    bundle_data->container_command = crm_element_value_copy(xml_obj, "run-command");
    bundle_data->launcher_options = crm_element_value_copy(xml_obj, "options");
    bundle_data->image = crm_element_value_copy(xml_obj, "image");
    bundle_data->container_network = crm_element_value_copy(xml_obj, "network");

    xml_obj = first_named_child(rsc->xml, "network");
    if (xml_obj) {
        bundle_data->ip_range_start = crm_element_value_copy(xml_obj, "ip-range-start");
        bundle_data->host_netmask = crm_element_value_copy(xml_obj, "host-netmask");
        bundle_data->host_network = crm_element_value_copy(xml_obj, "host-interface");
        bundle_data->control_port = crm_element_value_copy(xml_obj, "control-port");

        // add-host defaults to TRUE when missing or not parseable
        value = crm_element_value(xml_obj, "add-host");
        if (crm_str_to_boolean(value, &bundle_data->add_host) != 1) {
            bundle_data->add_host = TRUE;
        }

        for (xmlNode *xml_child = pcmk__xe_first_child(xml_obj); xml_child != NULL;
             xml_child = pcmk__xe_next(xml_child)) {

            pe__bundle_port_t *port = calloc(1, sizeof(pe__bundle_port_t));

            CRM_ASSERT(port != NULL);

            // A mapping gives either a single port or a range of ports
            port->source = crm_element_value_copy(xml_child, "port");
            if (port->source == NULL) {
                port->source = crm_element_value_copy(xml_child, "range");
            } else {
                port->target = crm_element_value_copy(xml_child, "internal-port");
            }

            if (port->source != NULL && strlen(port->source) > 0) {
                if (port->target == NULL) {
                    port->target = strdup(port->source);
                }
                bundle_data->ports = g_list_append(bundle_data->ports, port);
            } else {
                pcmk__config_err("Invalid port directive %s", ID(xml_child));
                port_free(port);
            }
        }
    }

    xml_obj = first_named_child(rsc->xml, "storage");
    for (xmlNode *xml_child = pcmk__xe_first_child(xml_obj); xml_child != NULL;
         xml_child = pcmk__xe_next(xml_child)) {

        const char *source = crm_element_value(xml_child, "source-dir");
        const char *target = crm_element_value(xml_child, "target-dir");
        const char *options = crm_element_value(xml_child, "options");
        int flags = pe__bundle_mount_none;

        if (source == NULL) {
            source = crm_element_value(xml_child, "source-dir-root");
            pe__set_bundle_mount_flags(xml_child, flags,
                                       pe__bundle_mount_subdir);
        }

        if (source && target) {
            mount_add(bundle_data, source, target, options, flags);
            if (strcmp(target, "/var/log") == 0) {
                // The user mapped the log directory explicitly
                need_log_mount = FALSE;
            }
        } else {
            pcmk__config_err("Invalid mount directive %s", ID(xml_child));
        }
    }

    xml_obj = first_named_child(rsc->xml, "primitive");
    if (xml_obj && valid_network(bundle_data)) {
        char *value_s = NULL;   // Scratch buffer for integer-to-string values
        xmlNode *xml_set = NULL;

        xml_resource = create_xml_node(NULL, XML_CIB_TAG_INCARNATION);

        /* @COMPAT We no longer use the <master> tag, but we need to keep it as
         * part of the resource name, so that bundles don't restart in a rolling
         * upgrade. (It also avoids needing to change regression tests.)
         */
        crm_xml_set_id(xml_resource, "%s-%s", bundle_data->prefix,
                       (bundle_data->promoted_max? "master"
                        : (const char *)xml_resource->name));

        xml_set = create_xml_node(xml_resource, XML_TAG_META_SETS);
        crm_xml_set_id(xml_set, "%s-%s-meta", bundle_data->prefix, xml_resource->name);

        crm_create_nvpair_xml(xml_set, NULL,
                              PCMK_META_ORDERED, XML_BOOLEAN_TRUE);

        value_s = pcmk__itoa(bundle_data->nreplicas);
        crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_MAX, value_s);
        free(value_s);

        value_s = pcmk__itoa(bundle_data->nreplicas_per_host);
        crm_create_nvpair_xml(xml_set, NULL, PCMK_META_CLONE_NODE_MAX, value_s);
        free(value_s);

        crm_create_nvpair_xml(xml_set, NULL, PCMK_META_GLOBALLY_UNIQUE,
                              pcmk__btoa(bundle_data->nreplicas_per_host > 1));

        if (bundle_data->promoted_max) {
            crm_create_nvpair_xml(xml_set, NULL,
                                  PCMK_META_PROMOTABLE, XML_BOOLEAN_TRUE);

            value_s = pcmk__itoa(bundle_data->promoted_max);
            crm_create_nvpair_xml(xml_set, NULL, PCMK_META_PROMOTED_MAX, value_s);
            free(value_s);
        }

        add_node_copy(xml_resource, xml_obj);

    } else if (xml_obj) {
        pcmk__config_err("Cannot control %s inside %s without either "
                         "ip-range-start or control-port",
                         rsc->id, ID(xml_obj));
        return FALSE;
    }

    if (xml_resource) {
        int lpc = 0;
        GList *childIter = NULL;
        pe__bundle_port_t *port = NULL;
        GString *buffer = NULL;

        if (pe__unpack_resource(xml_resource, &(bundle_data->child), rsc,
                                scheduler) != pcmk_rc_ok) {
            return FALSE;
        }

        /* Currently, we always map the default authentication key location
         * into the same location inside the container.
         *
         * Ideally, we would respect the host's PCMK_authkey_location, but:
         * - it may be different on different nodes;
         * - the actual connection will do extra checking to make sure the key
         *   file exists and is readable, that we can't do here on the DC
         * - tools such as crm_resource and crm_simulate may not have the same
         *   environment variables as the cluster, causing operation digests to
         *   differ
         *
         * Always using the default location inside the container is fine,
         * because we control the pacemaker_remote environment, and it avoids
         * having to pass another environment variable to the container.
         *
         * @TODO A better solution may be to have only pacemaker_remote use the
         * environment variable, and have the cluster nodes use a new
         * cluster option for key location. This would introduce the limitation
         * of the location being the same on all cluster nodes, but that's
         * reasonable.
         */
        mount_add(bundle_data, DEFAULT_REMOTE_KEY_LOCATION,
                  DEFAULT_REMOTE_KEY_LOCATION, NULL, pe__bundle_mount_none);

        if (need_log_mount) {
            mount_add(bundle_data, CRM_BUNDLE_DIR, "/var/log", NULL,
                      pe__bundle_mount_subdir);
        }

        // Map the control port used for the remote connection
        port = calloc(1, sizeof(pe__bundle_port_t));
        CRM_ASSERT(port != NULL);
        if (bundle_data->control_port) {
            port->source = strdup(bundle_data->control_port);
        } else {
            /* If we wanted to respect PCMK_remote_port, we could use
             * crm_default_remote_port() here and elsewhere in this file instead
             * of DEFAULT_REMOTE_PORT.
             *
             * However, it gains nothing, since we control both the container
             * environment and the connection resource parameters, and the user
             * can use a different port if desired by setting control-port.
             */
            port->source = pcmk__itoa(DEFAULT_REMOTE_PORT);
        }
        port->target = strdup(port->source);
        bundle_data->ports = g_list_append(bundle_data->ports, port);

        buffer = g_string_sized_new(1024);
        for (childIter = bundle_data->child->children; childIter != NULL;
             childIter = childIter->next) {

            pcmk__bundle_replica_t *replica = NULL;

            replica = calloc(1, sizeof(pcmk__bundle_replica_t));
            CRM_ASSERT(replica != NULL);
            replica->child = childIter->data;
            replica->child->exclusive_discover = TRUE;
            replica->offset = lpc++;

            // Ensure the child's notify gets set based on the underlying primitive's value
            if (pcmk_is_set(replica->child->flags, pcmk_rsc_notify)) {
                pe__set_resource_flags(bundle_data->child, pcmk_rsc_notify);
            }

            allocate_ip(bundle_data, replica, buffer);
            bundle_data->replicas = g_list_append(bundle_data->replicas,
                                                  replica);

            // The value found for the last child is the one that is kept
            bundle_data->attribute_target =
                g_hash_table_lookup(replica->child->meta,
                                    PCMK_META_CONTAINER_ATTR_TARGET);
        }
        bundle_data->container_host_options = g_string_free(buffer, FALSE);

        if (bundle_data->attribute_target) {
            g_hash_table_replace(rsc->meta,
                                 strdup(PCMK_META_CONTAINER_ATTR_TARGET),
                                 strdup(bundle_data->attribute_target));
            g_hash_table_replace(bundle_data->child->meta,
                                 strdup(PCMK_META_CONTAINER_ATTR_TARGET),
                                 strdup(bundle_data->attribute_target));
        }

    } else {
        // Just a naked container, no pacemaker-remote
        GString *buffer = g_string_sized_new(1024);

        for (int lpc = 0; lpc < bundle_data->nreplicas; lpc++) {
            pcmk__bundle_replica_t *replica = NULL;

            replica = calloc(1, sizeof(pcmk__bundle_replica_t));
            CRM_ASSERT(replica != NULL);
            replica->offset = lpc;
            allocate_ip(bundle_data, replica, buffer);
            bundle_data->replicas = g_list_append(bundle_data->replicas,
                                                  replica);
        }
        bundle_data->container_host_options = g_string_free(buffer, FALSE);
    }

    for (GList *gIter = bundle_data->replicas; gIter != NULL;
         gIter = gIter->next) {

        pcmk__bundle_replica_t *replica = gIter->data;

        if (create_replica_resources(rsc, bundle_data, replica) != pcmk_rc_ok) {
            pcmk__config_err("Failed unpacking resource %s", rsc->id);

            /* NOTE(review): this is the only failure path in this function
             * that frees rsc before returning FALSE; confirm that the caller
             * does not also free rsc when unpacking fails.
             */
            rsc->fns->free(rsc);
            return FALSE;
        }

        /* Utilization needs special handling for bundles. It makes no sense for
         * the inner primitive to have utilization, because it is tied
         * one-to-one to the guest node created by the container resource -- and
         * there's no way to set capacities for that guest node anyway.
         *
         * What the user really wants is to configure utilization for the
         * container. However, the schema only allows utilization for
         * primitives, and the container resource is implicit anyway, so the
         * user can *only* configure utilization for the inner primitive. If
         * they do, move the primitive's utilization values to the container.
         *
         * @TODO This means that bundles without an inner primitive can't have
         * utilization. An alternative might be to allow utilization values in
         * the top-level bundle XML in the schema, and copy those to each
         * container.
         */
        if (replica->child != NULL) {
            GHashTable *empty = replica->container->utilization;

            replica->container->utilization = replica->child->utilization;
            replica->child->utilization = empty;
        }
    }

    if (bundle_data->child) {
        rsc->children = g_list_append(rsc->children, bundle_data->child);
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Check whether one replica member settles a bundle's active status
 *
 * \param[in,out] rsc  Replica member resource to check (may be NULL)
 * \param[in]     all  Whether the caller requires all members to be active
 *
 * \return TRUE if \p rsc is active and \p all is false, FALSE if \p rsc is
 *         inactive and \p all is true, or -1 if \p rsc is NULL or does not
 *         settle the question
 */
static int
replica_resource_active(pcmk_resource_t *rsc, gboolean all)
{
    gboolean is_active = FALSE;

    if (rsc == NULL) {
        return -1;
    }

    is_active = rsc->fns->active(rsc, all);

    // Only a result that disagrees with "all" is decisive
    if (!is_active == !all) {
        return -1;
    }
    return is_active? TRUE : FALSE;
}
/*!
 * \internal
 * \brief Check whether a bundle is active
 *
 * Each replica's IP, child, container, and remote resources are checked in
 * that order, and the first decisive result is returned.
 *
 * \param[in,out] rsc  Bundle resource to check
 * \param[in]     all  If true, every member must be active for the bundle to
 *                     be considered active; otherwise any active member
 *                     suffices
 *
 * \return TRUE if the bundle is considered active, otherwise FALSE
 */
gboolean
pe__bundle_active(pcmk_resource_t *rsc, gboolean all)
{
    pe__bundle_variant_data_t *bundle_data = NULL;

    get_bundle_variant_data(bundle_data, rsc);

    for (GList *item = bundle_data->replicas; item != NULL;
         item = item->next) {

        pcmk__bundle_replica_t *replica = item->data;
        pcmk_resource_t *members[] = {
            replica->ip,
            replica->child,
            replica->container,
            replica->remote,
        };

        for (size_t i = 0; i < (sizeof(members) / sizeof(members[0])); i++) {
            int verdict = replica_resource_active(members[i], all);

            if (verdict >= 0) {
                return (gboolean) verdict;
            }
        }
    }

    /* If "all" is TRUE, we've already checked that no resources were inactive,
     * so return TRUE; if "all" is FALSE, we didn't find any active resources,
     * so return FALSE.
     */
    return all;
}
/*!
 * \internal
 * \brief Find the child resource of the bundle replica on a given node
 *
 * A replica matches when its node shares the same details object as \p node.
 *
 * \param[in] bundle  Top-level bundle resource
 * \param[in] node    Node to search for
 *
 * \return Child resource of the matching replica, or NULL if no replica
 *         matches
 */
pcmk_resource_t *
pe__find_bundle_replica(const pcmk_resource_t *bundle, const pcmk_node_t *node)
{
    pe__bundle_variant_data_t *bundle_data = NULL;
    GList *item = NULL;

    CRM_ASSERT(bundle && node);

    get_bundle_variant_data(bundle_data, bundle);

    item = bundle_data->replicas;
    while (item != NULL) {
        pcmk__bundle_replica_t *candidate = item->data;

        CRM_ASSERT(candidate && candidate->node);
        if (candidate->node->details == node->details) {
            return candidate->child;
        }
        item = item->next;
    }
    return NULL;
}
/*!
 * \internal
 * \brief Print a resource, wrapped as a list item when HTML output is selected
 *
 * \param[in,out] rsc         Resource to print (nothing is done if NULL)
 * \param[in]     pre_text    Text to pass to the resource's print method
 * \param[in]     options     Print option flags
 * \param[in,out] print_data  Print destination
 *
 * \deprecated This function will be removed in a future release
 */
static void
print_rsc_in_list(pcmk_resource_t *rsc, const char *pre_text, long options,
                  void *print_data)
{
    if (rsc == NULL) {
        return;
    }

    if (options & pe_print_html) {
        status_print("<li>");
    }

    rsc->fns->print(rsc, pre_text, options, print_data);

    if (options & pe_print_html) {
        status_print("</li>\n");
    }
}
/*!
 * \internal
 * \brief Print a bundle and the resources of each of its replicas as XML
 *
 * \param[in,out] rsc         Bundle resource to print (nothing is done if
 *                            NULL)
 * \param[in]     pre_text    Prefix for each output line (NULL is treated as
 *                            an empty string)
 * \param[in]     options     Print option flags
 * \param[in,out] print_data  Print destination
 *
 * \deprecated This function will be removed in a future release
 */
static void
bundle_print_xml(pcmk_resource_t *rsc, const char *pre_text, long options,
                 void *print_data)
{
    pe__bundle_variant_data_t *bundle_data = NULL;
    char *child_text = NULL;
    GList *item = NULL;

    CRM_CHECK(rsc != NULL, return);

    pre_text = (pre_text != NULL)? pre_text : "";

    // Prefix handed to the replica members' own print methods
    child_text = crm_strdup_printf("%s ", pre_text);

    get_bundle_variant_data(bundle_data, rsc);

    // Opening tag and its attributes
    status_print("%s<bundle ", pre_text);
    status_print(PCMK_XA_ID "=\"%s\" ", rsc->id);
    status_print("type=\"%s\" ", container_agent_str(bundle_data->agent_type));
    status_print("image=\"%s\" ", bundle_data->image);
    status_print("unique=\"%s\" ", pe__rsc_bool_str(rsc, pcmk_rsc_unique));
    status_print("managed=\"%s\" ",
                 pe__rsc_bool_str(rsc, pcmk_rsc_managed));
    status_print("failed=\"%s\" ", pe__rsc_bool_str(rsc, pcmk_rsc_failed));
    status_print(">\n");

    // One <replica> element per replica, holding whichever members exist
    item = bundle_data->replicas;
    while (item != NULL) {
        pcmk__bundle_replica_t *replica = item->data;

        CRM_ASSERT(replica);

        status_print("%s <replica " PCMK_XA_ID "=\"%d\">\n",
                     pre_text, replica->offset);
        print_rsc_in_list(replica->ip, child_text, options, print_data);
        print_rsc_in_list(replica->child, child_text, options, print_data);
        print_rsc_in_list(replica->container, child_text, options, print_data);
        print_rsc_in_list(replica->remote, child_text, options, print_data);
        status_print("%s </replica>\n", pre_text);

        item = item->next;
    }

    status_print("%s</bundle>\n", pre_text);
    free(child_text);
}
/*!
 * \internal
 * \brief Output a bundle and its replicas in XML format
 *
 * The variable arguments are, in order: display options (uint32_t), the
 * bundle resource, the list of node names to restrict output to, and the
 * list of resource names to restrict output to.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments as described above
 *
 * \return pcmk_rc_no_output if nothing was output, otherwise the return code
 *         of the last element creation
 */
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
                  "GList *")
int
pe__bundle_xml(pcmk__output_t *out, va_list args)
{
    uint32_t show_opts = va_arg(args, uint32_t);
    pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
    GList *only_node = va_arg(args, GList *);
    GList *only_rsc = va_arg(args, GList *);

    pe__bundle_variant_data_t *bundle_data = NULL;
    int rc = pcmk_rc_no_output;
    gboolean header_done = FALSE;
    gboolean show_all = TRUE;
    const char *desc = NULL;

    CRM_ASSERT(rsc != NULL);
    get_bundle_variant_data(bundle_data, rsc);

    if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
        return rc;
    }

    show_all = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);

    for (GList *item = bundle_data->replicas; item != NULL;
         item = item->next) {

        pcmk__bundle_replica_t *replica = item->data;
        char *id = NULL;
        gboolean show_ip = FALSE;
        gboolean show_child = FALSE;
        gboolean show_ctnr = FALSE;
        gboolean show_remote = FALSE;

        CRM_ASSERT(replica);

        if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
            continue;
        }

        // Work out which of the replica's members pass the resource filter
        show_ip = (replica->ip != NULL)
                  && !replica->ip->fns->is_filtered(replica->ip, only_rsc,
                                                    show_all);
        show_child = (replica->child != NULL)
                     && !replica->child->fns->is_filtered(replica->child,
                                                          only_rsc, show_all);
        show_ctnr = !replica->container->fns->is_filtered(replica->container,
                                                          only_rsc, show_all);
        show_remote = (replica->remote != NULL)
                      && !replica->remote->fns->is_filtered(replica->remote,
                                                            only_rsc,
                                                            show_all);

        if (!(show_all || show_ip || show_child || show_ctnr || show_remote)) {
            continue;
        }

        // The bundle element is opened only once something will be shown
        if (!header_done) {
            header_done = TRUE;

            desc = pe__resource_description(rsc, show_opts);

            rc = pe__name_and_nvpairs_xml(out, true, "bundle", 8,
                     PCMK_XA_ID, rsc->id,
                     "type", container_agent_str(bundle_data->agent_type),
                     "image", bundle_data->image,
                     "unique", pe__rsc_bool_str(rsc, pcmk_rsc_unique),
                     "maintenance",
                     pe__rsc_bool_str(rsc, pcmk_rsc_maintenance),
                     "managed", pe__rsc_bool_str(rsc, pcmk_rsc_managed),
                     "failed", pe__rsc_bool_str(rsc, pcmk_rsc_failed),
                     PCMK_XA_DESCRIPTION, desc);
            CRM_ASSERT(rc == pcmk_rc_ok);
        }

        id = pcmk__itoa(replica->offset);
        rc = pe__name_and_nvpairs_xml(out, true, "replica", 1, PCMK_XA_ID, id);
        free(id);
        CRM_ASSERT(rc == pcmk_rc_ok);

        {
            // Members to output, in display order (NULL means "skip")
            pcmk_resource_t *shown[] = {
                show_ip? replica->ip : NULL,
                show_child? replica->child : NULL,
                show_ctnr? replica->container : NULL,
                show_remote? replica->remote : NULL,
            };

            for (size_t i = 0; i < (sizeof(shown) / sizeof(shown[0])); i++) {
                if (shown[i] != NULL) {
                    out->message(out, crm_map_element_name(shown[i]->xml),
                                 show_opts, shown[i], only_node, only_rsc);
                }
            }
        }

        pcmk__output_xml_pop_parent(out); // replica
    }

    if (header_done) {
        pcmk__output_xml_pop_parent(out); // bundle
    }

    return rc;
}
/*!
 * \internal
 * \brief Output a one-line HTML summary of a bundle replica
 *
 * The displayed name is the printable ID of the replica's remote connection
 * (or of its container, if there is no connection), followed by the
 * replica's IP address in parentheses if it has one. The name is truncated
 * if it does not fit in LINE_MAX bytes.
 *
 * \param[in,out] out        Output object
 * \param[in,out] replica    Replica to describe
 * \param[in]     node       Node to pass to the common output function
 * \param[in]     show_opts  Display options
 */
static void
pe__bundle_replica_output_html(pcmk__output_t *out,
                               pcmk__bundle_replica_t *replica,
                               pcmk_node_t *node, uint32_t show_opts)
{
    pcmk_resource_t *rsc = replica->child;
    pcmk_resource_t *named = replica->remote;
    char buffer[LINE_MAX];

    if (rsc == NULL) {
        rsc = replica->container;
    }
    if (named == NULL) {
        named = replica->container;
    }

    /* Format with a single bounded call. Chaining snprintf() calls with a
     * running offset is unsafe, because the return value is the length that
     * would have been written, which can exceed the buffer size after
     * truncation and turn the next size argument negative.
     */
    if (replica->ipaddr) {
        snprintf(buffer, sizeof(buffer), "%s (%s)",
                 rsc_printable_id(named), replica->ipaddr);
    } else {
        snprintf(buffer, sizeof(buffer), "%s", rsc_printable_id(named));
    }

    pe__common_output_html(out, rsc, buffer, node, show_opts);
}
/*!
 * \internal
 * \brief Get a display suffix for a resource's maintenance or unmanaged state
 *
 * Maintenance mode takes precedence over being unmanaged.
 *
 * \param[in] rsc  Resource to describe
 *
 * \return " (maintenance)" if the maintenance flag is set, otherwise
 *         " (unmanaged)" if the managed flag is not set, otherwise an empty
 *         string
 */
static const char *
get_unmanaged_str(const pcmk_resource_t *rsc)
{
    const char *suffix = "";

    if (pcmk_is_set(rsc->flags, pcmk_rsc_maintenance)) {
        suffix = " (maintenance)";

    } else if (!pcmk_is_set(rsc->flags, pcmk_rsc_managed)) {
        suffix = " (unmanaged)";
    }
    return suffix;
}
/*!
 * \internal
 * \brief Output a bundle and its replicas in HTML format
 *
 * The variable arguments are, in order: display options (uint32_t), the
 * bundle resource, the list of node names to restrict output to, and the
 * list of resource names to restrict output to.
 *
 * A replica is shown in detail (one entry per member resource) if implicit
 * resources were requested or if only some of its members were selected by
 * the resource filter; otherwise it is shown as a one-line summary.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments as described above
 *
 * \return pcmk_rc_ok if anything was output, otherwise pcmk_rc_no_output
 */
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
                  "GList *")
int
pe__bundle_html(pcmk__output_t *out, va_list args)
{
    uint32_t show_opts = va_arg(args, uint32_t);
    pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
    GList *only_node = va_arg(args, GList *);
    GList *only_rsc = va_arg(args, GList *);

    const char *desc = NULL;
    pe__bundle_variant_data_t *bundle_data = NULL;
    int rc = pcmk_rc_no_output;
    gboolean show_all = TRUE;

    CRM_ASSERT(rsc != NULL);
    get_bundle_variant_data(bundle_data, rsc);

    desc = pe__resource_description(rsc, show_opts);

    if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
        return rc;
    }

    show_all = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);

    for (GList *item = bundle_data->replicas; item != NULL;
         item = item->next) {

        pcmk__bundle_replica_t *replica = item->data;
        gboolean show_ip = FALSE;
        gboolean show_child = FALSE;
        gboolean show_ctnr = FALSE;
        gboolean show_remote = FALSE;
        gboolean any_selected = FALSE;
        gboolean detailed = FALSE;

        CRM_ASSERT(replica);

        if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
            continue;
        }

        // Work out which of the replica's members pass the resource filter
        show_ip = (replica->ip != NULL)
                  && !replica->ip->fns->is_filtered(replica->ip, only_rsc,
                                                    show_all);
        show_child = (replica->child != NULL)
                     && !replica->child->fns->is_filtered(replica->child,
                                                          only_rsc, show_all);
        show_ctnr = !replica->container->fns->is_filtered(replica->container,
                                                          only_rsc, show_all);
        show_remote = (replica->remote != NULL)
                      && !replica->remote->fns->is_filtered(replica->remote,
                                                            only_rsc,
                                                            show_all);

        any_selected = show_ip || show_child || show_ctnr || show_remote;
        detailed = pcmk_is_set(show_opts, pcmk_show_implicit_rscs)
                   || (!show_all && any_selected);

        // Nothing in this replica was asked for
        if (!detailed && !show_all) {
            continue;
        }

        // The list header is identical for detailed and summary output
        PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
                                 (bundle_data->nreplicas > 1)? " set" : "",
                                 rsc->id, bundle_data->image,
                                 pcmk_is_set(rsc->flags, pcmk_rsc_unique)? " (unique)" : "",
                                 desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
                                 get_unmanaged_str(rsc));

        if (detailed) {
            /* The messages invoked below need the implicit resources option
             * to be set in order to output anything, so force it on.
             */
            uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;

            if (pcmk__list_of_multiple(bundle_data->replicas)) {
                out->begin_list(out, NULL, NULL, "Replica[%d]", replica->offset);
            }

            if (show_ip) {
                out->message(out, crm_map_element_name(replica->ip->xml),
                             new_show_opts, replica->ip, only_node, only_rsc);
            }
            if (show_child) {
                out->message(out, crm_map_element_name(replica->child->xml),
                             new_show_opts, replica->child, only_node, only_rsc);
            }
            if (show_ctnr) {
                out->message(out, crm_map_element_name(replica->container->xml),
                             new_show_opts, replica->container, only_node, only_rsc);
            }
            if (show_remote) {
                out->message(out, crm_map_element_name(replica->remote->xml),
                             new_show_opts, replica->remote, only_node, only_rsc);
            }

            if (pcmk__list_of_multiple(bundle_data->replicas)) {
                out->end_list(out);
            }

        } else {
            pe__bundle_replica_output_html(out, replica,
                                           pe__current_node(replica->container),
                                           show_opts);
        }
    }

    PCMK__OUTPUT_LIST_FOOTER(out, rc);
    return rc;
}
/*!
 * \internal
 * \brief Output a one-line text summary of a bundle replica
 *
 * The displayed name is the printable ID of the replica's remote connection
 * (or of its container, if there is no connection), followed by the
 * replica's IP address in parentheses if it has one. The name is truncated
 * if it does not fit in LINE_MAX bytes.
 *
 * \param[in,out] out        Output object
 * \param[in,out] replica    Replica to describe
 * \param[in]     node       Node to pass to the common output function
 * \param[in]     show_opts  Display options
 */
static void
pe__bundle_replica_output_text(pcmk__output_t *out,
                               pcmk__bundle_replica_t *replica,
                               pcmk_node_t *node, uint32_t show_opts)
{
    const pcmk_resource_t *rsc = replica->child;
    pcmk_resource_t *named = replica->remote;
    char buffer[LINE_MAX];

    if (rsc == NULL) {
        rsc = replica->container;
    }
    if (named == NULL) {
        named = replica->container;
    }

    /* Format with a single bounded call. Chaining snprintf() calls with a
     * running offset is unsafe, because the return value is the length that
     * would have been written, which can exceed the buffer size after
     * truncation and turn the next size argument negative.
     */
    if (replica->ipaddr) {
        snprintf(buffer, sizeof(buffer), "%s (%s)",
                 rsc_printable_id(named), replica->ipaddr);
    } else {
        snprintf(buffer, sizeof(buffer), "%s", rsc_printable_id(named));
    }

    pe__common_output_text(out, rsc, buffer, node, show_opts);
}
/*!
 * \internal
 * \brief Text-format handler for the "bundle" output message
 *
 * The variadic arguments are read in this order: display flags (uint32_t),
 * the bundle resource, the list of nodes to restrict output to, and the list
 * of resource IDs to restrict output to.
 *
 * \param[in,out] out   Output object
 * \param[in]     args  Message arguments, as described above
 *
 * \return The local rc, which starts as pcmk_rc_no_output and is handed to
 *         the list header/footer macros (presumably updated by them once
 *         something has been output -- confirm against the macro definitions)
 */
PCMK__OUTPUT_ARGS("bundle", "uint32_t", "pcmk_resource_t *", "GList *",
"GList *")
int
pe__bundle_text(pcmk__output_t *out, va_list args)
{
uint32_t show_opts = va_arg(args, uint32_t);
pcmk_resource_t *rsc = va_arg(args, pcmk_resource_t *);
GList *only_node = va_arg(args, GList *);
GList *only_rsc = va_arg(args, GList *);
const char *desc = NULL;
pe__bundle_variant_data_t *bundle_data = NULL;
int rc = pcmk_rc_no_output;
gboolean print_everything = TRUE;
desc = pe__resource_description(rsc, show_opts);
get_bundle_variant_data(bundle_data, rsc);
// NOTE(review): rsc has already been used above by the time this check runs
CRM_ASSERT(rsc != NULL);
// Nothing to show if the bundle (parents included) is filtered out
if (rsc->fns->is_filtered(rsc, only_rsc, TRUE)) {
return rc;
}
// TRUE if the bundle's own ID was selected, rather than just some member
print_everything = pcmk__str_in_list(rsc->id, only_rsc, pcmk__str_star_matches);
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
gboolean print_ip, print_child, print_ctnr, print_remote;
CRM_ASSERT(replica);
// Skip replicas whose container does not pass the node filter
if (pcmk__rsc_filtered_by_node(replica->container, only_node)) {
continue;
}
// Work out which of the replica's member resources pass the resource filter
print_ip = replica->ip != NULL &&
!replica->ip->fns->is_filtered(replica->ip, only_rsc, print_everything);
print_child = replica->child != NULL &&
!replica->child->fns->is_filtered(replica->child, only_rsc, print_everything);
print_ctnr = !replica->container->fns->is_filtered(replica->container, only_rsc, print_everything);
print_remote = replica->remote != NULL &&
!replica->remote->fns->is_filtered(replica->remote, only_rsc, print_everything);
/* Detailed view: implicit resources were requested, or the bundle itself
 * was not selected but at least one of this replica's members was.
 */
if (pcmk_is_set(show_opts, pcmk_show_implicit_rscs) ||
(print_everything == FALSE && (print_ip || print_child || print_ctnr || print_remote))) {
/* The text output messages used below require the implicit-resources
 * flag (pcmk_show_implicit_rscs) to be set to do anything.
 */
uint32_t new_show_opts = show_opts | pcmk_show_implicit_rscs;
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk_rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
// Label individual replicas only when the bundle has more than one
if (pcmk__list_of_multiple(bundle_data->replicas)) {
out->list_item(out, NULL, "Replica[%d]", replica->offset);
}
out->begin_list(out, NULL, NULL, NULL);
// Each member is output using the message named after its XML element
if (print_ip) {
out->message(out, crm_map_element_name(replica->ip->xml),
new_show_opts, replica->ip, only_node, only_rsc);
}
if (print_child) {
out->message(out, crm_map_element_name(replica->child->xml),
new_show_opts, replica->child, only_node, only_rsc);
}
if (print_ctnr) {
out->message(out, crm_map_element_name(replica->container->xml),
new_show_opts, replica->container, only_node, only_rsc);
}
if (print_remote) {
out->message(out, crm_map_element_name(replica->remote->xml),
new_show_opts, replica->remote, only_node, only_rsc);
}
out->end_list(out);
// The bundle itself was not selected and neither was any member: skip
} else if (print_everything == FALSE && !(print_ip || print_child || print_ctnr || print_remote)) {
continue;
// Summary view: one line per replica
} else {
PCMK__OUTPUT_LIST_HEADER(out, FALSE, rc, "Container bundle%s: %s [%s]%s%s%s%s%s",
(bundle_data->nreplicas > 1)? " set" : "",
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk_rsc_unique)? " (unique)" : "",
desc ? " (" : "", desc ? desc : "", desc ? ")" : "",
get_unmanaged_str(rsc));
pe__bundle_replica_output_text(out, replica, pe__current_node(replica->container),
show_opts);
}
}
PCMK__OUTPUT_LIST_FOOTER(out, rc);
return rc;
}
/*!
 * \internal
 * \brief Print a one-line summary of a bundle replica (legacy print API)
 *
 * The summary label is the printable ID of the replica's remote connection if
 * it has one (otherwise of its container), followed by the replica's IP
 * address in parentheses if it has one.
 *
 * \param[in]     replica     Replica to summarize
 * \param[in]     pre_text    Prefix passed through to common_print()
 * \param[in]     options     Print options passed through to common_print()
 * \param[in,out] print_data  Print destination passed through to common_print()
 *
 * \deprecated This function will be removed in a future release
 */
static void
print_bundle_replica(pcmk__bundle_replica_t *replica, const char *pre_text,
                     long options, void *print_data)
{
    pcmk_node_t *node = NULL;
    pcmk_resource_t *rsc = replica->child;
    const pcmk_resource_t *labeled = NULL;
    char buffer[LINE_MAX];
    int offset = 0;

    // Report on the replica's child if it has one, otherwise on its container
    if (rsc == NULL) {
        rsc = replica->container;
    }

    labeled = (replica->remote != NULL)? replica->remote : replica->container;
    offset = snprintf(buffer, sizeof(buffer), "%s", rsc_printable_id(labeled));
    if (offset < 0) {
        // Output error: buffer contents are unspecified, so start over empty
        buffer[0] = '\0';
        offset = 0;
    }

    /* snprintf() returns the length the complete string would have had, so
     * after truncation the offset can be past the end of the buffer. Append
     * only while there is room, so the remaining size can never go negative
     * and wrap around to a huge unsigned value.
     */
    if ((replica->ipaddr != NULL) && ((size_t) offset < sizeof(buffer))) {
        snprintf(buffer + offset, sizeof(buffer) - (size_t) offset, " (%s)",
                 replica->ipaddr);
    }

    node = pe__current_node(replica->container);
    common_print(rsc, pre_text, buffer, node, options, print_data);
}
/*!
 * \internal
 * \brief Print a bundle resource and its replicas (legacy print API)
 *
 * \param[in,out] rsc         Bundle resource to print (nothing is done if NULL)
 * \param[in]     pre_text    Prefix for each line (a default is used if NULL)
 * \param[in]     options     Group of pe_print_* flags
 * \param[in,out] print_data  Opaque print destination, handed to the helpers
 *                            called here
 *
 * \deprecated This function will be removed in a future release
 */
void
pe__print_bundle(pcmk_resource_t *rsc, const char *pre_text, long options,
void *print_data)
{
pe__bundle_variant_data_t *bundle_data = NULL;
char *child_text = NULL;
CRM_CHECK(rsc != NULL, return);
// XML output is handled entirely by a separate function
if (options & pe_print_xml) {
bundle_print_xml(rsc, pre_text, options, print_data);
return;
}
get_bundle_variant_data(bundle_data, rsc);
if (pre_text == NULL) {
pre_text = " ";
}
// Header line: bundle ID and image, plus uniqueness and management status
// NOTE(review): status_print is a macro defined elsewhere; it presumably
// relies on the local options and print_data -- confirm
status_print("%sContainer bundle%s: %s [%s]%s%s\n",
pre_text, ((bundle_data->nreplicas > 1)? " set" : ""),
rsc->id, bundle_data->image,
pcmk_is_set(rsc->flags, pcmk_rsc_unique)? " (unique)" : "",
pcmk_is_set(rsc->flags, pcmk_rsc_managed)? "" : " (unmanaged)");
// With HTML output, the replicas are wrapped in an unordered list
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
for (GList *gIter = bundle_data->replicas; gIter != NULL;
gIter = gIter->next) {
pcmk__bundle_replica_t *replica = gIter->data;
CRM_ASSERT(replica);
if (options & pe_print_html) {
status_print("<li>");
}
if (pcmk_is_set(options, pe_print_implicit)) {
// Detailed view: print each of the replica's member resources
child_text = crm_strdup_printf(" %s", pre_text);
// Label individual replicas only when the bundle has more than one
if (pcmk__list_of_multiple(bundle_data->replicas)) {
status_print(" %sReplica[%d]\n", pre_text, replica->offset);
}
if (options & pe_print_html) {
status_print("<br />\n<ul>\n");
}
print_rsc_in_list(replica->ip, child_text, options, print_data);
print_rsc_in_list(replica->container, child_text, options, print_data);
print_rsc_in_list(replica->remote, child_text, options, print_data);
print_rsc_in_list(replica->child, child_text, options, print_data);
if (options & pe_print_html) {
status_print("</ul>\n");
}
} else {
// Summary view: a single line for the whole replica
child_text = crm_strdup_printf("%s ", pre_text);
print_bundle_replica(replica, child_text, options, print_data);
}
// The prefix is allocated anew for each replica, so release it each time
free(child_text);
if (options & pe_print_html) {
status_print("</li>\n");
}
}
if (options & pe_print_html) {
status_print("</ul>\n");
}
}
static void
free_bundle_replica(pcmk__bundle_replica_t *replica)
{
if (replica == NULL) {
return;
}
if (replica->node) {
free(replica->node);
replica->node = NULL;
}
if (replica->ip) {
free_xml(replica->ip->xml);
replica->ip->xml = NULL;
replica->ip->fns->free(replica->ip);
replica->ip = NULL;
}
if (replica->container) {
free_xml(replica->container->xml);
replica->container->xml = NULL;
replica->container->fns->free(replica->container);
replica->container = NULL;
}
if (replica->remote) {
free_xml(replica->remote->xml);
replica->remote->xml = NULL;
replica->remote->fns->free(replica->remote);
replica->remote = NULL;
}
free(replica->ipaddr);
free(replica);
}
/*!
 * \internal
 * \brief Free a bundle resource and all of its variant-specific data
 *
 * \param[in,out] rsc  Bundle resource to free (nothing is done if NULL)
 */
void
pe__free_bundle(pcmk_resource_t *rsc)
{
    pe__bundle_variant_data_t *data = NULL;
    pcmk_resource_t *child = NULL;

    CRM_CHECK(rsc != NULL, return);

    get_bundle_variant_data(data, rsc);
    pcmk__rsc_trace(rsc, "Freeing %s", rsc->id);

    // Plain string members
    free(data->prefix);
    free(data->image);
    free(data->control_port);
    free(data->host_network);
    free(data->host_netmask);
    free(data->ip_range_start);
    free(data->container_network);
    free(data->launcher_options);
    free(data->container_command);
    g_free(data->container_host_options);

    // Lists that own their elements
    g_list_free_full(data->replicas, (GDestroyNotify) free_bundle_replica);
    g_list_free_full(data->mounts, (GDestroyNotify) mount_free);
    g_list_free_full(data->ports, (GDestroyNotify) port_free);

    // Only the list cells of rsc->children are released here
    g_list_free(rsc->children);

    child = data->child;
    if (child != NULL) {
        free_xml(child->xml);
        child->xml = NULL;
        child->fns->free(child);
    }

    common_free(rsc);
}
/*!
 * \internal
 * \brief Get the role of a bundle resource
 *
 * \param[in] rsc      Ignored
 * \param[in] current  Ignored
 *
 * \return Always pcmk_role_unknown
 */
enum rsc_role_e
pe__bundle_resource_state(const pcmk_resource_t *rsc, gboolean current)
{
    return pcmk_role_unknown;
}
/*!
 * \brief Get the number of configured replicas in a bundle
 *
 * \param[in] rsc  Bundle resource
 *
 * \return Number of configured replicas, or 0 if \p rsc is NULL or is not a
 *         bundle
 */
int
pe_bundle_replicas(const pcmk_resource_t *rsc)
{
    pe__bundle_variant_data_t *data = NULL;

    // Anything that is not a bundle has no replicas
    if ((rsc == NULL) || (rsc->variant != pcmk_rsc_variant_bundle)) {
        return 0;
    }

    get_bundle_variant_data(data, rsc);
    return data->nreplicas;
}
/*!
 * \internal
 * \brief Invoke a replica member's count() method, if the member exists
 *
 * \param[in,out] member  Member resource (may be NULL)
 */
static void
count_replica_member(pcmk_resource_t *member)
{
    if (member != NULL) {
        member->fns->count(member);
    }
}

/*!
 * \internal
 * \brief Invoke the count() method of every resource in a bundle's replicas
 *
 * For each replica, the IP, child, container, and remote resources are
 * counted, in that order, skipping any that are not set.
 *
 * \param[in,out] rsc  Bundle resource
 */
void
pe__count_bundle(pcmk_resource_t *rsc)
{
    pe__bundle_variant_data_t *data = NULL;
    GList *iter = NULL;

    get_bundle_variant_data(data, rsc);

    iter = data->replicas;
    while (iter != NULL) {
        pcmk__bundle_replica_t *replica = iter->data;

        count_replica_member(replica->ip);
        count_replica_member(replica->child);
        count_replica_member(replica->container);
        count_replica_member(replica->remote);

        iter = iter->next;
    }
}
/*!
 * \internal
 * \brief Check whether a bundle is excluded by a resource ID filter
 *
 * A bundle passes the filter if its own printable ID is in \p only_rsc, or if
 * any replica's IP, child, container, or remote resource passes it.
 *
 * \param[in] rsc           Bundle resource to check
 * \param[in] only_rsc      List of resource IDs to show
 * \param[in] check_parent  Not used by this implementation
 *
 * \return TRUE if \p rsc is filtered out, otherwise FALSE
 */
gboolean
pe__bundle_is_filtered(const pcmk_resource_t *rsc, GList *only_rsc,
                       gboolean check_parent)
{
    pe__bundle_variant_data_t *data = NULL;

    // The bundle itself was selected
    if (pcmk__str_in_list(rsc_printable_id(rsc), only_rsc,
                          pcmk__str_star_matches)) {
        return FALSE;
    }

    // Otherwise, a single selected member is enough to keep the bundle
    get_bundle_variant_data(data, rsc);
    for (GList *iter = data->replicas; iter != NULL; iter = iter->next) {
        pcmk__bundle_replica_t *replica = iter->data;

        if ((replica->ip != NULL)
            && !replica->ip->fns->is_filtered(replica->ip, only_rsc, FALSE)) {
            return FALSE;
        }
        if ((replica->child != NULL)
            && !replica->child->fns->is_filtered(replica->child, only_rsc,
                                                 FALSE)) {
            return FALSE;
        }
        if (!replica->container->fns->is_filtered(replica->container,
                                                  only_rsc, FALSE)) {
            return FALSE;
        }
        if ((replica->remote != NULL)
            && !replica->remote->fns->is_filtered(replica->remote, only_rsc,
                                                  FALSE)) {
            return FALSE;
        }
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Get a list of a bundle's containers
 *
 * \param[in] bundle  Bundle resource
 *
 * \return Newly created list of \p bundle's containers, in replica order
 * \note It is the caller's responsibility to free the result with
 *       g_list_free().
 */
GList *
pe__bundle_containers(const pcmk_resource_t *bundle)
{
    GList *containers = NULL;
    const pe__bundle_variant_data_t *data = NULL;

    get_bundle_variant_data(data, bundle);

    /* g_list_append() walks the whole list on every call, which makes
     * appending in a loop quadratic. Prepend instead, then reverse once to
     * restore replica order.
     */
    for (GList *iter = data->replicas; iter != NULL; iter = iter->next) {
        pcmk__bundle_replica_t *replica = iter->data;

        containers = g_list_prepend(containers, replica->container);
    }
    return g_list_reverse(containers);
}
// Bundle implementation of pcmk_rsc_methods_t:active_node()
pcmk_node_t *
pe__bundle_active_node(const pcmk_resource_t *rsc, unsigned int *count_all,
unsigned int *count_clean)
{
pcmk_node_t *active = NULL;
pcmk_node_t *node = NULL;
pcmk_resource_t *container = NULL;
GList *containers = NULL;
GList *iter = NULL;
GHashTable *nodes = NULL;
const pe__bundle_variant_data_t *data = NULL;
if (count_all != NULL) {
*count_all = 0;
}
if (count_clean != NULL) {
*count_clean = 0;
}
if (rsc == NULL) {
return NULL;
}
/* For the purposes of this method, we only care about where the bundle's
* containers are active, so build a list of active containers.
*/
get_bundle_variant_data(data, rsc);
for (iter = data->replicas; iter != NULL; iter = iter->next) {
pcmk__bundle_replica_t *replica = iter->data;
if (replica->container->running_on != NULL) {
containers = g_list_append(containers, replica->container);
}
}
if (containers == NULL) {
return NULL;
}
/* If the bundle has only a single active container, just use that
* container's method. If live migration is ever supported for bundle
* containers, this will allow us to prefer the migration source when there
* is only one container and it is migrating. For now, this just lets us
* avoid creating the nodes table.
*/
if (pcmk__list_of_1(containers)) {
container = containers->data;
node = container->fns->active_node(container, count_all, count_clean);
g_list_free(containers);
return node;
}
// Add all containers' active nodes to a hash table (for uniqueness)
nodes = g_hash_table_new(NULL, NULL);
for (iter = containers; iter != NULL; iter = iter->next) {
container = iter->data;
for (GList *node_iter = container->running_on; node_iter != NULL;
node_iter = node_iter->next) {
node = node_iter->data;
// If insert returns true, we haven't counted this node yet
if (g_hash_table_insert(nodes, (gpointer) node->details,
(gpointer) node)
&& !pe__count_active_node(rsc, node, &active, count_all,
count_clean)) {
goto done;
}
}
}
done:
g_list_free(containers);
g_hash_table_destroy(nodes);
return active;
}
/*!
 * \internal
 * \brief Get maximum bundle resource instances per node
 *
 * \param[in] rsc  Bundle resource to check
 *
 * \return Maximum number of \p rsc instances that can be active on one node
 *         (the bundle's replicas-per-host setting, asserted non-negative)
 */
unsigned int
pe__bundle_max_per_node(const pcmk_resource_t *rsc)
{
    pe__bundle_variant_data_t *data = NULL;

    get_bundle_variant_data(data, rsc);

    // The value is stored signed, so verify it before converting
    CRM_ASSERT(data->nreplicas_per_host >= 0);

    return (unsigned int) data->nreplicas_per_host;
}
diff --git a/lib/pengine/pe_digest.c b/lib/pengine/pe_digest.c
index 7f1f365b2a..934b55bcca 100644
--- a/lib/pengine/pe_digest.c
+++ b/lib/pengine/pe_digest.c
@@ -1,609 +1,609 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <glib.h>
#include <stdbool.h>
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
#include <crm/pengine/internal.h>
#include "pe_status_private.h"
extern bool pcmk__is_daemon;
/*!
 * \internal
 * \brief Free an operation digest cache entry
 *
 * Releases the entry's three parameter XML trees, its three calculated digest
 * strings, and the entry itself.
 *
 * \param[in,out] ptr  Pointer to cache entry to free (may be NULL)
 *
 * \note The argument is a gpointer so this can be used as a hash table
 *       free function.
 */
void
pe__free_digests(gpointer ptr)
{
    pcmk__op_digest_t *entry = ptr;

    if (entry == NULL) {
        return;
    }

    // Parameter sets
    free_xml(entry->params_all);
    free_xml(entry->params_secure);
    free_xml(entry->params_restart);

    // Digests calculated from them
    free(entry->digest_all_calc);
    free(entry->digest_restart_calc);
    free(entry->digest_secure_calc);

    free(entry);
}
/*!
 * \internal
 * \brief Check whether an XML attribute's name is absent from a given string
 *
 * The name is searched for with a space on either side of it.
 *
 * \param[in] a          XML attribute to check
 * \param[in] user_data  String to search (as const char *)
 *
 * \return true if the space-delimited name is not found, otherwise false
 */
static bool
attr_not_in_string(xmlAttrPtr a, void *user_data)
{
    const char *attr_name = (const char *) a->name;
    const char *haystack = (const char *) user_data;
    char *delimited = crm_strdup_printf(" %s ", attr_name);
    const bool missing = (strstr(haystack, delimited) == NULL);

    free(delimited);
    if (missing) {
        crm_trace("Filtering %s (not found in '%s')", attr_name, haystack);
    }
    return missing;
}
/*!
 * \internal
 * \brief Check whether an XML attribute's name is present in a given string
 *
 * The name is searched for with a space on either side of it.
 *
 * \param[in] a          XML attribute to check
 * \param[in] user_data  String to search (as const char *)
 *
 * \return true if the space-delimited name is found, otherwise false
 */
static bool
attr_in_string(xmlAttrPtr a, void *user_data)
{
    const char *attr_name = (const char *) a->name;
    const char *haystack = (const char *) user_data;
    char *delimited = crm_strdup_printf(" %s ", attr_name);
    const bool present = (strstr(haystack, delimited) != NULL);

    free(delimited);
    if (present) {
        crm_trace("Filtering %s (found in '%s')", attr_name, haystack);
    }
    return present;
}
/*!
 * \internal
 * \brief Add digest of all parameters to a digest cache entry
 *
 * Builds data->params_all from, in order: the bundle remote address (if
 * any), the overrides, the provided instance parameters, action-specific
 * instance attributes, and action meta-attributes. The result is then
 * filtered and its digest stored in data->digest_all_calc.
 *
 * \param[out] data Digest cache entry to modify
 * \param[in,out] rsc Resource that action was for
 * \param[in] node Node action was performed on
 * \param[in] params Resource parameters evaluated for node (not freed here)
 * \param[in] task Name of action performed
 * \param[in,out] interval_ms Action's interval (will be reset if in overrides)
 * \param[in] xml_op Unused
 * \param[in] op_version CRM feature set to use for digest calculation
 * \param[in] overrides Key/value table to override resource parameters
 * \param[in,out] scheduler Scheduler data
 */
static void
calculate_main_digest(pcmk__op_digest_t *data, pcmk_resource_t *rsc,
const pcmk_node_t *node, GHashTable *params,
const char *task, guint *interval_ms,
const xmlNode *xml_op, const char *op_version,
GHashTable *overrides, pcmk_scheduler_t *scheduler)
{
xmlNode *action_config = NULL;
data->params_all = create_xml_node(NULL, XML_TAG_PARAMS);
/* REMOTE_CONTAINER_HACK: Allow Pacemaker Remote nodes to run containers
 * that themselves are Pacemaker Remote nodes
 */
(void) pe__add_bundle_remote_name(rsc, scheduler, data->params_all,
- XML_RSC_ATTR_REMOTE_RA_ADDR);
+ PCMK_REMOTE_RA_ADDR);
if (overrides != NULL) {
// If interval was overridden, reset it
const char *interval_s = g_hash_table_lookup(overrides, CRM_META "_"
XML_LRM_ATTR_INTERVAL);
if (interval_s != NULL) {
long long value_ll;
// Accept the override only if it parses and fits in a guint
if ((pcmk__scan_ll(interval_s, &value_ll, 0LL) == pcmk_rc_ok)
&& (value_ll >= 0) && (value_ll <= G_MAXUINT)) {
*interval_ms = (guint) value_ll;
}
}
// Add overrides to list of all parameters
g_hash_table_foreach(overrides, hash2field, data->params_all);
}
// Add provided instance parameters
g_hash_table_foreach(params, hash2field, data->params_all);
// Find action configuration XML in CIB
action_config = pcmk__find_action_config(rsc, task, *interval_ms, true);
/* Add action-specific resource instance attributes to the digest list.
 *
 * If this is a one-time action with action-specific instance attributes,
 * enforce a restart instead of reload-agent in case the main digest doesn't
 * match, even if the restart digest does. This ensures any changes of the
 * action-specific parameters get applied for this specific action, and
 * digests calculated for the resulting history will be correct. Default the
 * result to RSC_DIGEST_RESTART for the case where the main digest doesn't
 * match.
 *
 * From here on, the params variable is reused for temporary tables that are
 * created and destroyed within this function.
 */
params = pcmk__unpack_action_rsc_params(action_config, node->details->attrs,
scheduler);
if ((*interval_ms == 0) && (g_hash_table_size(params) > 0)) {
data->rc = pcmk__digest_restart;
}
g_hash_table_foreach(params, hash2field, data->params_all);
g_hash_table_destroy(params);
// Add action meta-attributes
params = pcmk__unpack_action_meta(rsc, node, task, *interval_ms,
action_config);
g_hash_table_foreach(params, hash2metafield, data->params_all);
g_hash_table_destroy(params);
// Filter the collected parameters, then calculate the digest of what is left
pcmk__filter_op_for_digest(data->params_all);
data->digest_all_calc = calculate_operation_digest(data->params_all,
op_version);
}
/*!
 * \internal
 * \brief Check whether an XML attribute is a Pacemaker-defined fencing
 *        parameter
 *
 * \param[in] attr       XML attribute to check
 * \param[in] user_data  Ignored
 *
 * \return Result of pcmk_stonith_param() for the attribute's name
 */
static bool
is_fence_param(xmlAttrPtr attr, void *user_data)
{
    const char *attr_name = (const char *) attr->name;

    return pcmk_stonith_param(attr_name);
}
/*!
 * \internal
 * \brief Add secure digest to a digest cache entry
 *
 * The secure digest is calculated from the parameters that remain after the
 * ones named in the secure list have been removed. For operations whose
 * feature set is older than 3.16.0, the parameter set is built and filtered
 * differently (see the comments in the body).
 *
 * \param[out] data Digest cache entry to modify (data->params_all must
 *                  already be set when \p op_version is 3.16.0 or later)
 * \param[in] rsc Resource that action was for
 * \param[in] params Resource parameters evaluated for node
 * \param[in] xml_op XML of operation in CIB status (if available)
 * \param[in] op_version CRM feature set to use for digest calculation
 * \param[in] overrides Key/value hash table to override resource parameters
 */
static void
calculate_secure_digest(pcmk__op_digest_t *data, const pcmk_resource_t *rsc,
GHashTable *params, const xmlNode *xml_op,
const char *op_version, GHashTable *overrides)
{
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
const char *secure_list = NULL;
bool old_version = (compare_version(op_version, "3.16.0") < 0);
// Without operation history, fall back to a built-in list of parameter names
if (xml_op == NULL) {
secure_list = " passwd password user ";
} else {
secure_list = crm_element_value(xml_op, XML_LRM_ATTR_OP_SECURE);
}
if (old_version) {
// Older feature sets: start from just the overrides and instance parameters
data->params_secure = create_xml_node(NULL, XML_TAG_PARAMS);
if (overrides != NULL) {
g_hash_table_foreach(overrides, hash2field, data->params_secure);
}
g_hash_table_foreach(params, hash2field, data->params_secure);
} else {
// Start with a copy of all parameters
data->params_secure = copy_xml(data->params_all);
}
// Remove every parameter whose name appears in the secure list
if (secure_list != NULL) {
pcmk__xe_remove_matching_attrs(data->params_secure, attr_in_string,
(void *) secure_list);
}
if (old_version
&& pcmk_is_set(pcmk_get_ra_caps(class),
pcmk_ra_cap_fence_params)) {
/* For stonith resources, Pacemaker adds special parameters,
 * but these are not listed in fence agent meta-data, so with older
 * versions of DC, the controller will not hash them. That means we have
 * to filter them out before calculating our hash for comparison.
 */
pcmk__xe_remove_matching_attrs(data->params_secure, is_fence_param,
NULL);
}
pcmk__filter_op_for_digest(data->params_secure);
/* CRM_meta_timeout *should* be part of a digest for recurring operations.
 * However, with older versions of DC, the controller does not add timeout
 * to secure digests, because it only includes parameters declared by the
 * resource agent.
 * Remove any timeout that made it this far, to match.
 */
if (old_version) {
xml_remove_prop(data->params_secure, CRM_META "_" XML_ATTR_TIMEOUT);
}
data->digest_secure_calc = calculate_operation_digest(data->params_secure,
op_version);
}
/*!
 * \internal
 * \brief Add restart digest to a digest cache entry
 *
 * Nothing is done unless operation history is available and it contains a
 * restart digest to compare against.
 *
 * \param[out] data        Digest cache entry to modify
 * \param[in]  xml_op      XML of operation in CIB status (if available)
 * \param[in]  op_version  Not used in the body; the feature set recorded in
 *                         \p xml_op is used for the calculation instead
 *
 * \note This function doesn't need to handle overrides because it starts with
 *       data->params_all, which already has overrides applied.
 */
static void
calculate_restart_digest(pcmk__op_digest_t *data, const xmlNode *xml_op,
                         const char *op_version)
{
    const char *restart_params = NULL;
    const char *feature_set = NULL;

    // Both the history entry and its recorded restart digest are required
    if ((xml_op == NULL)
        || (crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST) == NULL)) {
        return;
    }

    // Begin with every parameter ...
    data->params_restart = copy_xml(data->params_all);

    // ... and keep only those named in the history's restart list, if any
    restart_params = crm_element_value(xml_op, XML_LRM_ATTR_OP_RESTART);
    if (restart_params != NULL) {
        pcmk__xe_remove_matching_attrs(data->params_restart,
                                       attr_not_in_string,
                                       (void *) restart_params);
    }

    feature_set = crm_element_value(xml_op, PCMK_XA_CRM_FEATURE_SET);
    data->digest_restart_calc =
        calculate_operation_digest(data->params_restart, feature_set);
}
/*!
 * \internal
 * \brief Choose the CRM feature set to use for digest calculation
 *
 * \param[in] xml_op     XML of operation in CIB status (may be NULL)
 * \param[in] scheduler  Scheduler data (may be NULL)
 *
 * \return Feature set from \p xml_op if it has one, otherwise from the
 *         scheduler input if it has one, otherwise CRM_FEATURE_SET
 */
static const char *
digest_feature_set(const xmlNode *xml_op, pcmk_scheduler_t *scheduler)
{
    const char *version = NULL;

    if (xml_op != NULL) {
        version = crm_element_value(xml_op, PCMK_XA_CRM_FEATURE_SET);
    }
    if ((version == NULL) && (scheduler != NULL)
        && (scheduler->input != NULL)) {
        version = crm_element_value(scheduler->input,
                                    PCMK_XA_CRM_FEATURE_SET);
    }
    if (version == NULL) {
        version = CRM_FEATURE_SET;
    }
    return version;
}

/*!
 * \internal
 * \brief Create a new digest cache entry with calculated digests
 *
 * \param[in,out] rsc          Resource that action was for
 * \param[in]     task         Name of action performed
 * \param[in,out] interval_ms  Action's interval (will be reset if in overrides)
 * \param[in]     node         Node action was performed on
 * \param[in]     xml_op       XML of operation in CIB status (if available)
 * \param[in]     overrides    Key/value table to override resource parameters
 * \param[in]     calc_secure  Whether to calculate secure digest
 * \param[in,out] scheduler    Scheduler data
 *
 * \return Pointer to new digest cache entry (or NULL on memory error)
 * \note It is the caller's responsibility to free the result using
 *       pe__free_digests().
 */
pcmk__op_digest_t *
pe__calculate_digests(pcmk_resource_t *rsc, const char *task,
                      guint *interval_ms, const pcmk_node_t *node,
                      const xmlNode *xml_op, GHashTable *overrides,
                      bool calc_secure, pcmk_scheduler_t *scheduler)
{
    const char *op_version = NULL;
    GHashTable *params = NULL;
    pcmk__op_digest_t *data = calloc(1, sizeof(*data));

    if (data == NULL) {
        pcmk__sched_err("Could not allocate memory for operation digest");
        return NULL;
    }
    data->rc = pcmk__digest_match;

    op_version = digest_feature_set(xml_op, scheduler);
    params = pe_rsc_params(rsc, node, scheduler);

    // The main digest comes first, because its parameter set is reused below
    calculate_main_digest(data, rsc, node, params, task, interval_ms, xml_op,
                          op_version, overrides, scheduler);
    if (calc_secure) {
        calculate_secure_digest(data, rsc, params, xml_op, op_version,
                                overrides);
    }
    calculate_restart_digest(data, xml_op, op_version);

    return data;
}
/*!
 * \internal
 * \brief Calculate action digests and store in node's digest cache
 *
 * If the node's digest cache already has an entry for this operation, that
 * entry is returned without recalculating anything.
 *
 * \param[in,out] rsc          Resource that action was for
 * \param[in]     task         Name of action performed
 * \param[in]     interval_ms  Action's interval
 * \param[in,out] node         Node action was performed on
 * \param[in]     xml_op       XML of operation in CIB status (if available)
 * \param[in]     calc_secure  Whether to calculate secure digest
 * \param[in,out] scheduler    Scheduler data
 *
 * \return Pointer to node's digest cache entry
 */
static pcmk__op_digest_t *
rsc_action_digest(pcmk_resource_t *rsc, const char *task, guint interval_ms,
                  pcmk_node_t *node, const xmlNode *xml_op,
                  bool calc_secure, pcmk_scheduler_t *scheduler)
{
    GHashTable *cache = node->details->digest_cache;
    char *key = pcmk__op_key(rsc->id, task, interval_ms);
    pcmk__op_digest_t *data = g_hash_table_lookup(cache, key);

    if (data != NULL) {
        // Cache hit: the lookup key is no longer needed
        free(key);
        return data;
    }

    data = pe__calculate_digests(rsc, task, &interval_ms, node, xml_op,
                                 NULL, calc_secure, scheduler);
    CRM_ASSERT(data != NULL);

    /* Hand the key itself over to the cache rather than inserting an
     * unchecked strdup() of it, which could have put a NULL key in the table
     * if the allocation failed. The key must not be freed here after this.
     */
    g_hash_table_insert(cache, key, data);
    return data;
}
/*!
 * \internal
 * \brief Calculate operation digests and compare against an XML history entry
 *
 * The comparison result is stored in the returned entry's rc member:
 * pcmk__digest_restart if the restart digests differ, pcmk__digest_unknown
 * if the history has no digest of all parameters, pcmk__digest_mismatch (or
 * a preserved pcmk__digest_restart) if the digests of all parameters differ,
 * and pcmk__digest_match otherwise.
 *
 * \param[in,out] rsc Resource to check
 * \param[in] xml_op Resource history XML
 * \param[in,out] node Node to use for digest calculation (must not be NULL)
 * \param[in,out] scheduler Scheduler data
 *
 * \return Pointer to node's digest cache entry, with comparison result set
 */
pcmk__op_digest_t *
rsc_action_digest_cmp(pcmk_resource_t *rsc, const xmlNode *xml_op,
pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
pcmk__op_digest_t *data = NULL;
guint interval_ms = 0;
const char *op_version;
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *digest_all;
const char *digest_restart;
CRM_ASSERT(node != NULL);
// Read what the history entry recorded (op_version is used only in logs)
op_version = crm_element_value(xml_op, PCMK_XA_CRM_FEATURE_SET);
digest_all = crm_element_value(xml_op, XML_LRM_ATTR_OP_DIGEST);
digest_restart = crm_element_value(xml_op, XML_LRM_ATTR_RESTART_DIGEST);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
// The secure digest is requested only if the scheduler has the sanitized flag
data = rsc_action_digest(rsc, task, interval_ms, node, xml_op,
pcmk_is_set(scheduler->flags,
pcmk_sched_sanitized),
scheduler);
// The restart digest is checked first, when both sides have one
if (digest_restart && data->digest_restart_calc && strcmp(data->digest_restart_calc, digest_restart) != 0) {
pcmk__rsc_info(rsc,
"Parameters to %ums-interval %s action for %s on %s "
"changed: hash was %s vs. now %s (restart:%s) %s",
interval_ms, task, rsc->id, pe__node_name(node),
pcmk__s(digest_restart, "missing"),
data->digest_restart_calc, op_version,
crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC));
data->rc = pcmk__digest_restart;
} else if (digest_all == NULL) {
/* it is unknown what the previous op digest was */
data->rc = pcmk__digest_unknown;
} else if (strcmp(digest_all, data->digest_all_calc) != 0) {
/* Given a non-recurring operation with extra parameters configured,
 * in case that the main digest doesn't match, even if the restart
 * digest matches, enforce a restart rather than a reload-agent anyway.
 * So that it ensures any changes of the extra parameters get applied
 * for this specific operation, and the digests calculated for the
 * resulting lrm_rsc_op will be correct.
 * Preserve the implied rc pcmk__digest_restart for the case that the
 * main digest doesn't match.
 */
if ((interval_ms == 0) && (data->rc == pcmk__digest_restart)) {
// rc is deliberately left as pcmk__digest_restart here
pcmk__rsc_info(rsc,
"Parameters containing extra ones to %ums-interval"
" %s action for %s on %s "
"changed: hash was %s vs. now %s (restart:%s) %s",
interval_ms, task, rsc->id, pe__node_name(node),
pcmk__s(digest_all, "missing"),
data->digest_all_calc, op_version,
crm_element_value(xml_op,
XML_ATTR_TRANSITION_MAGIC));
} else {
pcmk__rsc_info(rsc,
"Parameters to %ums-interval %s action for %s on %s "
"changed: hash was %s vs. now %s (%s:%s) %s",
interval_ms, task, rsc->id, pe__node_name(node),
pcmk__s(digest_all, "missing"),
data->digest_all_calc,
(interval_ms > 0)? "reschedule" : "reload",
op_version,
crm_element_value(xml_op,
XML_ATTR_TRANSITION_MAGIC));
data->rc = pcmk__digest_mismatch;
}
} else {
// Neither digest differs from the history
data->rc = pcmk__digest_match;
}
return data;
}
/*!
 * \internal
 * \brief Create an unfencing summary for use in special node attribute
 *
 * The summary joins a fence device's resource ID, agent type, and parameter
 * digest (of either all parameters or just the non-private ones) with colons.
 * Stored in a special node attribute, it allows changes in either the agent
 * type or the parameters to be detected, to know whether unfencing must be
 * redone or can be safely skipped when the device's history is cleaned.
 *
 * \param[in] rsc_id        Fence device resource ID
 * \param[in] agent_type    Fence device agent
 * \param[in] param_digest  Fence device parameter digest
 *
 * \return Newly allocated string with unfencing digest
 * \note The caller is responsible for freeing the result.
 */
static inline char *
create_unfencing_summary(const char *rsc_id, const char *agent_type,
                         const char *param_digest)
{
    char *summary = crm_strdup_printf("%s:%s:%s", rsc_id, agent_type,
                                      param_digest);

    return summary;
}
/*!
 * \internal
 * \brief Check whether a node can skip unfencing
 *
 * Check whether a fence device's current definition matches a node's
 * stored summary of when it was last unfenced by the device.
 *
 * \param[in] rsc_id        Fence device's resource ID
 * \param[in] agent         Fence device's agent type
 * \param[in] digest_calc   Fence device's current parameter digest
 * \param[in] node_summary  Value of node's special unfencing node attribute
 *                          (a comma-separated list of unfencing summaries for
 *                          all devices that have unfenced this node)
 *
 * \return true if the digest matches, false otherwise (including when any
 *         argument is NULL)
 */
static bool
unfencing_digest_matches(const char *rsc_id, const char *agent,
                         const char *digest_calc, const char *node_summary)
{
    char *wanted = NULL;
    bool found = false;

    // No comparison is possible unless every piece is available
    if ((rsc_id == NULL) || (agent == NULL) || (digest_calc == NULL)
        || (node_summary == NULL)) {
        return false;
    }

    wanted = create_unfencing_summary(rsc_id, agent, digest_calc);

    /* The digest was calculated including the device ID and agent,
     * so there is no risk of collision using strstr().
     */
    found = (strstr(node_summary, wanted) != NULL);
    crm_trace("Calculated unfencing digest '%s' %sfound in '%s'",
              wanted, found? "" : "not ", node_summary);

    free(wanted);
    return found;
}
/* Magic string to use as action name for digest cache entries used for
* unfencing checks. This is not a real action name (i.e. "on"), so
* pcmk__check_action_config() won't confuse these entries with real actions.
*/
#define STONITH_DIGEST_TASK "stonith-on"
/*!
 * \internal
 * \brief Calculate fence device digests and digest comparison result
 *
 * The result's rc member is set to pcmk__digest_unknown if the node has no
 * summary of all-parameter digests, pcmk__digest_match if either the full or
 * the secure digest is found in the corresponding node summary, and
 * pcmk__digest_mismatch otherwise.
 *
 * \param[in,out] rsc        Fence device resource
 * \param[in]     agent      Fence device's agent type
 * \param[in,out] node       Node with digest cache to use
 * \param[in,out] scheduler  Scheduler data
 *
 * \return Node's digest cache entry
 */
pcmk__op_digest_t *
pe__compare_fencing_digest(pcmk_resource_t *rsc, const char *agent,
                           pcmk_node_t *node, pcmk_scheduler_t *scheduler)
{
    const char *summary_all = NULL;
    const char *summary_secure = NULL;

    // Current parameter digests for the device (secure digest included)
    pcmk__op_digest_t *data = rsc_action_digest(rsc, STONITH_DIGEST_TASK, 0U,
                                                node, NULL, TRUE, scheduler);

    // Without the special unfencing summary attribute, nothing can be compared
    summary_all = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_ALL);
    if (summary_all == NULL) {
        data->rc = pcmk__digest_unknown;
        return data;
    }

    // First choice: the digest of all parameters
    if (unfencing_digest_matches(rsc->id, agent, data->digest_all_calc,
                                 summary_all)) {
        data->rc = pcmk__digest_match;
        return data;
    }

    // Second choice: the digest of the secure parameters
    summary_secure = pe_node_attribute_raw(node, CRM_ATTR_DIGESTS_SECURE);
    if (unfencing_digest_matches(rsc->id, agent, data->digest_secure_calc,
                                 summary_secure)) {
        data->rc = pcmk__digest_match;
        if (!pcmk__is_daemon && (scheduler->priv != NULL)) {
            pcmk__output_t *out = scheduler->priv;

            out->info(out, "Only 'private' parameters to %s "
                      "for unfencing %s changed", rsc->id,
                      pe__node_name(node));
        }
        return data;
    }

    // Neither digest was found
    data->rc = pcmk__digest_mismatch;

    // A suggested summary is shown only for sanitized input with a secure digest
    if (!pcmk_is_set(scheduler->flags, pcmk_sched_sanitized)
        || (data->digest_secure_calc == NULL)) {
        return data;
    }

    if (scheduler->priv != NULL) {
        pcmk__output_t *out = scheduler->priv;
        char *suggestion = create_unfencing_summary(rsc->id, agent,
                                                    data->digest_secure_calc);

        out->info(out, "Parameters to %s for unfencing "
                  "%s changed, try '%s'", rsc->id,
                  pe__node_name(node), suggestion);
        free(suggestion);

    } else if (!pcmk__is_daemon) {
        char *suggestion = create_unfencing_summary(rsc->id, agent,
                                                    data->digest_secure_calc);

        printf("Parameters to %s for unfencing %s changed, try '%s'\n",
               rsc->id, pe__node_name(node), suggestion);
        free(suggestion);
    }
    return data;
}
diff --git a/lib/pengine/remote.c b/lib/pengine/remote.c
index 42163f61f9..22808925ca 100644
--- a/lib/pengine/remote.c
+++ b/lib/pengine/remote.c
@@ -1,275 +1,274 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/scheduler_internal.h>
#include <crm/pengine/internal.h>
#include <glib.h>
bool
pe__resource_is_remote_conn(const pcmk_resource_t *rsc)
{
return (rsc != NULL) && rsc->is_remote_node
&& pe__is_remote_node(pe_find_node(rsc->cluster->nodes, rsc->id));
}
/*!
 * \internal
 * \brief Check whether a node is a remote node that is not a guest node
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node has the remote variant type and either has no
 *         remote connection resource or that resource has no container,
 *         otherwise false
 */
bool
pe__is_remote_node(const pcmk_node_t *node)
{
    if (node == NULL) {
        return false;
    }
    if (node->details->type != pcmk_node_variant_remote) {
        return false;
    }
    if (node->details->remote_rsc == NULL) {
        return true;
    }
    return node->details->remote_rsc->container == NULL;
}
/*!
 * \internal
 * \brief Check whether a node is a guest node
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node has the remote variant type and its remote
 *         connection resource exists and has a container, otherwise false
 */
bool
pe__is_guest_node(const pcmk_node_t *node)
{
    if (node == NULL) {
        return false;
    }
    if (node->details->type != pcmk_node_variant_remote) {
        return false;
    }
    if (node->details->remote_rsc == NULL) {
        return false;
    }
    return node->details->remote_rsc->container != NULL;
}
/*!
 * \internal
 * \brief Check whether a node has the remote variant type
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node is non-NULL and its type is
 *         pcmk_node_variant_remote, otherwise false
 */
bool
pe__is_guest_or_remote_node(const pcmk_node_t *node)
{
    if (node == NULL) {
        return false;
    }
    return node->details->type == pcmk_node_variant_remote;
}
/*!
 * \internal
 * \brief Check whether a node is a guest node whose connection is bundled
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node satisfies pe__is_guest_node() and its remote
 *         connection resource satisfies pe_rsc_is_bundled(), otherwise false
 */
bool
pe__is_bundle_node(const pcmk_node_t *node)
{
    // Only guest nodes are guaranteed to have a remote connection resource
    if (!pe__is_guest_node(node)) {
        return false;
    }
    return pe_rsc_is_bundled(node->details->remote_rsc);
}
/*!
 * \internal
 * \brief Find the remote connection among a resource's filler resources
 *
 * Walk the fillers of a given resource and return the first one that is
 * flagged as a remote node. Nothing is searched unless the scheduler data has
 * the pcmk_sched_have_remote_nodes flag set.
 *
 * \param[in] scheduler  Scheduler data (may be NULL)
 * \param[in] rsc        Resource to check (may be NULL)
 *
 * \return First filler resource flagged as a remote node, or NULL if there is
 *         none (or if either argument is NULL or the flag is unset)
 */
pcmk_resource_t *
pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler,
                                 const pcmk_resource_t *rsc)
{
    if ((rsc == NULL) || (scheduler == NULL)
        || !pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
        return NULL;
    }

    for (GList *item = rsc->fillers; item != NULL; item = item->next) {
        pcmk_resource_t *candidate = item->data;

        if (candidate->is_remote_node) {
            return candidate;
        }
    }
    return NULL;
}
/*!
 * \brief Check whether XML names the ocf:pacemaker:remote agent
 *
 * The element's type, class, and provider attributes are compared (using
 * pcmk__str_casei) against "remote", PCMK_RESOURCE_CLASS_OCF, and "pacemaker"
 * respectively, in that order, stopping at the first mismatch.
 *
 * \param[in] xml  XML element to check (may be NULL)
 *
 * \return true if all three attributes match, otherwise false
 */
bool
xml_contains_remote_node(xmlNode *xml)
{
    if (xml == NULL) {
        return false;
    }

    return pcmk__str_eq(crm_element_value(xml, XML_ATTR_TYPE), "remote",
                        pcmk__str_casei)
           && pcmk__str_eq(crm_element_value(xml, XML_AGENT_ATTR_CLASS),
                           PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)
           && pcmk__str_eq(crm_element_value(xml, XML_AGENT_ATTR_PROVIDER),
                           "pacemaker", pcmk__str_casei);
}
/*!
 * \internal
 * \brief Invoke a callback for every guest node hosted on a given node
 *
 * Each resource running on \p host that is flagged as a remote node and has a
 * container is looked up by ID in the scheduler's node list; the callback is
 * called for every such lookup that finds a node.
 *
 * \param[in]     scheduler  Scheduler data
 * \param[in]     host       Host node to check
 * \param[in]     helper     Function to call for each guest node
 * \param[in,out] user_data  Pointer to pass to helper function
 */
void
pe_foreach_guest_node(const pcmk_scheduler_t *scheduler,
                      const pcmk_node_t *host,
                      void (*helper)(const pcmk_node_t*, void*),
                      void *user_data)
{
    CRM_CHECK(scheduler && host && host->details && helper, return);

    // Nothing to do if the scheduler data has no remote nodes at all
    if (!pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
        return;
    }

    for (GList *item = host->details->running_rsc; item != NULL;
         item = item->next) {

        pcmk_resource_t *rsc = (pcmk_resource_t *) item->data;
        pcmk_node_t *guest = NULL;

        // Skip anything that is not a contained remote connection
        if (!rsc->is_remote_node || (rsc->container == NULL)) {
            continue;
        }

        guest = pe_find_node(scheduler->nodes, rsc->id);
        if (guest != NULL) {
            helper(guest, user_data);
        }
    }
}
/*!
* \internal
* \brief Create CIB XML for an implicit remote connection
*
* The created resource element always gets the ocf:pacemaker:remote agent
* identity, a meta-attribute set marking it as an internal resource, and a
* monitor operation. Everything else is added only when the corresponding
* argument is non-NULL.
*
* \param[in,out] parent If not NULL, use as parent XML element
* \param[in] uname Name of Pacemaker Remote node (used as the resource ID
*                  and as the prefix of the generated set IDs)
* \param[in] container_id If not NULL, use this as connection container
* \param[in] migrateable If not NULL, use as allow-migrate value
* \param[in] is_managed If not NULL, use as is-managed value
* \param[in] start_timeout If not NULL, add a start operation using this as
*                          its timeout (remote connect timeout)
* \param[in] server If not NULL, use as remote server value
* \param[in] port If not NULL, use as remote port value
*
* \return Newly created XML
*/
xmlNode *
pe_create_remote_xml(xmlNode *parent, const char *uname,
const char *container_id, const char *migrateable,
const char *is_managed, const char *start_timeout,
const char *server, const char *port)
{
xmlNode *remote;
xmlNode *xml_sub;
remote = create_xml_node(parent, XML_CIB_TAG_RESOURCE);
// Add identity
crm_xml_add(remote, PCMK_XA_ID, uname);
crm_xml_add(remote, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(remote, XML_AGENT_ATTR_PROVIDER, "pacemaker");
crm_xml_add(remote, XML_ATTR_TYPE, "remote");
// Add meta-attributes
xml_sub = create_xml_node(remote, XML_TAG_META_SETS);
crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_META_SETS);
// The internal-resource marker is set unconditionally
crm_create_nvpair_xml(xml_sub, NULL,
PCMK__META_INTERNAL_RSC, XML_BOOLEAN_TRUE);
if (container_id) {
crm_create_nvpair_xml(xml_sub, NULL,
PCMK__META_CONTAINER, container_id);
}
if (migrateable) {
crm_create_nvpair_xml(xml_sub, NULL,
PCMK_META_ALLOW_MIGRATE, migrateable);
}
if (is_managed) {
crm_create_nvpair_xml(xml_sub, NULL, PCMK_META_IS_MANAGED, is_managed);
}
// Add instance attributes
// (the attribute set itself is created only if it will have content)
if (port || server) {
xml_sub = create_xml_node(remote, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_ATTR_SETS);
if (server) {
- crm_create_nvpair_xml(xml_sub, NULL, XML_RSC_ATTR_REMOTE_RA_ADDR,
- server);
+ crm_create_nvpair_xml(xml_sub, NULL, PCMK_REMOTE_RA_ADDR, server);
}
if (port) {
crm_create_nvpair_xml(xml_sub, NULL,
XML_RSC_ATTR_REMOTE_RA_PORT, port);
}
}
// Add operations
// (a monitor is always added; a start is added only with a start timeout)
xml_sub = create_xml_node(remote, "operations");
crm_create_op_xml(xml_sub, uname, PCMK_ACTION_MONITOR, "30s", "30s");
if (start_timeout) {
crm_create_op_xml(xml_sub, uname, PCMK_ACTION_START, "0",
start_timeout);
}
return remote;
}
// History entry to be checked for fail count clearing
// (one of these is allocated per deferred check and kept on the scheduler's
// param_check list; the pointers are stored as given, not copied)
struct check_op {
const xmlNode *rsc_op; // History entry XML
pcmk_resource_t *rsc; // Known resource corresponding to history entry
pcmk_node_t *node; // Known node corresponding to history entry
enum pcmk__check_parameters check_type; // What needs checking
};
/*!
 * \internal
 * \brief Queue a deferred parameter check for a resource history entry
 *
 * A new entry recording the arguments is prepended to the scheduler's
 * param_check list. The arguments are stored by pointer, not copied.
 *
 * \param[in]     rsc_op     History entry XML
 * \param[in,out] rsc        Resource corresponding to history entry
 * \param[in,out] node       Node corresponding to history entry
 * \param[in]     flag       What needs checking
 * \param[in,out] scheduler  Scheduler data
 */
void
pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc,
                    pcmk_node_t *node, enum pcmk__check_parameters flag,
                    pcmk_scheduler_t *scheduler)
{
    struct check_op *check_op = NULL;

    CRM_CHECK(scheduler && rsc_op && rsc && node, return);

    check_op = calloc(1, sizeof(*check_op));
    CRM_ASSERT(check_op != NULL);

    crm_trace("Deferring checks of %s until after allocation", ID(rsc_op));

    // Record what to check, and against which resource and node
    check_op->check_type = flag;
    check_op->node = node;
    check_op->rsc = rsc;
    check_op->rsc_op = rsc_op;

    scheduler->param_check = g_list_prepend(scheduler->param_check, check_op);
}
/*!
 * \internal
 * \brief Call a function for each deferred parameter check
 *
 * The callback receives the resource, node, history entry XML, and check type
 * recorded in each entry of the scheduler's param_check list, in list order.
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     cb         Function to be called
 */
void
pe__foreach_param_check(pcmk_scheduler_t *scheduler,
                        void (*cb)(pcmk_resource_t*, pcmk_node_t*,
                                   const xmlNode*, enum pcmk__check_parameters))
{
    GList *item = NULL;

    CRM_CHECK(scheduler && cb, return);

    item = scheduler->param_check;
    while (item != NULL) {
        const struct check_op *entry = item->data;

        cb(entry->rsc, entry->node, entry->rsc_op, entry->check_type);
        item = item->next;
    }
}
/*!
 * \internal
 * \brief Free all deferred parameter checks held by scheduler data
 *
 * Each list entry is released with free(), and the scheduler's param_check
 * list pointer is reset to NULL.
 *
 * \param[in,out] scheduler  Scheduler data (may be NULL)
 */
void
pe__free_param_checks(pcmk_scheduler_t *scheduler)
{
    if ((scheduler == NULL) || (scheduler->param_check == NULL)) {
        return;
    }

    g_list_free_full(scheduler->param_check, free);
    scheduler->param_check = NULL;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 8:04 PM (11 h, 28 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665638
Default Alt Text
(246 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment