Page Menu
Home
ClusterLabs Projects
Search
Configure Global Search
Log In
Files
F3687141
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
76 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 5109e7a41f..1066d22182 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -1,1477 +1,1479 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>

#include <limits.h>                 // INT_MAX

#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml_internal.h>
#include <crm/lrmd.h>
#include <crm/lrmd_internal.h>
#include <crm/services.h>
#include <pacemaker-controld.h>
/* Agent type name of the internal remote connection resource agent; compared
 * against the agent name in is_remote_lrmd_ra() and reported as the resource
 * type by remote_ra_get_rsc_info().
 */
#define REMOTE_LRMD_RA "remote"
/* The max start timeout before cmd retry: handle_remote_ra_start() never
 * passes a longer timeout (in milliseconds) to a single connection attempt.
 */
#define MAX_START_TIMEOUT_MS 10000
/* Set flags (enum remote_cmd_status values) in a remote_ra_cmd_t's status
 * member via pcmk__set_flags_as(), which is given the calling function and
 * line, LOG_TRACE, the command's rsc_id, and the stringified flag expression.
 *
 * The cmd argument is expanded more than once, so it should be free of side
 * effects.
 */
#define cmd_set_flags(cmd, flags_to_set) do { \
(cmd)->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_set), #flags_to_set); \
} while (0)
/* Clear flags (enum remote_cmd_status values) in a remote_ra_cmd_t's status
 * member via pcmk__clear_flags_as(), which is given the calling function and
 * line, LOG_TRACE, the command's rsc_id, and the stringified flag expression.
 *
 * The cmd argument is expanded more than once, so it should be free of side
 * effects.
 */
#define cmd_clear_flags(cmd, flags_to_clear) do { \
(cmd)->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, \
"Remote command", (cmd)->rsc_id, (cmd)->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
/* Bit flags kept in the status member of a remote_ra_cmd_t */
enum remote_cmd_status {
/* A successful result has already been reported for this command (set after
 * the first successful poke response is reported for a monitor)
 */
cmd_reported_success = (1 << 0),
/* The command was cancelled while it was the in-flight command (set by
 * remote_ra_cancel()); a cancelled monitor is not rescheduled
 */
cmd_cancel = (1 << 1),
};
/* One action requested of the internal remote connection agent, together with
 * the timers and result tracking needed to run it asynchronously. Instances
 * are released with free_cmd().
 */
typedef struct remote_ra_cmd_s {
/*! the local node the cmd is issued from */
char *owner;
/*! the remote node the cmd is executed on */
char *rsc_id;
/*! the action to execute */
char *action;
/*! some string the client wants us to give it back */
char *userdata;
/*! start delay in ms */
int start_delay;
/*! timer id used for start delay. */
int delay_id;
/*! timeout in ms for cmd */
int timeout;
/*! time in ms left of timeout, as last computed by update_remaining_timeout() */
int remaining_timeout;
/*! recurring interval in ms */
guint interval_ms;
/*! interval timer id */
int interval_id;
/*! timer id used while a monitor waits for a poke response */
int monitor_timeout_id;
/*! timer id used while a stop waits for a takeover event */
int takeover_timeout_id;
/*! action parameters */
lrmd_key_value_t *params;
/*! result of the action, passed on by report_remote_ra_result() */
pcmk__action_result_t result;
/*! call ID passed on with the result */
int call_id;
/*! timestamp reported as the action's run time, and the base from which the
 * remaining timeout is computed
 */
time_t start_time;
/*! group of enum remote_cmd_status flags */
uint32_t status;
} remote_ra_cmd_t;
/* Set flags (enum remote_status values) in the remote_ra_data_t attached to
 * an lrm_state_t, via pcmk__set_flags_as() (traced under the state's
 * node_name).
 *
 * lrm_state is evaluated once. Its remote_ra_data member is dereferenced
 * without a NULL check, so it must already be initialized. Because the
 * expansion declares locals named "lrm" and "ra", the argument expression
 * must not refer to variables with those names.
 */
#define lrm_remote_set_flags(lrm_state, flags_to_set) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__set_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_set), #flags_to_set); \
} while (0)
/* Clear flags (enum remote_status values) in the remote_ra_data_t attached to
 * an lrm_state_t, via pcmk__clear_flags_as() (traced under the state's
 * node_name).
 *
 * lrm_state is evaluated once. Its remote_ra_data member is dereferenced
 * without a NULL check, so it must already be initialized. Because the
 * expansion declares locals named "lrm" and "ra", the argument expression
 * must not refer to variables with those names.
 */
#define lrm_remote_clear_flags(lrm_state, flags_to_clear) do { \
lrm_state_t *lrm = (lrm_state); \
remote_ra_data_t *ra = lrm->remote_ra_data; \
ra->status = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE, "Remote", \
lrm->node_name, ra->status, \
(flags_to_clear), #flags_to_clear); \
} while (0)
/* Bit flags kept in the status member of a remote_ra_data_t */
enum remote_status {
/* A migrate_to action has been executed, so a new client connecting to the
 * remote daemon is expected rather than an error
 */
expect_takeover = (1 << 0),
/* An expected new-client event has arrived; executor events for this
 * connection are ignored from then on
 */
takeover_complete = (1 << 1),
/* The connection was established successfully and has not been stopped */
remote_active = (1 << 2),
/* Maintenance mode is difficult to determine from the controller's context,
* so we have it signalled back with the transition from the scheduler.
*/
remote_in_maint = (1 << 3),
/* Similar for whether we are controlling a guest node or remote node.
* Fortunately there is a meta-attribute in the transition already and
* as the situation doesn't change over time we can use the
* resource start for noting down the information for later use when
* the attributes aren't at hand.
*/
controlling_guest = (1 << 4),
};
/* Per-connection bookkeeping for the internal remote agent, attached to an
 * lrm_state_t as its remote_ra_data member (see remote_ra_data_init()).
 */
typedef struct remote_ra_data_s {
/* mainloop trigger that runs handle_remote_ra_exec() for this connection */
crm_trigger_t *work;
/* command currently waiting for an asynchronous event or timer, if any */
remote_ra_cmd_t *cur_cmd;
/* commands (remote_ra_cmd_t *) queued for execution */
GList *cmds;
/* recurring commands (remote_ra_cmd_t *) waiting for their next interval */
GList *recurring_cmds;
/* group of enum remote_status flags */
uint32_t status;
} remote_ra_data_t;
/* Forward declarations for functions that are used before they are defined */
static int handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms);
static void handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd);
static GList *fail_all_monitor_cmds(GList * list);
/*!
 * \internal
 * \brief Free a remote RA command, removing any timers it still has pending
 *
 * \param[in,out] user_data  Command to free (a remote_ra_cmd_t *, or NULL)
 *
 * \note The signature allows this to be used as the element destructor for
 *       g_list_free_full().
 */
static void
free_cmd(gpointer user_data)
{
    remote_ra_cmd_t *doomed = user_data;

    if (doomed == NULL) {
        return;
    }

    {
        /* Source IDs of every timer the command may own; 0 means "no timer",
         * and must not be passed to g_source_remove().
         */
        const int timer_ids[] = {
            doomed->delay_id,
            doomed->interval_id,
            doomed->monitor_timeout_id,
            doomed->takeover_timeout_id,
        };
        size_t i = 0;

        for (i = 0; i < (sizeof(timer_ids) / sizeof(timer_ids[0])); i++) {
            if (timer_ids[i]) {
                g_source_remove(timer_ids[i]);
            }
        }
    }

    free(doomed->owner);
    free(doomed->rsc_id);
    free(doomed->action);
    free(doomed->userdata);
    pcmk__reset_result(&(doomed->result));
    lrmd_key_value_freeall(doomed->params);
    free(doomed);
}
/*!
 * \internal
 * \brief Generate the next call ID for a remote RA command
 *
 * IDs start at 1 and increase by one per call. After INT_MAX has been handed
 * out, numbering restarts at 1, so the result is always positive.
 *
 * \return Newly generated positive call ID
 */
static int
generate_callid(void)
{
    static int remote_ra_callid = 0;

    /* Wrap explicitly before incrementing: incrementing a signed int that
     * already holds INT_MAX is undefined behavior, so the overflow cannot be
     * detected after the fact.
     */
    if (remote_ra_callid >= INT_MAX) {
        remote_ra_callid = 1;
    } else {
        remote_ra_callid++;
    }
    return remote_ra_callid;
}
/*!
 * \internal
 * \brief Timer callback that queues a recurring command for execution again
 *
 * If the command's connection still has remote RA data, the command is moved
 * from the recurring list to the end of the execution queue, and the
 * connection's work trigger is set.
 *
 * \param[in,out] data  Recurring command (remote_ra_cmd_t *)
 *
 * \return FALSE always (so a timer using this as its callback fires once)
 */
static gboolean
recurring_helper(gpointer data)
{
    remote_ra_cmd_t *recurring = data;
    lrm_state_t *lrm_state = NULL;
    remote_ra_data_t *ra_data = NULL;

    // Whatever timer led here is finished with, so forget its ID
    recurring->interval_id = 0;

    lrm_state = lrm_state_find(recurring->rsc_id);
    if ((lrm_state == NULL) || (lrm_state->remote_ra_data == NULL)) {
        return FALSE;
    }

    ra_data = lrm_state->remote_ra_data;
    ra_data->recurring_cmds = g_list_remove(ra_data->recurring_cmds,
                                            recurring);
    ra_data->cmds = g_list_append(ra_data->cmds, recurring);
    mainloop_set_trigger(ra_data->work);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback invoked when a command's start delay has elapsed
 *
 * Clears the command's delay timer ID (which is what handle_remote_ra_exec()
 * checks before running a queued command) and sets the connection's work
 * trigger, if the connection still has remote RA data.
 *
 * \param[in,out] data  Delayed command (remote_ra_cmd_t *)
 *
 * \return FALSE always (so the timer fires once)
 */
static gboolean
start_delay_helper(gpointer data)
{
    remote_ra_cmd_t *delayed = data;
    lrm_state_t *lrm_state = NULL;

    delayed->delay_id = 0;

    lrm_state = lrm_state_find(delayed->rsc_id);
    if ((lrm_state != NULL) && (lrm_state->remote_ra_data != NULL)) {
        remote_ra_data_t *ra_data = lrm_state->remote_ra_data;

        mainloop_set_trigger(ra_data->work);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Decide whether a remote node's attributes should be purged
 *
 * Purging is the default. Attributes are kept only when all of the following
 * can be established: the node records a connection host, that host is in the
 * peer cache with a nonzero peer_lost timestamp, the node has an executor
 * connection state with a positive uptime, and that uptime (plus 20 seconds of
 * slack) covers the time elapsed since the host was noticed lost.
 *
 * \param[in] node  Remote node to check
 *
 * \return true if attributes should be purged, false if they should be kept
 */
static bool
should_purge_attributes(crm_node_t *node)
{
    crm_node_t *conn_host = NULL;
    lrm_state_t *lrm_state = NULL;
    time_t uptime = 0;
    time_t now = 0;

    if (node->conn_host == NULL) {
        return true;
    }

    /* The peer cache entry that matters is the one for the node that was
     * hosting the remote connection resource.
     */
    conn_host = crm_get_peer(0, node->conn_host);
    if (conn_host == NULL) {
        return true;
    }

    lrm_state = lrm_state_find(node->uname);
    if (lrm_state == NULL) {
        return true;
    }

    uptime = lrmd__uptime(lrm_state->conn);
    now = time(NULL);

    /* The remote has been "running long enough" if it started no later than
     * the moment its connection host was noticed lost. The extra 20s give
     * corosync a while to notice that the host is gone. If the uptime could
     * not be obtained or peer_lost is unset, fall through to purging.
     */
    if ((uptime > 0)
        && (conn_host->peer_lost > 0)
        && (uptime + 20 >= now - conn_host->peer_lost)) {
        return false;
    }
    return true;
}
/*!
 * \internal
 * \brief Choose which node state section to delete for a remote node
 *
 * \param[in] purge  Whether node attributes are being purged as well
 *
 * \return The "all" section variant if \p purge is true, otherwise the "lrm"
 *         variant; in both cases the "_unlocked" flavor is chosen when the
 *         controld_shutdown_lock_enabled global flag is set
 */
static enum controld_section_e
section_to_delete(bool purge)
{
    const bool locks_enabled = pcmk_is_set(controld_globals.flags,
                                           controld_shutdown_lock_enabled);

    if (purge) {
        return locks_enabled? controld_section_all_unlocked
                            : controld_section_all;
    }
    return locks_enabled? controld_section_lrm_unlocked
                        : controld_section_lrm;
}
/*!
 * \internal
 * \brief Clear stale state for a remote node that is coming up
 *
 * Deletes the node state section selected by section_to_delete(), and first
 * asks the attribute manager to forget the node if should_purge_attributes()
 * says so.
 *
 * \param[in] call_opt  Call options passed to controld_delete_node_state()
 * \param[in] node      Remote node whose state should be cleared
 */
static void
purge_remote_node_attrs(int call_opt, crm_node_t *node)
{
    const bool drop_attrs = should_purge_attributes(node);
    const enum controld_section_e doomed_section = section_to_delete(drop_attrs);

    if (drop_attrs) {
        // Make attrd forget the node as well
        update_attrd_remote_node_removed(node->uname, NULL);
    }

    controld_delete_node_state(node->uname, doomed_section, call_opt);
}
/*!
 * \internal
 * \brief Tell the cluster that a pacemaker_remote node has joined
 *
 * \param[in] node_name  Name of the newly integrated pacemaker_remote node
 */
static void
remote_node_up(const char *node_name)
{
    int cib_opts = 0;
    crm_node_t *remote_peer = NULL;
    lrm_state_t *lrm_state = NULL;
    xmlNode *status_update = NULL;
    xmlNode *node_state = NULL;

    CRM_CHECK(node_name != NULL, return);
    crm_info("Announcing Pacemaker Remote node %s", node_name);

    cib_opts = crmd_cib_smart_opt();

    /* Remove the node's probe_complete attribute, for two reasons:
     *
     * - @COMPAT DCs < 1.1.14 in a rolling upgrade might use it
     * - deleting any attribute at all at this point makes sure the attribute
     *   manager learns that the node is remote
     */
    update_attrd(node_name, CRM_OP_PROBED, NULL, NULL, TRUE);

    // Make sure the remote peer cache has the node, marked as a member
    remote_peer = crm_remote_peer_get(node_name);
    CRM_CHECK(remote_peer != NULL, return);

    purge_remote_node_attrs(cib_opts, remote_peer);
    pcmk__update_peer_state(__func__, remote_peer, CRM_NODE_MEMBER, 0);

    /* If the environment on the remote node supplied a start state, apply it
     * now.
     */
    lrm_state = lrm_state_find(remote_peer->uname);
    if (lrm_state != NULL) {
        const char *start_state = lrmd__node_start_state(lrm_state->conn);

        if (start_state != NULL) {
            set_join_state(start_state, remote_peer->uname, remote_peer->uuid,
                           true);
        }
    }

    /* Cluster nodes are not told automatically when pacemaker_remote nodes
     * come and go, because those nodes are not part of the membership layer.
     * So, message the DC directly and also update the node state entry in the
     * CIB: the DC learns of the change sooner (message) or later (CIB
     * refresh), and anyone else interested can query the CIB.
     */
    broadcast_remote_state_message(node_name, true);

    status_update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    node_state = create_node_state_update(remote_peer, node_update_cluster,
                                          status_update, __func__);

    /* Reset XML_NODE_IS_FENCED in the node state, so that if the node needs
     * fencing later, various actions can tell whether that fencing has
     * happened yet.
     */
    crm_xml_add(node_state, XML_NODE_IS_FENCED, "0");

    /* TODO: This CIB update is asynchronous. If the remote connection drops
     * while the update has failed or is still pending, a previously set
     * XML_NODE_IS_FENCED will not have been cleared, and later actions could
     * wrongly conclude the node was already fenced. That could prevent actual
     * fencing, or let recurring monitor failures be cleared too soon. Ideally
     * the fenced status would not depend on the CIB.
     */
    controld_update_cib(XML_CIB_TAG_STATUS, status_update, cib_opts, NULL);
    free_xml(status_update);
}
/* Options for remote_node_down(), selecting what is erased from the node's
 * state in addition to its attributes
 */
enum down_opts {
/* keep the node's resource (LRM) history; only node attributes are deleted */
DOWN_KEEP_LRM,
/* delete the node's entire state section, resource history included */
DOWN_ERASE_LRM
};
/*!
 * \internal
 * \brief Tell the cluster that a pacemaker_remote node has left
 *
 * \param[in] node_name  Name of the lost node
 * \param[in] opts       Whether to keep or erase the node's LRM history
 */
static void
remote_node_down(const char *node_name, const enum down_opts opts)
{
    int cib_opts = crmd_cib_smart_opt();
    crm_node_t *remote_peer = NULL;
    xmlNode *status_update = NULL;

    // Make attrd forget the node
    update_attrd_remote_node_removed(node_name, NULL);

    /* Ordinarily just the node attributes are erased, with resource history
     * kept until the node returns. After a successful fence, though, the
     * history goes too, so resources aren't thought to still be running on
     * the node.
     */
    controld_delete_node_state(node_name,
                               ((opts == DOWN_ERASE_LRM)?
                                controld_section_all : controld_section_attrs),
                               cib_opts);

    // Make sure the remote peer cache has the node, marked as lost
    remote_peer = crm_remote_peer_get(node_name);
    CRM_CHECK(remote_peer != NULL, return);
    pcmk__update_peer_state(__func__, remote_peer, CRM_NODE_LOST, 0);

    // Let the DC know
    broadcast_remote_state_message(node_name, false);

    // Record the new node state in the CIB
    status_update = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    create_node_state_update(remote_peer, node_update_cluster, status_update,
                             __func__);
    controld_update_cib(XML_CIB_TAG_STATUS, status_update, cib_opts, NULL);
    free_xml(status_update);
}
/*!
 * \internal
 * \brief Apply the effects of a finished remote RA command to node state
 *
 * \param[in] cmd  Completed remote RA command
 *
 * \note Successful monitors deliberately change nothing. That is right for
 *       routine recurring monitors, and for monitors on nodes where the
 *       connection is not supposed to be (the cluster will stop the connection
 *       there). An initial probe that finds the connection already active on
 *       the desired node arguably should lead to remote_node_up(), but that
 *       case cannot be told apart here; since connections must be initiated
 *       by the cluster, the chance of it should be close to zero.
 */
static void
check_remote_node_state(const remote_ra_cmd_t *cmd)
{
    // Node state can change only as a result of a successful action
    if (!pcmk__result_ok(&(cmd->result))) {
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_START, pcmk__str_casei)) {
        remote_node_up(cmd->rsc_id);
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_MIGRATE_FROM,
                     pcmk__str_casei)) {
        /* A successful migration needs no remote_node_up(): the DC already
         * knows the node is up, and LRM history etc. should not be cleared.
         * The remote node does have to be added to this host's remote peer
         * cache, though. Unless this host happens to be DC, it hasn't been
         * tracking the remote node, and other code uses the cache to tell
         * remote nodes from unseen cluster nodes.
         */
        crm_node_t *remote_peer = crm_remote_peer_get(cmd->rsc_id);

        CRM_CHECK(remote_peer != NULL, return);
        pcmk__update_peer_state(__func__, remote_peer, CRM_NODE_MEMBER, 0);
        return;
    }

    if (pcmk__str_eq(cmd->action, PCMK_ACTION_STOP, pcmk__str_casei)) {
        lrm_state_t *lrm_state = lrm_state_find(cmd->rsc_id);
        remote_ra_data_t *ra_data = NULL;

        if (lrm_state != NULL) {
            ra_data = lrm_state->remote_ra_data;
        }
        if (ra_data == NULL) {
            return;
        }

        if (!pcmk_is_set(ra_data->status, takeover_complete)) {
            // Without a successful migration elsewhere, stop means down
            remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);

        } else if (AM_I_DC == FALSE) {
            /* Node state is tracked only by the connection host and the DC.
             * The connection has migrated elsewhere and we are not DC, so
             * drop the node from the cache rather than keep stale info.
             */
            crm_remote_peer_cache_remove(cmd->rsc_id);
        }
    }
}
/*!
 * \internal
 * \brief Report the result of a remote RA command as an executor event
 *
 * Applies the command's effect on node state, then builds an
 * lrmd_event_exec_complete event from the command and passes it to
 * lrm_op_callback().
 *
 * \param[in,out] cmd  Command whose result should be reported
 */
static void
report_remote_ra_result(remote_ra_cmd_t * cmd)
{
    lrmd_event_data_t event = { 0, };

    check_remote_node_state(cmd);

    event.type = lrmd_event_exec_complete;
    event.rsc_id = cmd->rsc_id;
    event.op_type = cmd->action;
    event.user_data = cmd->userdata;
    event.timeout = cmd->timeout;
    event.interval_ms = cmd->interval_ms;
    event.t_run = (unsigned int) cmd->start_time;
    event.t_rcchange = (unsigned int) cmd->start_time;

    lrmd__set_result(&event, cmd->result.exit_status,
                     cmd->result.execution_status, cmd->result.exit_reason);

    if (pcmk_is_set(cmd->status, cmd_reported_success)
        && !pcmk__result_ok(&(cmd->result))) {

        event.t_rcchange = (unsigned int) time(NULL);

        /* A failure following an already-reported success must look like it
         * happened at a different time, or it will not be processed
         * correctly. Nothing guarantees that the first successful monitor
         * and a later failed one don't share a timestamp: a connection
         * resource's TCP connection can be detected as failed at any moment
         * after start completes (the recurring operation itself is just a
         * connectivity ping). This will likely never happen, but if the two
         * timestamps do coincide, nudge the change time forward.
         */
        if (event.t_rcchange == event.t_run) {
            event.t_rcchange++;
        }
    }

    if (cmd->params != NULL) {
        lrmd_key_value_t *param = NULL;

        // The event gets its own copy of the parameters, as a hash table
        event.params = pcmk__strkey_table(free, free);
        for (param = cmd->params; param != NULL; param = param->next) {
            g_hash_table_insert(event.params, strdup(param->key),
                                strdup(param->value));
        }
    }

    event.call_id = cmd->call_id;
    event.remote_nodename = cmd->owner;

    lrm_op_callback(&event);

    if (event.params != NULL) {
        g_hash_table_destroy(event.params);
    }
    lrmd__reset_result(&event);
}
/*!
 * \internal
 * \brief Recompute how much of a command's timeout is left
 *
 * Stores in cmd->remaining_timeout the command's timeout (truncated to whole
 * seconds) minus the seconds elapsed since cmd->start_time, converted back to
 * milliseconds. The value is zero or negative once the timeout has passed.
 *
 * \param[in,out] cmd  Command to update
 */
static void
update_remaining_timeout(remote_ra_cmd_t * cmd)
{
    const time_t elapsed_s = time(NULL) - cmd->start_time;

    cmd->remaining_timeout = ((cmd->timeout / 1000) - elapsed_s) * 1000;
}
/*!
 * \internal
 * \brief Timer callback that retries a start or migrate_from connection
 *
 * If the connection's current command is a start or migrate_from with time
 * remaining, another connection attempt is initiated. If the attempt cannot
 * be initiated, or no time remains, the command's result is reported and the
 * command is freed.
 *
 * \param[in,out] data  Executor state of the connection (lrm_state_t *)
 *
 * \return FALSE always (so the timer fires once)
 */
static gboolean
retry_start_cmd_cb(gpointer data)
{
    lrm_state_t *lrm_state = data;
    remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
    remote_ra_cmd_t *pending = NULL;
    int rc = ETIME;

    if ((ra_data == NULL) || (ra_data->cur_cmd == NULL)) {
        return FALSE;
    }

    pending = ra_data->cur_cmd;
    if (!pcmk__strcase_any_of(pending->action, PCMK_ACTION_START,
                              PCMK_ACTION_MIGRATE_FROM, NULL)) {
        return FALSE;
    }

    update_remaining_timeout(pending);

    if (pending->remaining_timeout > 0) {
        rc = handle_remote_ra_start(lrm_state, pending,
                                    pending->remaining_timeout);
    } else {
        pcmk__set_result(&(pending->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_TIMEOUT,
                         "Not enough time remains to retry remote connection");
    }

    if (rc == pcmk_rc_ok) {
        // Attempt initiated; the connection event will finish the command
        return FALSE;
    }

    report_remote_ra_result(pending);
    if (ra_data->cmds) {
        mainloop_set_trigger(ra_data->work);
    }
    ra_data->cur_cmd = NULL;
    free_cmd(pending);
    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback for a stop that gave up waiting for a takeover event
 *
 * Carries out the stop via handle_remote_ra_stop() (which also reports the
 * result), then frees the command.
 *
 * \param[in,out] data  Stop command that was waiting (remote_ra_cmd_t *)
 *
 * \return FALSE always (so the timer fires once)
 *
 * \note NOTE(review): the looked-up executor state is passed on unchecked, and
 *       handle_remote_ra_stop() asserts that it is non-NULL -- confirm the
 *       state cannot disappear while this timer is pending.
 */
static gboolean
connection_takeover_timeout_cb(gpointer data)
{
    remote_ra_cmd_t *stop_cmd = data;
    lrm_state_t *lrm_state = NULL;

    crm_info("takeover event timed out for node %s", stop_cmd->rsc_id);

    // The timer is spent, so free_cmd() must not try to remove it
    stop_cmd->takeover_timeout_id = 0;

    lrm_state = lrm_state_find(stop_cmd->rsc_id);
    handle_remote_ra_stop(lrm_state, stop_cmd);
    free_cmd(stop_cmd);

    return FALSE;
}
/*!
 * \internal
 * \brief Timer callback for a monitor whose poke response never arrived
 *
 * Reports the monitor as timed out, frees it, and disconnects the executor
 * connection (if its state still exists).
 *
 * \param[in,out] data  Monitor command that timed out (remote_ra_cmd_t *)
 *
 * \return FALSE always (so the timer fires once)
 */
static gboolean
monitor_timeout_cb(gpointer data)
{
    remote_ra_cmd_t *monitor = data;
    lrm_state_t *lrm_state = lrm_state_find(monitor->rsc_id);
    remote_ra_data_t *ra_data = NULL;

    crm_info("Timed out waiting for remote poke response from %s%s",
             monitor->rsc_id, ((lrm_state != NULL)? "" : " (no LRM state)"));

    // The timer is spent, so free_cmd() must not try to remove it
    monitor->monitor_timeout_id = 0;

    pcmk__set_result(&(monitor->result), PCMK_OCF_UNKNOWN_ERROR,
                     PCMK_EXEC_TIMEOUT, "Remote executor did not respond");

    if (lrm_state != NULL) {
        ra_data = lrm_state->remote_ra_data;
    }
    if (ra_data != NULL) {
        // Let go of the command and let any queued commands proceed
        if (ra_data->cur_cmd == monitor) {
            ra_data->cur_cmd = NULL;
        }
        if (ra_data->cmds) {
            mainloop_set_trigger(ra_data->work);
        }
    }

    report_remote_ra_result(monitor);
    free_cmd(monitor);

    if (lrm_state != NULL) {
        lrm_state_disconnect(lrm_state);
    }
    return FALSE;
}
/*!
 * \internal
 * \brief Process a fabricated successful executor result for an action
 *
 * \param[in,out] lrm_state  Executor state to process the event for (if NULL,
 *                           the local node's state is looked up and used)
 * \param[in]     rsc_id     ID of resource the result is for
 * \param[in]     op_type    Name of action the result is for
 */
static void
synthesize_lrmd_success(lrm_state_t *lrm_state, const char *rsc_id, const char *op_type)
{
    lrmd_event_data_t fake_event = { 0, };

    if (lrm_state == NULL) {
        // No state given, so assume the local executor
        lrm_state = lrm_state_find(controld_globals.our_nodename);
    }
    CRM_ASSERT(lrm_state != NULL);

    fake_event.type = lrmd_event_exec_complete;
    fake_event.rsc_id = rsc_id;
    fake_event.op_type = op_type;

    // Run time and last change time are both "now"
    fake_event.t_run = (unsigned int) time(NULL);
    fake_event.t_rcchange = fake_event.t_run;

    fake_event.call_id = generate_callid();
    lrmd__set_result(&fake_event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
    process_lrm_event(lrm_state, &fake_event, NULL, NULL);
}
/*!
 * \internal
 * \brief Handle an executor event received for a remote connection
 *
 * Events are dispatched in this order:
 * - New-client events complete an expected takeover, or else cause a
 *   disconnect; either way nothing further is done.
 * - Execution-complete events are passed on to lrm_op_callback() unless the
 *   connection has been taken over by another node.
 * - Disconnect events with no command in flight are handled according to
 *   whether the connection was active and whether it is in maintenance.
 * - Anything else is matched against the command in flight (cur_cmd); if the
 *   event completes that command, the command is released and the work
 *   trigger is set for any queued commands.
 *
 * \param[in,out] op  Executor event; op->remote_nodename identifies the
 *                    connection
 */
void
remote_lrm_op_callback(lrmd_event_data_t * op)
{
gboolean cmd_handled = FALSE;
lrm_state_t *lrm_state = NULL;
remote_ra_data_t *ra_data = NULL;
remote_ra_cmd_t *cmd = NULL;
crm_debug("Processing '%s%s%s' event on remote connection to %s: %s "
"(%d) status=%s (%d)",
(op->op_type? op->op_type : ""), (op->op_type? " " : ""),
lrmd_event_type2str(op->type), op->remote_nodename,
services_ocf_exitcode_str(op->rc), op->rc,
pcmk_exec_status_str(op->op_status), op->op_status);
lrm_state = lrm_state_find(op->remote_nodename);
if (!lrm_state || !lrm_state->remote_ra_data) {
crm_debug("No state information found for remote connection event");
return;
}
ra_data = lrm_state->remote_ra_data;
if (op->type == lrmd_event_new_client) {
// Another client has connected to the remote daemon
if (pcmk_is_set(ra_data->status, expect_takeover)) {
// Great, we knew this was coming
lrm_remote_clear_flags(lrm_state, expect_takeover);
lrm_remote_set_flags(lrm_state, takeover_complete);
} else {
crm_err("Disconnecting from Pacemaker Remote node %s due to "
"unexpected client takeover", op->remote_nodename);
/* In this case, lrmd_tls_connection_destroy() will be called under the control of mainloop. */
/* Do not free lrm_state->conn yet. */
/* It'll be freed in the following stop action. */
lrm_state_disconnect_only(lrm_state);
}
return;
}
/* filter all EXEC events up */
if (op->type == lrmd_event_exec_complete) {
if (pcmk_is_set(ra_data->status, takeover_complete)) {
crm_debug("ignoring event, this connection is taken over by another node");
} else {
lrm_op_callback(op);
}
return;
}
if ((op->type == lrmd_event_disconnect) && (ra_data->cur_cmd == NULL)) {
if (!pcmk_is_set(ra_data->status, remote_active)) {
crm_debug("Disconnection from Pacemaker Remote node %s complete",
lrm_state->node_name);
} else if (!remote_ra_is_in_maintenance(lrm_state)) {
crm_err("Lost connection to Pacemaker Remote node %s",
lrm_state->node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
} else {
crm_notice("Unmanaged Pacemaker Remote node %s disconnected",
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
}
return;
}
if (!ra_data->cur_cmd) {
crm_debug("no event to match");
return;
}
cmd = ra_data->cur_cmd;
/* Start actions and migrate from actions complete after connection
* comes back to us. */
if ((op->type == lrmd_event_connect)
&& pcmk__strcase_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
if (op->connection_rc < 0) {
update_remaining_timeout(cmd);
if ((op->connection_rc == -ENOKEY)
|| (op->connection_rc == -EKEYREJECTED)) {
// Hard error, don't retry
pcmk__set_result(&(cmd->result), PCMK_OCF_INVALID_PARAM,
PCMK_EXEC_ERROR,
pcmk_strerror(op->connection_rc));
} else if (cmd->remaining_timeout > 3000) {
crm_trace("rescheduling start, remaining timeout %d", cmd->remaining_timeout);
/* NOTE(review): the ID of this retry timer is not stored anywhere, so
 * nothing here can cancel it; retry_start_cmd_cb() receives lrm_state
 * directly -- confirm lrm_state cannot be freed within the next second.
 */
g_timeout_add(1000, retry_start_cmd_cb, lrm_state);
return;
} else {
crm_trace("can't reschedule start, remaining timeout too small %d",
cmd->remaining_timeout);
pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_TIMEOUT,
"%s without enough time to retry",
pcmk_strerror(op->connection_rc));
}
} else {
lrm_state_reset_tables(lrm_state, TRUE);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
lrm_remote_set_flags(lrm_state, remote_active);
}
crm_debug("Remote connection event matched %s action", cmd->action);
report_remote_ra_result(cmd);
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_poke)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
/* The poke response arrived, so the monitor can no longer time out */
if (cmd->monitor_timeout_id) {
g_source_remove(cmd->monitor_timeout_id);
cmd->monitor_timeout_id = 0;
}
/* Only report success the first time, after that only worry about failures.
* For this function, if we get the poke pack, it is always a success. Pokes
* only fail if the send fails, or the response times out. */
if (!pcmk_is_set(cmd->status, cmd_reported_success)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
cmd_set_flags(cmd, cmd_reported_success);
}
crm_debug("Remote poke event matched %s action", cmd->action);
/* success, keep rescheduling if interval is present. */
if (cmd->interval_ms && !pcmk_is_set(cmd->status, cmd_cancel)) {
ra_data->recurring_cmds = g_list_append(ra_data->recurring_cmds, cmd);
cmd->interval_id = g_timeout_add(cmd->interval_ms,
recurring_helper, cmd);
cmd = NULL; /* prevent free */
}
cmd_handled = TRUE;
} else if ((op->type == lrmd_event_disconnect)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_MONITOR,
pcmk__str_casei)) {
if (pcmk_is_set(ra_data->status, remote_active) &&
!pcmk_is_set(cmd->status, cmd_cancel)) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR,
"Remote connection unexpectedly dropped "
"during monitor");
report_remote_ra_result(cmd);
crm_err("Remote connection to %s unexpectedly dropped during monitor",
lrm_state->node_name);
}
cmd_handled = TRUE;
/* NOTE(review): every lrmd_event_new_client event returns from the check near
 * the top of this function, so the branch below looks unreachable as written
 * -- confirm whether a stop waiting for takeover is meant to complete here
 * rather than via connection_takeover_timeout_cb().
 */
} else if ((op->type == lrmd_event_new_client)
&& pcmk__str_eq(cmd->action, PCMK_ACTION_STOP,
pcmk__str_casei)) {
handle_remote_ra_stop(lrm_state, cmd);
cmd_handled = TRUE;
} else {
crm_debug("Event did not match %s action", ra_data->cur_cmd->action);
}
if (cmd_handled) {
ra_data->cur_cmd = NULL;
if (ra_data->cmds) {
mainloop_set_trigger(ra_data->work);
}
/* cmd is NULL here if it was rescheduled as a recurring monitor above, in
 * which case free_cmd() does nothing
 */
free_cmd(cmd);
}
}
/*!
 * \internal
 * \brief Stop a remote connection and discard all of its pending commands
 *
 * \param[in,out] lrm_state  Executor state of the connection (must be
 *                           non-NULL, with remote RA data attached)
 * \param[in,out] cmd        If not NULL, stop command to report as successful
 *                           (the caller keeps ownership and must free it)
 */
static void
handle_remote_ra_stop(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd)
{
    remote_ra_data_t *ra_data = NULL;

    CRM_ASSERT(lrm_state);
    ra_data = lrm_state->remote_ra_data;

    if (pcmk_is_set(ra_data->status, takeover_complete)) {
        /* The connection has migrated, so the history is no longer ours to
         * hold; the metadata cache is kept for future use, however.
         */
        lrm_state_reset_tables(lrm_state, FALSE);
    } else {
        // An intentional stop of the connection drops any pending operations
        g_hash_table_remove_all(lrm_state->active_ops);
    }

    lrm_remote_clear_flags(lrm_state, remote_active);
    lrm_state_disconnect(lrm_state);

    // A NULL GList is an empty list, so no guards are needed here
    g_list_free_full(ra_data->cmds, free_cmd);
    g_list_free_full(ra_data->recurring_cmds, free_cmd);
    ra_data->cmds = NULL;
    ra_data->recurring_cmds = NULL;
    ra_data->cur_cmd = NULL;

    if (cmd != NULL) {
        pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
        report_remote_ra_result(cmd);
    }
}
/*!
 * \internal
 * \brief Initiate a connection to a Pacemaker Remote executor
 *
 * The server and port are taken from the command's parameters (if a key
 * occurs more than once, the last occurrence wins). If the container
 * meta-attribute is among the parameters, the connection is flagged as
 * controlling a guest node.
 *
 * \param[in,out] lrm_state   Executor state of the connection
 * \param[in,out] cmd         Start or migrate_from command being executed
 *                            (its result is set if the attempt can't be made)
 * \param[in]     timeout_ms  Time available in milliseconds (a single attempt
 *                            uses at most MAX_START_TIMEOUT_MS of it)
 *
 * \return Standard Pacemaker return code
 */
static int
handle_remote_ra_start(lrm_state_t * lrm_state, remote_ra_cmd_t * cmd, int timeout_ms)
{
    const char *server = NULL;
    lrmd_key_value_t *tmp = NULL;
    int port = 0;
    int timeout_used = timeout_ms > MAX_START_TIMEOUT_MS ? MAX_START_TIMEOUT_MS : timeout_ms;
    int rc = pcmk_rc_ok;

    for (tmp = cmd->params; tmp; tmp = tmp->next) {
        if (pcmk__strcase_any_of(tmp->key,
                                 PCMK_REMOTE_RA_ADDR, PCMK_REMOTE_RA_SERVER,
                                 NULL)) {
            server = tmp->value;

        } else if (pcmk__str_eq(tmp->key, PCMK_REMOTE_RA_PORT,
                                pcmk__str_none)) {
            /* NOTE(review): atoi() cannot report malformed input, and this
             * presumes tmp->value is non-NULL -- confirm against the callers
             * that build the parameter list.
             */
            port = atoi(tmp->value);

        } else if (pcmk__str_eq(tmp->key, CRM_META "_" PCMK__META_CONTAINER,
                                pcmk__str_none)) {
            lrm_remote_set_flags(lrm_state, controlling_guest);
        }
    }

    rc = controld_connect_remote_executor(lrm_state, server, port,
                                          timeout_used);
    if (rc != pcmk_rc_ok) {
        pcmk__format_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
                            PCMK_EXEC_ERROR,
                            "Could not connect to Pacemaker Remote node %s: %s",
                            lrm_state->node_name, pcmk_rc_str(rc));
    }
    return rc;
}
/*!
 * \internal
 * \brief Mainloop trigger callback that executes queued remote RA commands
 *
 * Commands are taken from the front of the connection's queue one at a time.
 * A command that completes synchronously is reported and freed, and the next
 * one is taken. A command that must wait for an asynchronous event (connect
 * result, poke response, or takeover) becomes the current command, and
 * processing stops until that command is released elsewhere and the trigger
 * is set again.
 *
 * \param[in,out] user_data  Executor state of the connection (lrm_state_t *),
 *                           as registered by remote_ra_data_init()
 *
 * \return TRUE always
 */
static gboolean
handle_remote_ra_exec(gpointer user_data)
{
int rc = 0;
lrm_state_t *lrm_state = user_data;
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
remote_ra_cmd_t *cmd;
GList *first = NULL;
if (ra_data->cur_cmd) {
/* still waiting on previous cmd */
return TRUE;
}
while (ra_data->cmds) {
first = ra_data->cmds;
cmd = first->data;
if (cmd->delay_id) {
/* still waiting for start delay timer to trip */
return TRUE;
}
/* Dequeue the command; from here on it is owned by this function until it
 * is either freed at the bottom of the loop or handed over as cur_cmd
 */
ra_data->cmds = g_list_remove_link(ra_data->cmds, first);
g_list_free_1(first);
if (pcmk__str_any_of(cmd->action, PCMK_ACTION_START,
PCMK_ACTION_MIGRATE_FROM, NULL)) {
lrm_remote_clear_flags(lrm_state, expect_takeover | takeover_complete);
if (handle_remote_ra_start(lrm_state, cmd,
cmd->timeout) == pcmk_rc_ok) {
/* take care of this later when we get async connection result */
crm_debug("Initiated async remote connection, %s action will complete after connect event",
cmd->action);
ra_data->cur_cmd = cmd;
return TRUE;
}
/* the connection attempt could not be initiated;
 * handle_remote_ra_start() has already set the failure result
 */
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_MONITOR)) {
if (lrm_state_is_connected(lrm_state) == TRUE) {
rc = lrm_state_poke_connection(lrm_state);
if (rc < 0) {
pcmk__set_result(&(cmd->result), PCMK_OCF_UNKNOWN_ERROR,
PCMK_EXEC_ERROR, pcmk_strerror(rc));
}
} else {
rc = -1;
pcmk__set_result(&(cmd->result), PCMK_OCF_NOT_RUNNING,
PCMK_EXEC_DONE, "Remote connection inactive");
}
if (rc == 0) {
crm_debug("Poked Pacemaker Remote at node %s, waiting for async response",
cmd->rsc_id);
ra_data->cur_cmd = cmd;
cmd->monitor_timeout_id = g_timeout_add(cmd->timeout, monitor_timeout_cb, cmd);
return TRUE;
}
report_remote_ra_result(cmd);
} else if (!strcmp(cmd->action, PCMK_ACTION_STOP)) {
if (pcmk_is_set(ra_data->status, expect_takeover)) {
/* briefly wait on stop for the takeover event to occur. If the
* takeover event does not occur during the wait period, that's fine.
* It just means that the remote-node's lrm_status section is going to get
* cleared which will require all the resources running in the remote-node
* to be explicitly re-detected via probe actions. If the takeover does occur
* successfully, then we can leave the status section intact. */
cmd->takeover_timeout_id = g_timeout_add((cmd->timeout/2), connection_takeover_timeout_cb, cmd);
ra_data->cur_cmd = cmd;
return TRUE;
}
/* handle_remote_ra_stop() frees the remaining queue and sets
 * ra_data->cmds to NULL, so the loop ends after this command
 */
handle_remote_ra_stop(lrm_state, cmd);
} else if (strcmp(cmd->action, PCMK_ACTION_MIGRATE_TO) == 0) {
lrm_remote_clear_flags(lrm_state, takeover_complete);
lrm_remote_set_flags(lrm_state, expect_takeover);
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
} else if (pcmk__str_any_of(cmd->action, PCMK_ACTION_RELOAD,
PCMK_ACTION_RELOAD_AGENT, NULL)) {
/* Currently the only reloadable parameter is reconnect_interval,
* which is only used by the scheduler via the CIB, so reloads are a
* no-op.
*
* @COMPAT DC <2.1.0: We only need to check for "reload" in case
* we're in a rolling upgrade with a DC scheduling "reload" instead
* of "reload-agent". An OCF 1.1 "reload" would be a no-op anyway,
* so this would work for that purpose as well.
*/
pcmk__set_result(&(cmd->result), PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
report_remote_ra_result(cmd);
}
/* A command with any other action reaches this point without a result
 * having been reported for it
 */
free_cmd(cmd);
}
return TRUE;
}
/*!
 * \internal
 * \brief Attach remote RA bookkeeping data to an executor state, if needed
 *
 * Does nothing if the state already has remote RA data. Otherwise allocates
 * zeroed data and registers handle_remote_ra_exec() as the connection's
 * mainloop work trigger.
 *
 * \param[in,out] lrm_state  Executor state to initialize
 */
static void
remote_ra_data_init(lrm_state_t * lrm_state)
{
    remote_ra_data_t *ra_data = NULL;

    if (lrm_state->remote_ra_data) {
        return;
    }

    ra_data = calloc(1, sizeof(remote_ra_data_t));

    /* Fail with an explicit assertion on memory exhaustion, rather than
     * dereferencing a NULL pointer in the assignment below.
     */
    CRM_ASSERT(ra_data != NULL);

    ra_data->work = mainloop_add_trigger(G_PRIORITY_HIGH, handle_remote_ra_exec, lrm_state);
    lrm_state->remote_ra_data = ra_data;
}
/*!
 * \internal
 * \brief Free the remote RA bookkeeping data attached to an executor state
 *
 * Frees all queued and recurring commands, destroys the work trigger, and
 * detaches the data from the state. Does nothing if the state has no remote
 * RA data.
 *
 * \param[in,out] lrm_state  Executor state to clean up
 *
 * \note NOTE(review): a command held only in cur_cmd is not freed here --
 *       confirm that callers ensure no command is in flight at this point.
 */
void
remote_ra_cleanup(lrm_state_t * lrm_state)
{
    remote_ra_data_t *doomed = lrm_state->remote_ra_data;

    if (doomed == NULL) {
        return;
    }

    // A NULL GList is an empty list, so no guards are needed here
    g_list_free_full(doomed->cmds, free_cmd);
    g_list_free_full(doomed->recurring_cmds, free_cmd);

    mainloop_destroy_trigger(doomed->work);
    free(doomed);
    lrm_state->remote_ra_data = NULL;
}
/*!
 * \internal
 * \brief Check whether a resource is a remote connection resource
 *
 * \param[in] agent     Resource agent name (may be NULL)
 * \param[in] provider  Resource agent provider (may be NULL)
 * \param[in] id        Resource ID (may be NULL)
 *
 * \return TRUE if \p agent and \p provider name the internal remote agent
 *         ("remote" from provider "pacemaker"), or if \p id matches a known
 *         executor state other than the local node's; otherwise FALSE
 */
gboolean
is_remote_lrmd_ra(const char *agent, const char *provider, const char *id)
{
    const bool is_remote_agent = (agent != NULL) && (provider != NULL)
                                 && (strcmp(agent, REMOTE_LRMD_RA) == 0)
                                 && (strcmp(provider, "pacemaker") == 0);

    if (is_remote_agent) {
        return TRUE;
    }

    // Otherwise, go by whether the ID names a non-local executor state
    if (id == NULL) {
        return FALSE;
    }
    if (lrm_state_find(id) == NULL) {
        return FALSE;
    }
    if (pcmk__str_eq(id, controld_globals.our_nodename, pcmk__str_casei)) {
        return FALSE;
    }
    return TRUE;
}
/*!
 * \internal
 * \brief Create resource information for a remote connection resource
 *
 * \param[in] lrm_state  Ignored
 * \param[in] rsc_id     ID of remote connection resource
 *
 * \return Newly allocated resource information (ocf:pacemaker:remote) if an
 *         executor state exists for \p rsc_id, otherwise NULL; NULL is also
 *         returned if memory could not be allocated
 *
 * \note The caller is responsible for freeing the result.
 */
lrmd_rsc_info_t *
remote_ra_get_rsc_info(lrm_state_t * lrm_state, const char *rsc_id)
{
    lrmd_rsc_info_t *info = NULL;

    if (lrm_state_find(rsc_id) == NULL) {
        return NULL;
    }

    info = calloc(1, sizeof(lrmd_rsc_info_t));
    if (info == NULL) {
        crm_err("Could not allocate resource information for %s", rsc_id);
        return NULL;
    }

    info->id = strdup(rsc_id);
    info->type = strdup(REMOTE_LRMD_RA);
    info->standard = strdup(PCMK_RESOURCE_CLASS_OCF);
    info->provider = strdup("pacemaker");

    /* Don't hand back a partially filled object whose NULL members a caller
     * might dereference.
     */
    if ((info->id == NULL) || (info->type == NULL)
        || (info->standard == NULL) || (info->provider == NULL)) {

        crm_err("Could not allocate resource information for %s", rsc_id);
        free(info->id);
        free(info->type);
        free(info->standard);
        free(info->provider);
        free(info);
        return NULL;
    }
    return info;
}
/*!
 * \internal
 * \brief Check whether the internal remote agent supports an action
 *
 * \param[in] action  Action name to check
 *
 * \return Nonzero if \p action is start, stop, monitor, migrate_to,
 *         migrate_from, reload-agent, or reload (as named by the
 *         PCMK_ACTION_* constants), otherwise FALSE
 */
static gboolean
is_remote_ra_supported_action(const char *action)
{
    const gboolean supported = pcmk__str_any_of(action,
                                                PCMK_ACTION_START,
                                                PCMK_ACTION_STOP,
                                                PCMK_ACTION_MONITOR,
                                                PCMK_ACTION_MIGRATE_TO,
                                                PCMK_ACTION_MIGRATE_FROM,
                                                PCMK_ACTION_RELOAD_AGENT,
                                                PCMK_ACTION_RELOAD,
                                                NULL);

    return supported;
}
/*!
 * \internal
 * \brief Fail and remove every recurring monitor in a command list
 *
 * Each recurring monitor found is reported with a "lost connection" error,
 * removed from the list, and freed. Other commands are left in place.
 *
 * \param[in,out] list  List of commands (remote_ra_cmd_t *) to check
 *
 * \return New head of \p list
 */
static GList *
fail_all_monitor_cmds(GList * list)
{
    GList *doomed = NULL;
    GList *iter = NULL;
    remote_ra_cmd_t *monitor = NULL;

    /* First pass: only collect the matches, so that the list being walked
     * is not modified during the walk
     */
    for (iter = list; iter != NULL; iter = iter->next) {
        monitor = iter->data;
        if (monitor->interval_ms <= 0) {
            continue;
        }
        if (!pcmk__str_eq(monitor->action, PCMK_ACTION_MONITOR,
                          pcmk__str_casei)) {
            continue;
        }
        doomed = g_list_append(doomed, monitor);
    }

    // Second pass: fail each collected monitor and drop it from the list
    for (iter = doomed; iter != NULL; iter = iter->next) {
        monitor = iter->data;

        pcmk__set_result(&(monitor->result), PCMK_OCF_UNKNOWN_ERROR,
                         PCMK_EXEC_ERROR, "Lost connection to remote executor");
        crm_trace("Pre-emptively failing %s %s (interval=%u, %s)",
                  monitor->action, monitor->rsc_id, monitor->interval_ms,
                  monitor->userdata);
        report_remote_ra_result(monitor);

        list = g_list_remove(list, monitor);
        free_cmd(monitor);
    }

    // Only the temporary list's links remain; the commands are already freed
    g_list_free(doomed);
    return list;
}
/*!
 * \internal
 * \brief Remove and free the first matching command in a list
 *
 * \param[in,out] list         List of commands (remote_ra_cmd_t *) to search
 * \param[in]     action       Action name to match (case-insensitively)
 * \param[in]     interval_ms  Interval to match
 *
 * \return New head of \p list
 */
static GList *
remove_cmd(GList * list, const char *action, guint interval_ms)
{
    GList *iter = NULL;

    for (iter = list; iter != NULL; iter = iter->next) {
        remote_ra_cmd_t *candidate = iter->data;

        if ((candidate->interval_ms == interval_ms)
            && pcmk__str_eq(candidate->action, action, pcmk__str_casei)) {

            // Returning right away means the modified list is not walked
            list = g_list_remove(list, candidate);
            free_cmd(candidate);
            return list;
        }
    }
    return list;
}
/*!
 * \internal
 * \brief Cancel an action on a remote connection resource
 *
 * A matching command that is queued or waiting for its next interval is
 * removed and freed. A matching command that is currently in flight is
 * flagged as cancelled instead.
 *
 * \param[in,out] lrm_state    Ignored
 * \param[in]     rsc_id       ID of remote connection resource
 * \param[in]     action       Name of action to cancel
 * \param[in]     interval_ms  Interval of action to cancel
 *
 * \return 0 on success, or -EINVAL if \p rsc_id has no executor state with
 *         remote RA data
 */
int
remote_ra_cancel(lrm_state_t *lrm_state, const char *rsc_id,
                 const char *action, guint interval_ms)
{
    lrm_state_t *conn_state = lrm_state_find(rsc_id);
    remote_ra_data_t *ra_data = NULL;
    remote_ra_cmd_t *in_flight = NULL;

    if ((conn_state == NULL) || (conn_state->remote_ra_data == NULL)) {
        return -EINVAL;
    }
    ra_data = conn_state->remote_ra_data;

    ra_data->cmds = remove_cmd(ra_data->cmds, action, interval_ms);
    ra_data->recurring_cmds = remove_cmd(ra_data->recurring_cmds, action,
                                         interval_ms);

    // An in-flight command cannot be freed here, so just mark it cancelled
    in_flight = ra_data->cur_cmd;
    if ((in_flight != NULL)
        && (in_flight->interval_ms == interval_ms)
        && pcmk__str_eq(in_flight->action, action, pcmk__str_casei)) {

        cmd_set_flags(in_flight, cmd_cancel);
    }
    return 0;
}
/*!
 * \internal
 * \brief Merge a new recurring monitor request into an existing duplicate
 *
 * A duplicate is looked for, in this order, in:
 * 1. cur_cmd (the monitor is in flight right now, and not being cancelled)
 * 2. the recurring_cmds list (the monitor is waiting for its next interval)
 * 3. the cmds list (the monitor is queued for execution)
 *
 * \param[in,out] ra_data      Remote connection data to search
 * \param[in]     interval_ms  Monitor interval to match
 * \param[in]     userdata     If not NULL, replaces the duplicate's user data
 *
 * \return Existing duplicate command if one was found, otherwise NULL
 *         (always NULL when \p interval_ms is 0)
 */
static remote_ra_cmd_t *
handle_dup_monitor(remote_ra_data_t *ra_data, guint interval_ms,
                   const char *userdata)
{
    remote_ra_cmd_t *dup = NULL;
    remote_ra_cmd_t *in_flight = NULL;
    GList *item = NULL;

    if (interval_ms == 0) {
        return NULL;
    }

    in_flight = ra_data->cur_cmd;
    if ((in_flight != NULL)
        && !pcmk_is_set(in_flight->status, cmd_cancel)
        && (in_flight->interval_ms == interval_ms)
        && pcmk__str_eq(in_flight->action, PCMK_ACTION_MONITOR,
                        pcmk__str_casei)) {
        dup = in_flight;
    }

    for (item = ra_data->recurring_cmds; (dup == NULL) && (item != NULL);
         item = item->next) {
        remote_ra_cmd_t *candidate = item->data;

        if ((candidate->interval_ms == interval_ms)
            && pcmk__str_eq(candidate->action, PCMK_ACTION_MONITOR,
                            pcmk__str_casei)) {
            dup = candidate;
        }
    }

    for (item = ra_data->cmds; (dup == NULL) && (item != NULL);
         item = item->next) {
        remote_ra_cmd_t *candidate = item->data;

        if ((candidate->interval_ms == interval_ms)
            && pcmk__str_eq(candidate->action, PCMK_ACTION_MONITOR,
                            pcmk__str_casei)) {
            dup = candidate;
        }
    }

    if (dup == NULL) {
        return NULL;
    }

    crm_trace("merging duplicate monitor cmd " PCMK__OP_FMT,
              dup->rsc_id, PCMK_ACTION_MONITOR, interval_ms);

    /* The most recent caller's user data wins */
    if (userdata) {
        free(dup->userdata);
        dup->userdata = strdup(userdata);
    }

    /* A success was already reported under the old call ID, so this request
     * needs a fresh one
     */
    if (pcmk_is_set(dup->status, cmd_reported_success)) {
        dup->start_time = time(NULL);
        dup->call_id = generate_callid();
        cmd_clear_flags(dup, cmd_reported_success);
    }

    /* A nonzero interval_id means the command is waiting for its next
     * interval; cancel that timer and run the action right away instead
     */
    if (dup->interval_id) {
        g_source_remove(dup->interval_id);
        dup->interval_id = 0;
        recurring_helper(dup);
    }
    return dup;
}
/*!
 * \internal
 * \brief Execute an action using the (internal) ocf:pacemaker:remote agent
 *
 * If the request duplicates a recurring monitor that is already known, that
 * existing command is reused and its call ID is returned. Otherwise a new
 * command is created and queued.
 *
 * \param[in]     lrm_state       Executor state object for remote connection
 * \param[in]     rsc_id          Connection resource ID
 * \param[in]     action          Action to execute
 * \param[in]     userdata        String to copy and pass to execution callback
 * \param[in]     interval_ms     Action interval (in milliseconds)
 * \param[in]     timeout_ms      Action timeout (in milliseconds)
 * \param[in]     start_delay_ms  Delay (in milliseconds) before executing action
 * \param[in,out] params          Connection resource parameters
 * \param[out]    call_id         Where to store call ID on success
 *
 * \return Standard Pacemaker return code
 * \note This takes ownership of \p params, which should not be used or freed
 *       after calling this function.
 */
int
controld_execute_remote_agent(const lrm_state_t *lrm_state, const char *rsc_id,
                              const char *action, const char *userdata,
                              guint interval_ms, int timeout_ms,
                              int start_delay_ms, lrmd_key_value_t *params,
                              int *call_id)
{
    lrm_state_t *connection_rsc = NULL;
    remote_ra_cmd_t *cmd = NULL;
    remote_ra_data_t *ra_data = NULL;

    /* Validate the arguments before writing through call_id, so that a NULL
     * call_id is rejected here instead of being dereferenced first
     */
    CRM_CHECK((lrm_state != NULL) && (rsc_id != NULL) && (action != NULL)
              && (userdata != NULL) && (call_id != NULL),
              lrmd_key_value_freeall(params); return EINVAL);

    *call_id = 0;

    if (!is_remote_ra_supported_action(action)) {
        lrmd_key_value_freeall(params);
        return EOPNOTSUPP;
    }

    connection_rsc = lrm_state_find(rsc_id);
    if (connection_rsc == NULL) {
        lrmd_key_value_freeall(params);
        return ENOTCONN;
    }

    remote_ra_data_init(connection_rsc);
    ra_data = connection_rsc->remote_ra_data;

    /* A duplicate recurring monitor is merged into the existing command, so
     * the new parameters are not needed
     */
    cmd = handle_dup_monitor(ra_data, interval_ms, userdata);
    if (cmd) {
        *call_id = cmd->call_id;
        lrmd_key_value_freeall(params);
        return pcmk_rc_ok;
    }

    cmd = calloc(1, sizeof(remote_ra_cmd_t));
    if (cmd == NULL) {
        lrmd_key_value_freeall(params);
        return ENOMEM;
    }

    cmd->owner = strdup(lrm_state->node_name);
    cmd->rsc_id = strdup(rsc_id);
    cmd->action = strdup(action);
    cmd->userdata = strdup(userdata);
    if ((cmd->owner == NULL) || (cmd->rsc_id == NULL) || (cmd->action == NULL)
        || (cmd->userdata == NULL)) {
        /* params has not been attached to cmd yet, so free it separately */
        free_cmd(cmd);
        lrmd_key_value_freeall(params);
        return ENOMEM;
    }

    cmd->interval_ms = interval_ms;
    cmd->timeout = timeout_ms;
    cmd->start_delay = start_delay_ms;
    cmd->params = params;   // the command now holds the parameters
    cmd->start_time = time(NULL);

    cmd->call_id = generate_callid();

    if (cmd->start_delay) {
        cmd->delay_id = g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
    }

    ra_data->cmds = g_list_append(ra_data->cmds, cmd);
    mainloop_set_trigger(ra_data->work);

    *call_id = cmd->call_id;
    return pcmk_rc_ok;
}
/*!
* \internal
* \brief Immediately fail all monitors of a remote node, if proxied here
*
* \param[in] node_name Name of pacemaker_remote node
*/
void
remote_ra_fail(const char *node_name)
{
lrm_state_t *lrm_state = lrm_state_find(node_name);
if (lrm_state && lrm_state_is_connected(lrm_state)) {
remote_ra_data_t *ra_data = lrm_state->remote_ra_data;
crm_info("Failing monitors on Pacemaker Remote node %s", node_name);
ra_data->recurring_cmds = fail_all_monitor_cmds(ra_data->recurring_cmds);
ra_data->cmds = fail_all_monitor_cmds(ra_data->cmds);
}
}
/* A guest node fencing implied by host fencing looks like:
*
* <pseudo_event id="103" operation="stonith" operation_key="stonith-lxc1-off"
* on_node="lxc1" on_node_uuid="lxc1">
* <attributes CRM_meta_on_node="lxc1" CRM_meta_on_node_uuid="lxc1"
* CRM_meta_stonith_action="off" crm_feature_set="3.0.12"/>
* <downed>
* <node id="lxc1"/>
* </downed>
* </pseudo_event>
*
* The XPath below selects the <node> children of <downed> within a
* <pseudo_event> whose "operation" attribute is "stonith".
*/
#define XPATH_PSEUDO_FENCE "/" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='stonith']/" XML_GRAPH_TAG_DOWNED \
"/" XML_CIB_TAG_NODE
/*!
 * \internal
 * \brief Check a pseudo-action for Pacemaker Remote node side effects
 *
 * If the pseudo-action matches XPATH_PSEUDO_FENCE exactly once and the matched
 * node has an ID, that node is processed as down.
 *
 * \param[in,out] xml  XML of pseudo-action to check
 */
void
remote_ra_process_pseudo(xmlNode *xml)
{
    xmlXPathObjectPtr matches = xpath_search(xml, XPATH_PSEUDO_FENCE);

    if (numXpathResults(matches) == 1) {
        xmlNode *downed = getXpathResult(matches, 0);
        const char *guest = NULL;

        /* Normally, we handle the necessary side effects of a guest node stop
         * action when reporting the remote agent's result. However, if the stop
         * is implied due to fencing, it will be a fencing pseudo-event, and
         * there won't be a result to report. Handle that case here.
         *
         * This will result in a duplicate call to remote_node_down() if the
         * guest stop was real instead of implied, but that shouldn't hurt.
         *
         * There is still one corner case that isn't handled: if a guest node
         * isn't running any resources when its host is fenced, it will appear
         * to be cleanly stopped, so there will be no pseudo-fence, and our
         * peer cache state will be incorrect unless and until the guest is
         * recovered.
         */
        if (downed != NULL) {
            guest = ID(downed);
        }
        if (guest != NULL) {
            remote_node_down(guest, DOWN_ERASE_LRM);
        }
    }
    freeXpathObject(matches);
}
/*!
 * \internal
 * \brief Record a remote node's maintenance state in the CIB and locally
 *
 * \param[in,out] lrm_state    Executor state object for the remote node
 * \param[in]     maintenance  Whether the node is entering (nonzero) or
 *                             leaving (zero) maintenance
 */
static void
remote_ra_maintenance(lrm_state_t * lrm_state, gboolean maintenance)
{
    int rc = pcmk_rc_ok;
    int cib_opts = crmd_cib_smart_opt();
    crm_node_t *node = crm_remote_peer_get(lrm_state->node_name);
    xmlNode *status_xml = NULL;
    xmlNode *node_state = NULL;

    CRM_CHECK(node != NULL, return);

    status_xml = create_xml_node(NULL, XML_CIB_TAG_STATUS);
    node_state = create_node_state_update(node, node_update_none, status_xml,
                                          __func__);
    if (maintenance) {
        crm_xml_add(node_state, XML_NODE_IS_MAINTENANCE, "1");
    } else {
        crm_xml_add(node_state, XML_NODE_IS_MAINTENANCE, "0");
    }

    rc = controld_update_cib(XML_CIB_TAG_STATUS, status_xml, cib_opts, NULL);
    if (rc == pcmk_rc_ok) {
        /* TODO: still not 100% sure that async update will succeed ... */
        if (maintenance) {
            lrm_remote_set_flags(lrm_state, remote_in_maint);
        } else {
            lrm_remote_clear_flags(lrm_state, remote_in_maint);
        }
    }
    free_xml(status_xml);
}
/* XPath selecting the <maintenance> child of any <pseudo_event> (at any depth)
* whose "operation" attribute equals PCMK_ACTION_MAINTENANCE_NODES
*/
#define XPATH_PSEUDO_MAINTENANCE "//" XML_GRAPH_TAG_PSEUDO_EVENT \
"[@" XML_LRM_ATTR_TASK "='" PCMK_ACTION_MAINTENANCE_NODES "']/" \
XML_GRAPH_TAG_MAINTENANCE
/*!
 * \internal
 * \brief Check a pseudo-action holding updates for maintenance state
 *
 * For every node listed in the action that has an active remote connection
 * here, the maintenance state given in the action is applied.
 *
 * \param[in,out] xml  XML of pseudo-action to check
 */
void
remote_ra_process_maintenance_nodes(xmlNode *xml)
{
    xmlXPathObjectPtr matches = xpath_search(xml, XPATH_PSEUDO_MAINTENANCE);

    if (numXpathResults(matches) == 1) {
        int total = 0;
        int remotes = 0;
        xmlNode *entry = first_named_child(getXpathResult(matches, 0),
                                           XML_CIB_TAG_NODE);

        while (entry != NULL) {
            lrm_state_t *lrm_state = lrm_state_find(ID(entry));
            remote_ra_data_t *ra_data = NULL;

            total++;
            if (lrm_state != NULL) {
                ra_data = lrm_state->remote_ra_data;
            }

            /* Only nodes with an active remote connection are updated */
            if ((ra_data != NULL)
                && pcmk_is_set(ra_data->status, remote_active)) {
                int is_maint;

                remotes++;
                pcmk__scan_min_int(crm_element_value(entry,
                                                     XML_NODE_IS_MAINTENANCE),
                                   &is_maint, 0);
                remote_ra_maintenance(lrm_state, is_maint);
            }
            entry = crm_next_same_xml(entry);
        }
        crm_trace("Action holds %d nodes (%d remotes found) adjusting "
                  PCMK_OPT_MAINTENANCE_MODE,
                  total, remotes);
    }
    freeXpathObject(matches);
}
/*!
 * \internal
 * \brief Check whether a remote connection is flagged as in maintenance
 *
 * \param[in] lrm_state  Executor state object; its remote_ra_data is
 *                       dereferenced without a NULL check
 *
 * \return Whether the remote_in_maint flag is set in the connection's status
 */
gboolean
remote_ra_is_in_maintenance(lrm_state_t * lrm_state)
{
    const remote_ra_data_t *data = lrm_state->remote_ra_data;
    gboolean in_maint = pcmk_is_set(data->status, remote_in_maint);

    return in_maint;
}
/*!
 * \internal
 * \brief Check whether a remote connection is flagged as controlling a guest
 *
 * \param[in] lrm_state  Executor state object; its remote_ra_data is
 *                       dereferenced without a NULL check
 *
 * \return Whether the controlling_guest flag is set in the connection's status
 */
gboolean
remote_ra_controlling_guest(lrm_state_t * lrm_state)
{
    const remote_ra_data_t *data = lrm_state->remote_ra_data;
    gboolean is_guest = pcmk_is_set(data->status, controlling_guest);

    return is_guest;
}
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index 48c954b430..18a614b6e5 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -1,463 +1,464 @@
/*
* Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PCMK__CRM_MSG_XML__H
# define PCMK__CRM_MSG_XML__H
# include <crm/common/xml.h>
#if !defined(PCMK_ALLOW_DEPRECATED) || (PCMK_ALLOW_DEPRECATED == 1)
#include <crm/msg_xml_compat.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* This file defines constants for various XML syntax (mainly element and
* attribute names).
*
* For consistency, new constants should start with "PCMK_", followed by "XE"
* for XML element names, "XA" for XML attribute names, and "META" for meta
* attribute names. Old names that don't follow this policy should eventually be
* deprecated and replaced with names that do.
*/
/*
* XML elements
*/
#define PCMK_XE_DATE_EXPRESSION "date_expression"
#define PCMK_XE_OP_EXPRESSION "op_expression"
/* This has been deprecated as a CIB element (an alias for <clone> with
* "promotable" set to "true") since 2.0.0.
*/
#define PCMK_XE_PROMOTABLE_LEGACY "master"
#define PCMK_XE_RSC_EXPRESSION "rsc_expression"
/*
* XML attributes
*/
#define PCMK_XA_ADMIN_EPOCH "admin_epoch"
#define PCMK_XA_CIB_LAST_WRITTEN "cib-last-written"
#define PCMK_XA_CRM_DEBUG_ORIGIN "crm-debug-origin"
#define PCMK_XA_CRM_FEATURE_SET "crm_feature_set"
#define PCMK_XA_CRM_TIMESTAMP "crm-timestamp"
#define PCMK_XA_DESCRIPTION "description"
#define PCMK_XA_EPOCH "epoch"
#define PCMK_XA_FORMAT "format"
#define PCMK_XA_HAVE_QUORUM "have-quorum"
#define PCMK_XA_ID "id"
#define PCMK_XA_NO_QUORUM_PANIC "no-quorum-panic"
#define PCMK_XA_NUM_UPDATES "num_updates"
#define PCMK_XA_VALIDATE_WITH "validate-with"
#define PCMK_XA_VERSION "version"
/*
* Meta attributes
*/
#define PCMK_META_ALLOW_MIGRATE "allow-migrate"
#define PCMK_META_CLONE_MAX "clone-max"
#define PCMK_META_CLONE_MIN "clone-min"
#define PCMK_META_CLONE_NODE_MAX "clone-node-max"
#define PCMK_META_CONTAINER_ATTR_TARGET "container-attribute-target"
#define PCMK_META_CRITICAL "critical"
#define PCMK_META_ENABLED "enabled"
#define PCMK_META_FAILURE_TIMEOUT "failure-timeout"
#define PCMK_META_GLOBALLY_UNIQUE "globally-unique"
#define PCMK_META_INTERLEAVE "interleave"
#define PCMK_META_IS_MANAGED "is-managed"
#define PCMK_META_MAINTENANCE "maintenance"
#define PCMK_META_MIGRATION_THRESHOLD "migration-threshold"
#define PCMK_META_MULTIPLE_ACTIVE "multiple-active"
#define PCMK_META_NOTIFY "notify"
#define PCMK_META_ORDERED "ordered"
#define PCMK_META_PRIORITY "priority"
#define PCMK_META_PROMOTABLE "promotable"
#define PCMK_META_PROMOTED_MAX "promoted-max"
#define PCMK_META_PROMOTED_NODE_MAX "promoted-node-max"
#define PCMK_META_REMOTE_ADDR "remote-addr"
#define PCMK_META_REMOTE_ALLOW_MIGRATE "remote-allow-migrate"
#define PCMK_META_REMOTE_CONNECT_TIMEOUT "remote-connect-timeout"
#define PCMK_META_REMOTE_NODE "remote-node"
#define PCMK_META_REMOTE_PORT "remote-port"
#define PCMK_META_REQUIRES "requires"
#define PCMK_META_RESOURCE_STICKINESS "resource-stickiness"
#define PCMK_META_TARGET_ROLE "target-role"
/*
* Remote resource instance attributes
*/
#define PCMK_REMOTE_RA_ADDR "addr"
+#define PCMK_REMOTE_RA_PORT "port"
#define PCMK_REMOTE_RA_SERVER "server"
/*
* Older constants that don't follow current naming
*/
# ifndef F_ORIG
# define F_ORIG "src"
# endif
# ifndef F_SEQ
# define F_SEQ "seq"
# endif
# ifndef F_SUBTYPE
# define F_SUBTYPE "subt"
# endif
# ifndef F_TYPE
# define F_TYPE "t"
# endif
# ifndef F_CLIENTNAME
# define F_CLIENTNAME "cn"
# endif
# ifndef F_XML_TAGNAME
# define F_XML_TAGNAME "__name__"
# endif
# ifndef T_CRM
# define T_CRM "crmd"
# endif
# ifndef T_ATTRD
# define T_ATTRD "attrd"
# endif
# define CIB_OPTIONS_FIRST "cib-bootstrap-options"
# define F_CRM_DATA "crm_xml"
# define F_CRM_TASK "crm_task"
# define F_CRM_HOST_TO "crm_host_to"
# define F_CRM_MSG_TYPE F_SUBTYPE
# define F_CRM_SYS_TO "crm_sys_to"
# define F_CRM_SYS_FROM "crm_sys_from"
# define F_CRM_HOST_FROM F_ORIG
# define F_CRM_REFERENCE XML_ATTR_REFERENCE
# define F_CRM_VERSION PCMK_XA_VERSION
# define F_CRM_ORIGIN "origin"
# define F_CRM_USER "crm_user"
# define F_CRM_JOIN_ID "join_id"
# define F_CRM_DC_LEAVING "dc-leaving"
# define F_CRM_ELECTION_ID "election-id"
# define F_CRM_ELECTION_AGE_S "election-age-sec"
# define F_CRM_ELECTION_AGE_US "election-age-nano-sec"
# define F_CRM_ELECTION_OWNER "election-owner"
# define F_CRM_TGRAPH "crm-tgraph-file"
# define F_CRM_TGRAPH_INPUT "crm-tgraph-in"
# define F_CRM_THROTTLE_MODE "crm-limit-mode"
# define F_CRM_THROTTLE_MAX "crm-limit-max"
/*---- Common tags/attrs */
# define XML_DIFF_MARKER "__crm_diff_marker__"
# define XML_TAG_CIB "cib"
# define XML_TAG_FAILED "failed"
# define XML_ATTR_TIMEOUT "timeout"
# define XML_ATTR_NAME "name"
# define XML_ATTR_IDREF "id-ref"
# define XML_ATTR_ID_LONG "long-id"
# define XML_ATTR_TYPE "type"
# define XML_ATTR_OP "op"
# define XML_ATTR_DC_UUID "dc-uuid"
# define XML_ATTR_UPDATE_ORIG "update-origin"
# define XML_ATTR_UPDATE_CLIENT "update-client"
# define XML_ATTR_UPDATE_USER "update-user"
# define XML_BOOLEAN_TRUE "true"
# define XML_BOOLEAN_FALSE "false"
# define XML_TAG_OPTIONS "options"
/*---- top level tags/attrs */
# define XML_ATTR_REQUEST "request"
# define XML_ATTR_RESPONSE "response"
# define XML_ATTR_UNAME "uname"
# define XML_ATTR_REFERENCE "reference"
# define XML_CRM_TAG_PING "ping_response"
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
# define XML_PING_ATTR_PACEMAKERDSTATE_REMOTE "remote"
# define XML_FAIL_TAG_CIB "failed_update"
# define XML_FAILCIB_ATTR_OBJTYPE "object_type"
# define XML_FAILCIB_ATTR_OP "operation"
# define XML_FAILCIB_ATTR_REASON "reason"
/*---- CIB specific tags/attrs */
# define XML_CIB_TAG_SECTION_ALL "all"
# define XML_CIB_TAG_CONFIGURATION "configuration"
# define XML_CIB_TAG_STATUS "status"
# define XML_CIB_TAG_RESOURCES "resources"
# define XML_CIB_TAG_NODES "nodes"
# define XML_CIB_TAG_CONSTRAINTS "constraints"
# define XML_CIB_TAG_CRMCONFIG "crm_config"
# define XML_CIB_TAG_OPCONFIG "op_defaults"
# define XML_CIB_TAG_RSCCONFIG "rsc_defaults"
# define XML_CIB_TAG_ACLS "acls"
# define XML_CIB_TAG_ALERTS "alerts"
# define XML_CIB_TAG_ALERT "alert"
# define XML_CIB_TAG_ALERT_RECIPIENT "recipient"
# define XML_CIB_TAG_ALERT_SELECT "select"
# define XML_CIB_TAG_ALERT_ATTRIBUTES "select_attributes"
# define XML_CIB_TAG_ALERT_FENCING "select_fencing"
# define XML_CIB_TAG_ALERT_NODES "select_nodes"
# define XML_CIB_TAG_ALERT_RESOURCES "select_resources"
# define XML_CIB_TAG_ALERT_ATTR "attribute"
# define XML_CIB_TAG_STATE "node_state"
# define XML_CIB_TAG_NODE "node"
# define XML_CIB_TAG_NVPAIR "nvpair"
# define XML_CIB_TAG_PROPSET "cluster_property_set"
# define XML_TAG_ATTR_SETS "instance_attributes"
# define XML_TAG_META_SETS "meta_attributes"
# define XML_TAG_ATTRS "attributes"
# define XML_TAG_PARAMS "parameters"
# define XML_TAG_PARAM "param"
# define XML_TAG_UTILIZATION "utilization"
# define XML_TAG_RESOURCE_REF "resource_ref"
# define XML_CIB_TAG_RESOURCE "primitive"
# define XML_CIB_TAG_GROUP "group"
# define XML_CIB_TAG_INCARNATION "clone"
# define XML_CIB_TAG_CONTAINER "bundle"
# define XML_CIB_TAG_RSC_TEMPLATE "template"
// Same string value as PCMK_REMOTE_RA_PORT, which is defined earlier in this header
// NOTE(review): presumably the older name for that constant -- confirm remaining users
# define XML_RSC_ATTR_REMOTE_RA_PORT "port"
# define XML_REMOTE_ATTR_RECONNECT_INTERVAL "reconnect_interval"
# define XML_OP_ATTR_ON_FAIL "on-fail"
# define XML_OP_ATTR_START_DELAY "start-delay"
# define XML_OP_ATTR_ORIGIN "interval-origin"
# define XML_OP_ATTR_PENDING "record-pending"
# define XML_OP_ATTR_DIGESTS_ALL "digests-all"
# define XML_OP_ATTR_DIGESTS_SECURE "digests-secure"
# define XML_CIB_TAG_LRM "lrm"
# define XML_LRM_TAG_RESOURCES "lrm_resources"
# define XML_LRM_TAG_RESOURCE "lrm_resource"
# define XML_LRM_TAG_RSC_OP "lrm_rsc_op"
# define XML_AGENT_ATTR_CLASS "class"
# define XML_AGENT_ATTR_PROVIDER "provider"
//! \deprecated Do not use (will be removed in a future release)
# define XML_CIB_ATTR_REPLACE "replace"
# define XML_CIB_ATTR_PRIORITY "priority"
# define XML_NODE_IS_REMOTE "remote_node"
# define XML_NODE_IS_FENCED "node_fenced"
# define XML_NODE_IS_MAINTENANCE "node_in_maintenance"
# define XML_CIB_ATTR_SHUTDOWN "shutdown"
/* Aside from being an old name for the executor, LRM is a misnomer here because
* the controller and scheduler use these to track actions, which are not always
* executor operations.
*/
// XML attribute that takes interval specification (user-facing configuration)
# define XML_LRM_ATTR_INTERVAL "interval"
// XML attribute that takes interval in milliseconds (daemon APIs)
// (identical value as above, but different constant allows clearer code intent)
# define XML_LRM_ATTR_INTERVAL_MS XML_LRM_ATTR_INTERVAL
# define XML_LRM_ATTR_TASK "operation"
# define XML_LRM_ATTR_TASK_KEY "operation_key"
# define XML_LRM_ATTR_TARGET "on_node"
# define XML_LRM_ATTR_TARGET_UUID "on_node_uuid"
/*! Actions to be executed on Pacemaker Remote nodes are routed through the
* controller on the cluster node hosting the remote connection. That cluster
* node is considered the router node for the action.
*/
# define XML_LRM_ATTR_ROUTER_NODE "router_node"
# define XML_LRM_ATTR_RSCID "rsc-id"
# define XML_LRM_ATTR_OPSTATUS "op-status"
# define XML_LRM_ATTR_RC "rc-code"
# define XML_LRM_ATTR_CALLID "call-id"
# define XML_LRM_ATTR_OP_DIGEST "op-digest"
# define XML_LRM_ATTR_OP_RESTART "op-force-restart"
# define XML_LRM_ATTR_OP_SECURE "op-secure-params"
# define XML_LRM_ATTR_RESTART_DIGEST "op-restart-digest"
# define XML_LRM_ATTR_SECURE_DIGEST "op-secure-digest"
# define XML_LRM_ATTR_EXIT_REASON "exit-reason"
# define XML_RSC_OP_LAST_CHANGE "last-rc-change"
# define XML_RSC_OP_T_EXEC "exec-time"
# define XML_RSC_OP_T_QUEUE "queue-time"
# define XML_LRM_ATTR_MIGRATE_SOURCE "migrate_source"
# define XML_LRM_ATTR_MIGRATE_TARGET "migrate_target"
# define XML_TAG_GRAPH "transition_graph"
# define XML_GRAPH_TAG_RSC_OP "rsc_op"
# define XML_GRAPH_TAG_PSEUDO_EVENT "pseudo_event"
# define XML_GRAPH_TAG_CRM_EVENT "crm_event"
# define XML_GRAPH_TAG_DOWNED "downed"
# define XML_GRAPH_TAG_MAINTENANCE "maintenance"
# define XML_TAG_RULE "rule"
# define XML_RULE_ATTR_SCORE "score"
# define XML_RULE_ATTR_SCORE_ATTRIBUTE "score-attribute"
# define XML_RULE_ATTR_ROLE "role"
# define XML_RULE_ATTR_BOOLEAN_OP "boolean-op"
# define XML_TAG_EXPRESSION "expression"
# define XML_EXPR_ATTR_ATTRIBUTE "attribute"
# define XML_EXPR_ATTR_OPERATION "operation"
# define XML_EXPR_ATTR_VALUE "value"
# define XML_EXPR_ATTR_TYPE "type"
# define XML_EXPR_ATTR_VALUE_SOURCE "value-source"
# define XML_CONS_TAG_RSC_DEPEND "rsc_colocation"
# define XML_CONS_TAG_RSC_ORDER "rsc_order"
# define XML_CONS_TAG_RSC_LOCATION "rsc_location"
# define XML_CONS_TAG_RSC_TICKET "rsc_ticket"
# define XML_CONS_TAG_RSC_SET "resource_set"
# define XML_CONS_ATTR_SYMMETRICAL "symmetrical"
# define XML_LOCATION_ATTR_DISCOVERY "resource-discovery"
# define XML_COLOC_ATTR_SOURCE "rsc"
# define XML_COLOC_ATTR_SOURCE_ROLE "rsc-role"
# define XML_COLOC_ATTR_TARGET "with-rsc"
# define XML_COLOC_ATTR_TARGET_ROLE "with-rsc-role"
# define XML_COLOC_ATTR_NODE_ATTR "node-attribute"
# define XML_COLOC_ATTR_INFLUENCE "influence"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_SOURCE_INSTANCE "rsc-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_COLOC_ATTR_TARGET_INSTANCE "with-rsc-instance"
# define XML_LOC_ATTR_SOURCE "rsc"
# define XML_LOC_ATTR_SOURCE_PATTERN "rsc-pattern"
# define XML_ORDER_ATTR_FIRST "first"
# define XML_ORDER_ATTR_THEN "then"
# define XML_ORDER_ATTR_FIRST_ACTION "first-action"
# define XML_ORDER_ATTR_THEN_ACTION "then-action"
# define XML_ORDER_ATTR_KIND "kind"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_FIRST_INSTANCE "first-instance"
//! \deprecated Deprecated since 2.1.5
# define XML_ORDER_ATTR_THEN_INSTANCE "then-instance"
# define XML_TICKET_ATTR_TICKET "ticket"
# define XML_TICKET_ATTR_LOSS_POLICY "loss-policy"
# define XML_NVPAIR_ATTR_NAME "name"
# define XML_NVPAIR_ATTR_VALUE "value"
# define XML_NODE_ATTR_RSC_DISCOVERY "resource-discovery-enabled"
# define XML_ALERT_ATTR_PATH "path"
# define XML_ALERT_ATTR_TIMEOUT "timeout"
# define XML_ALERT_ATTR_TSTAMP_FORMAT "timestamp-format"
# define XML_ALERT_ATTR_REC_VALUE "value"
# define XML_CIB_TAG_GENERATION_TUPPLE "generation_tuple"
# define XML_ATTR_TRANSITION_MAGIC "transition-magic"
# define XML_ATTR_TRANSITION_KEY "transition-key"
# define XML_ATTR_TE_NOWAIT "op_no_wait"
# define XML_ATTR_TE_TARGET_RC "op_target_rc"
# define XML_TAG_TRANSIENT_NODEATTRS "transient_attributes"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_ADDED "diff-added"
//! \deprecated Do not use (will be removed in a future release)
# define XML_TAG_DIFF_REMOVED "diff-removed"
# define XML_ACL_TAG_USER "acl_target"
# define XML_ACL_TAG_USERv1 "acl_user"
# define XML_ACL_TAG_GROUP "acl_group"
# define XML_ACL_TAG_ROLE "acl_role"
# define XML_ACL_TAG_PERMISSION "acl_permission"
# define XML_ACL_TAG_ROLE_REF "role"
# define XML_ACL_TAG_ROLE_REFv1 "role_ref"
# define XML_ACL_ATTR_KIND "kind"
# define XML_ACL_TAG_READ "read"
# define XML_ACL_TAG_WRITE "write"
# define XML_ACL_TAG_DENY "deny"
# define XML_ACL_ATTR_REF "reference"
# define XML_ACL_ATTR_REFv1 "ref"
# define XML_ACL_ATTR_TAG "object-type"
# define XML_ACL_ATTR_TAGv1 "tag"
# define XML_ACL_ATTR_XPATH "xpath"
# define XML_ACL_ATTR_ATTRIBUTE "attribute"
# define XML_CIB_TAG_TICKETS "tickets"
# define XML_CIB_TAG_TICKET_STATE "ticket_state"
# define XML_CIB_TAG_TAGS "tags"
# define XML_CIB_TAG_TAG "tag"
# define XML_CIB_TAG_OBJ_REF "obj_ref"
# define XML_TAG_FENCING_TOPOLOGY "fencing-topology"
# define XML_TAG_FENCING_LEVEL "fencing-level"
# define XML_ATTR_STONITH_INDEX "index"
# define XML_ATTR_STONITH_TARGET "target"
# define XML_ATTR_STONITH_TARGET_VALUE "target-value"
# define XML_ATTR_STONITH_TARGET_PATTERN "target-pattern"
# define XML_ATTR_STONITH_TARGET_ATTRIBUTE "target-attribute"
# define XML_ATTR_STONITH_DEVICES "devices"
# define XML_TAG_DIFF "diff"
# define XML_DIFF_VERSION "version"
# define XML_DIFF_VSOURCE "source"
# define XML_DIFF_VTARGET "target"
# define XML_DIFF_CHANGE "change"
# define XML_DIFF_LIST "change-list"
# define XML_DIFF_ATTR "change-attr"
# define XML_DIFF_RESULT "change-result"
# define XML_DIFF_OP "operation"
# define XML_DIFF_PATH "path"
# define XML_DIFF_POSITION "position"
// Shorthand for looking up the PCMK_XA_ID ("id") attribute value of XML element x
# define ID(x) crm_element_value(x, PCMK_XA_ID)
#ifdef __cplusplus
}
#endif
#endif
diff --git a/lib/pengine/remote.c b/lib/pengine/remote.c
index 22808925ca..2cfbc53175 100644
--- a/lib/pengine/remote.c
+++ b/lib/pengine/remote.c
@@ -1,274 +1,273 @@
/*
* Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/scheduler_internal.h>
#include <crm/pengine/internal.h>
#include <glib.h>
/*!
 * \internal
 * \brief Check whether a resource is a remote node's connection
 *
 * \param[in] rsc  Resource to check (may be NULL)
 *
 * \return true if \p rsc is flagged as a remote node and the node found under
 *         the resource's ID satisfies pe__is_remote_node(), otherwise false
 */
bool
pe__resource_is_remote_conn(const pcmk_resource_t *rsc)
{
    if ((rsc == NULL) || !rsc->is_remote_node) {
        return false;
    }
    return pe__is_remote_node(pe_find_node(rsc->cluster->nodes, rsc->id));
}
bool
pe__is_remote_node(const pcmk_node_t *node)
{
return (node != NULL) && (node->details->type == pcmk_node_variant_remote)
&& ((node->details->remote_rsc == NULL)
|| (node->details->remote_rsc->container == NULL));
}
bool
pe__is_guest_node(const pcmk_node_t *node)
{
return (node != NULL) && (node->details->type == pcmk_node_variant_remote)
&& (node->details->remote_rsc != NULL)
&& (node->details->remote_rsc->container != NULL);
}
/*!
 * \internal
 * \brief Check whether a node has the remote variant (guest or remote node)
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node is not NULL and its type is the remote variant
 */
bool
pe__is_guest_or_remote_node(const pcmk_node_t *node)
{
    if (node == NULL) {
        return false;
    }
    return node->details->type == pcmk_node_variant_remote;
}
/*!
 * \internal
 * \brief Check whether a node is a guest node whose connection is bundled
 *
 * \param[in] node  Node to check (may be NULL)
 *
 * \return true if \p node is a guest node and pe_rsc_is_bundled() holds for
 *         its connection resource, otherwise false
 */
bool
pe__is_bundle_node(const pcmk_node_t *node)
{
    if (!pe__is_guest_node(node)) {
        return false;
    }
    return pe_rsc_is_bundled(node->details->remote_rsc);
}
/*!
 * \internal
 * \brief Check whether a resource creates a guest node
 *
 * Look through the given resource's filler resources for one that is a remote
 * connection. Nothing is searched unless the scheduler data is flagged as
 * having remote nodes.
 *
 * \param[in] scheduler  Scheduler data
 * \param[in] rsc        Resource to check
 *
 * \return First filler resource with remote connection, or NULL if none found
 */
pcmk_resource_t *
pe__resource_contains_guest_node(const pcmk_scheduler_t *scheduler,
                                 const pcmk_resource_t *rsc)
{
    GList *item = NULL;

    if ((rsc == NULL) || (scheduler == NULL)
        || !pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
        return NULL;
    }

    for (item = rsc->fillers; item != NULL; item = item->next) {
        pcmk_resource_t *candidate = item->data;

        if (candidate->is_remote_node) {
            return candidate;
        }
    }
    return NULL;
}
/*!
 * \brief Check whether resource XML describes an ocf:pacemaker:remote resource
 *
 * \param[in] xml  Resource XML to check (may be NULL)
 *
 * \return true if the type, class, and provider attributes of \p xml match
 *         "remote", the OCF class, and "pacemaker" (all case-insensitively),
 *         otherwise false
 */
bool
xml_contains_remote_node(xmlNode *xml)
{
    if (xml == NULL) {
        return false;
    }

    /* Short-circuiting keeps the lookups in type, class, provider order and
     * stops at the first mismatch
     */
    return pcmk__str_eq(crm_element_value(xml, XML_ATTR_TYPE), "remote",
                        pcmk__str_casei)
           && pcmk__str_eq(crm_element_value(xml, XML_AGENT_ATTR_CLASS),
                           PCMK_RESOURCE_CLASS_OCF, pcmk__str_casei)
           && pcmk__str_eq(crm_element_value(xml, XML_AGENT_ATTR_PROVIDER),
                           "pacemaker", pcmk__str_casei);
}
/*!
* \internal
* \brief Execute a supplied function for each guest node running on a host
*
* \param[in] scheduler Scheduler data
* \param[in] host Host node to check
* \param[in] helper Function to call for each guest node
* \param[in,out] user_data Pointer to pass to helper function
*/
void
pe_foreach_guest_node(const pcmk_scheduler_t *scheduler,
const pcmk_node_t *host,
void (*helper)(const pcmk_node_t*, void*),
void *user_data)
{
GList *iter;
CRM_CHECK(scheduler && host && host->details && helper, return);
if (!pcmk_is_set(scheduler->flags, pcmk_sched_have_remote_nodes)) {
return;
}
for (iter = host->details->running_rsc; iter != NULL; iter = iter->next) {
pcmk_resource_t *rsc = (pcmk_resource_t *) iter->data;
if (rsc->is_remote_node && (rsc->container != NULL)) {
pcmk_node_t *guest_node = pe_find_node(scheduler->nodes, rsc->id);
if (guest_node) {
(*helper)(guest_node, user_data);
}
}
}
}
/*!
* \internal
* \brief Create CIB XML for an implicit remote connection
*
* The created primitive is identified as ocf:pacemaker:remote, and gets a
* meta-attribute set, an instance attribute set (only if \p server or \p port
* is given), and an operations element holding a monitor operation (plus a
* start operation if \p start_timeout is given).
*
* \param[in,out] parent If not NULL, use as parent XML element
* \param[in] uname Name of Pacemaker Remote node (used as the resource ID)
* \param[in] container_id If not NULL, use this as connection container
* \param[in] migrateable If not NULL, use as allow-migrate value
* \param[in] is_managed If not NULL, use as is-managed value
* \param[in] start_timeout If not NULL, use as remote connect timeout
* (passed when creating the start operation)
* \param[in] server If not NULL, use as remote server value
* \param[in] port If not NULL, use as remote port value
*
* \return Newly created XML
*/
xmlNode *
pe_create_remote_xml(xmlNode *parent, const char *uname,
const char *container_id, const char *migrateable,
const char *is_managed, const char *start_timeout,
const char *server, const char *port)
{
xmlNode *remote;
xmlNode *xml_sub;
remote = create_xml_node(parent, XML_CIB_TAG_RESOURCE);
// Add identity
crm_xml_add(remote, PCMK_XA_ID, uname);
crm_xml_add(remote, XML_AGENT_ATTR_CLASS, PCMK_RESOURCE_CLASS_OCF);
crm_xml_add(remote, XML_AGENT_ATTR_PROVIDER, "pacemaker");
crm_xml_add(remote, XML_ATTR_TYPE, "remote");
// Add meta-attributes
xml_sub = create_xml_node(remote, XML_TAG_META_SETS);
crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_META_SETS);
// The internal-resource meta-attribute is always set; the rest are optional
crm_create_nvpair_xml(xml_sub, NULL,
PCMK__META_INTERNAL_RSC, XML_BOOLEAN_TRUE);
if (container_id) {
crm_create_nvpair_xml(xml_sub, NULL,
PCMK__META_CONTAINER, container_id);
}
if (migrateable) {
crm_create_nvpair_xml(xml_sub, NULL,
PCMK_META_ALLOW_MIGRATE, migrateable);
}
if (is_managed) {
crm_create_nvpair_xml(xml_sub, NULL, PCMK_META_IS_MANAGED, is_managed);
}
// Add instance attributes
// (the set itself is created only when there is something to put in it)
if (port || server) {
xml_sub = create_xml_node(remote, XML_TAG_ATTR_SETS);
crm_xml_set_id(xml_sub, "%s-%s", uname, XML_TAG_ATTR_SETS);
if (server) {
crm_create_nvpair_xml(xml_sub, NULL, PCMK_REMOTE_RA_ADDR, server);
}
if (port) {
- crm_create_nvpair_xml(xml_sub, NULL,
- XML_RSC_ATTR_REMOTE_RA_PORT, port);
+ crm_create_nvpair_xml(xml_sub, NULL, PCMK_REMOTE_RA_PORT, port);
}
}
// Add operations
xml_sub = create_xml_node(remote, "operations");
crm_create_op_xml(xml_sub, uname, PCMK_ACTION_MONITOR, "30s", "30s");
if (start_timeout) {
crm_create_op_xml(xml_sub, uname, PCMK_ACTION_START, "0",
start_timeout);
}
return remote;
}
// History entry to be checked for fail count clearing
// (entries are created by pe__add_param_check() and kept in the scheduler's
// param_check list; none of the pointers below is freed with the entry)
struct check_op {
const xmlNode *rsc_op; // History entry XML
pcmk_resource_t *rsc; // Known resource corresponding to history entry
pcmk_node_t *node; // Known node corresponding to history entry
enum pcmk__check_parameters check_type; // What needs checking
};
/*!
 * \internal
 * \brief Remember a history entry whose parameters need a deferred check
 *
 * The new entry is added to the front of the scheduler's param_check list.
 *
 * \param[in]     rsc_op     History entry XML
 * \param[in,out] rsc        Resource corresponding to history entry
 * \param[in,out] node       Node corresponding to history entry
 * \param[in]     flag       What needs checking
 * \param[in,out] scheduler  Scheduler data
 */
void
pe__add_param_check(const xmlNode *rsc_op, pcmk_resource_t *rsc,
                    pcmk_node_t *node, enum pcmk__check_parameters flag,
                    pcmk_scheduler_t *scheduler)
{
    struct check_op *check_op = NULL;

    CRM_CHECK(scheduler && rsc_op && rsc && node, return);

    check_op = calloc(1, sizeof(*check_op));
    CRM_ASSERT(check_op != NULL);

    crm_trace("Deferring checks of %s until after allocation", ID(rsc_op));

    check_op->check_type = flag;
    check_op->node = node;
    check_op->rsc = rsc;
    check_op->rsc_op = rsc_op;

    scheduler->param_check = g_list_prepend(scheduler->param_check, check_op);
}
/*!
 * \internal
 * \brief Call a function for each action to be checked for addr substitution
 *
 * The callback receives each deferred entry's resource, node, history XML, and
 * check type, in list order.
 *
 * \param[in,out] scheduler  Scheduler data
 * \param[in]     cb         Function to be called
 */
void
pe__foreach_param_check(pcmk_scheduler_t *scheduler,
                        void (*cb)(pcmk_resource_t*, pcmk_node_t*,
                                   const xmlNode*, enum pcmk__check_parameters))
{
    GList *item = NULL;

    CRM_CHECK(scheduler && cb, return);

    item = scheduler->param_check;
    while (item != NULL) {
        struct check_op *entry = item->data;

        cb(entry->rsc, entry->node, entry->rsc_op, entry->check_type);
        item = item->next;
    }
}
/*!
 * \internal
 * \brief Free all deferred parameter checks held by scheduler data
 *
 * \param[in,out] scheduler  Scheduler data (may be NULL)
 */
void
pe__free_param_checks(pcmk_scheduler_t *scheduler)
{
    if ((scheduler == NULL) || (scheduler->param_check == NULL)) {
        return;
    }

    /* Frees each entry as well as the list links */
    g_list_free_full(scheduler->param_check, free);
    scheduler->param_check = NULL;
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Apr 21, 7:01 PM (20 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1665292
Default Alt Text
(76 KB)
Attached To
Mode
rP Pacemaker
Attached
Detach File
Event Timeline
Log In to Comment