diff --git a/cts/scheduler/inc6.xml b/cts/scheduler/inc6.xml
index 2ff44eb9d9..b463ddb7fd 100644
--- a/cts/scheduler/inc6.xml
+++ b/cts/scheduler/inc6.xml
@@ -1,278 +1,275 @@
-
-
-
diff --git a/daemons/schedulerd/sched_clone.c b/daemons/schedulerd/sched_clone.c
index da6c0cd1e4..1900857dae 100644
--- a/daemons/schedulerd/sched_clone.c
+++ b/daemons/schedulerd/sched_clone.c
@@ -1,1483 +1,1460 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/msg_xml.h>
#include <allocate.h>
#include <notif.h>
#include <utils.h>
#define VARIANT_CLONE 1
#include <lib/pengine/variant.h>
gint sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set);
static void append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all);
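/* Compare two resources by ID, for sorting a clone's children into a
 * stable, deterministic order
 */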
static gint
sort_rsc_id(gconstpointer a, gconstpointer b)
{
const resource_t *resource1 = (const resource_t *)a;
const resource_t *resource2 = (const resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
return strcmp(resource1->id, resource2->id);
}
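/* Look up node in the allowed-nodes table of rsc's parent (or of rsc itself
 * when it has no parent), returning the parent's copy of the node, if any
 */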
static node_t *
parent_node_instance(const resource_t * rsc, node_t * node)
{
node_t *ret = NULL;
if (node != NULL && rsc->parent) {
ret = pe_hash_table_lookup(rsc->parent->allowed_nodes, node->details->id);
} else if(node != NULL) {
ret = pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
}
return ret;
}
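/* Check whether a resource or any of its children has failed */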
static gboolean
did_fail(const resource_t * rsc)
{
GListPtr gIter = rsc->children;
if (is_set(rsc->flags, pe_rsc_failed)) {
return TRUE;
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
if (did_fail(child_rsc)) {
return TRUE;
}
}
return FALSE;
}
gint
sort_clone_instance(gconstpointer a, gconstpointer b, gpointer data_set)
{
int rc = 0;
node_t *node1 = NULL;
node_t *node2 = NULL;
node_t *current_node1 = NULL;
node_t *current_node2 = NULL;
unsigned int nnodes1 = 0;
unsigned int nnodes2 = 0;
gboolean can1 = TRUE;
gboolean can2 = TRUE;
const resource_t *resource1 = (const resource_t *)a;
const resource_t *resource2 = (const resource_t *)b;
CRM_ASSERT(resource1 != NULL);
CRM_ASSERT(resource2 != NULL);
/* allocation order:
* - active instances
* - instances running on nodes with the least copies
* - active instances on nodes that can't support them or are to be fenced
* - failed instances
* - inactive instances
*/
current_node1 = pe__find_active_on(resource1, &nnodes1, NULL);
current_node2 = pe__find_active_on(resource2, &nnodes2, NULL);
if (nnodes1 && nnodes2) {
if (nnodes1 < nnodes2) {
crm_trace("%s < %s: running_on", resource1->id, resource2->id);
return -1;
} else if (nnodes1 > nnodes2) {
crm_trace("%s > %s: running_on", resource1->id, resource2->id);
return 1;
}
}
node1 = current_node1;
node2 = current_node2;
if (node1) {
node_t *match = pe_hash_table_lookup(resource1->allowed_nodes, node1->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource1->id);
node1 = NULL;
can1 = FALSE;
}
}
if (node2) {
node_t *match = pe_hash_table_lookup(resource2->allowed_nodes, node2->details->id);
if (match == NULL || match->weight < 0) {
crm_trace("%s: current location is unavailable", resource2->id);
node2 = NULL;
can2 = FALSE;
}
}
if (can1 != can2) {
if (can1) {
crm_trace("%s < %s: availability of current location", resource1->id, resource2->id);
return -1;
}
crm_trace("%s > %s: availability of current location", resource1->id, resource2->id);
return 1;
}
if (resource1->priority < resource2->priority) {
crm_trace("%s < %s: priority", resource1->id, resource2->id);
return 1;
} else if (resource1->priority > resource2->priority) {
crm_trace("%s > %s: priority", resource1->id, resource2->id);
return -1;
}
if (node1 == NULL && node2 == NULL) {
crm_trace("%s == %s: not active", resource1->id, resource2->id);
return 0;
}
if (node1 != node2) {
if (node1 == NULL) {
crm_trace("%s > %s: active", resource1->id, resource2->id);
return 1;
} else if (node2 == NULL) {
crm_trace("%s < %s: active", resource1->id, resource2->id);
return -1;
}
}
can1 = can_run_resources(node1);
can2 = can_run_resources(node2);
if (can1 != can2) {
if (can1) {
crm_trace("%s < %s: can", resource1->id, resource2->id);
return -1;
}
crm_trace("%s > %s: can", resource1->id, resource2->id);
return 1;
}
node1 = parent_node_instance(resource1, node1);
node2 = parent_node_instance(resource2, node2);
if (node1 != NULL && node2 == NULL) {
crm_trace("%s < %s: not allowed", resource1->id, resource2->id);
return -1;
} else if (node1 == NULL && node2 != NULL) {
crm_trace("%s > %s: not allowed", resource1->id, resource2->id);
return 1;
}
if (node1 == NULL || node2 == NULL) {
crm_trace("%s == %s: not allowed", resource1->id, resource2->id);
return 0;
}
if (node1->count < node2->count) {
crm_trace("%s < %s: count", resource1->id, resource2->id);
return -1;
} else if (node1->count > node2->count) {
crm_trace("%s > %s: count", resource1->id, resource2->id);
return 1;
}
can1 = did_fail(resource1);
can2 = did_fail(resource2);
if (can1 != can2) {
if (can1) {
crm_trace("%s > %s: failed", resource1->id, resource2->id);
return 1;
}
crm_trace("%s < %s: failed", resource1->id, resource2->id);
return -1;
}
if (node1 && node2) {
int lpc = 0;
int max = 0;
node_t *n = NULL;
GListPtr gIter = NULL;
GListPtr list1 = NULL;
GListPtr list2 = NULL;
GHashTable *hash1 =
g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free);
GHashTable *hash2 =
g_hash_table_new_full(crm_str_hash, g_str_equal, NULL, free);
n = node_copy(current_node1);
g_hash_table_insert(hash1, (gpointer) n->details->id, n);
n = node_copy(current_node2);
g_hash_table_insert(hash2, (gpointer) n->details->id, n);
if(resource1->parent) {
for (gIter = resource1->parent->rsc_cons; gIter; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, resource1->id);
hash1 = native_merge_weights(constraint->rsc_rh, resource1->id, hash1,
constraint->node_attribute,
(float)constraint->score / INFINITY, 0);
}
for (gIter = resource1->parent->rsc_cons_lhs; gIter; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, resource1->id);
hash1 = native_merge_weights(constraint->rsc_lh, resource1->id, hash1,
constraint->node_attribute,
(float)constraint->score / INFINITY, pe_weights_positive);
}
}
if(resource2->parent) {
for (gIter = resource2->parent->rsc_cons; gIter; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, resource2->id);
hash2 = native_merge_weights(constraint->rsc_rh, resource2->id, hash2,
constraint->node_attribute,
(float)constraint->score / INFINITY, 0);
}
for (gIter = resource2->parent->rsc_cons_lhs; gIter; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
crm_trace("Applying %s to %s", constraint->id, resource2->id);
hash2 = native_merge_weights(constraint->rsc_lh, resource2->id, hash2,
constraint->node_attribute,
(float)constraint->score / INFINITY, pe_weights_positive);
}
}
/* Current location score */
node1 = g_hash_table_lookup(hash1, current_node1->details->id);
node2 = g_hash_table_lookup(hash2, current_node2->details->id);
if (node1->weight < node2->weight) {
if (node1->weight < 0) {
crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight);
rc = -1;
goto out;
} else {
crm_trace("%s < %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight);
rc = 1;
goto out;
}
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: current score: %d %d", resource1->id, resource2->id, node1->weight, node2->weight);
rc = -1;
goto out;
}
/* All location scores */
list1 = g_hash_table_get_values(hash1);
list2 = g_hash_table_get_values(hash2);
list1 = g_list_sort_with_data(list1, sort_node_weight, current_node1);
list2 = g_list_sort_with_data(list2, sort_node_weight, current_node2);
max = g_list_length(list1);
if (max < g_list_length(list2)) {
max = g_list_length(list2);
}
for (; lpc < max; lpc++) {
node1 = g_list_nth_data(list1, lpc);
node2 = g_list_nth_data(list2, lpc);
if (node1 == NULL) {
crm_trace("%s < %s: colocated score NULL", resource1->id, resource2->id);
rc = 1;
break;
} else if (node2 == NULL) {
crm_trace("%s > %s: colocated score NULL", resource1->id, resource2->id);
rc = -1;
break;
}
if (node1->weight < node2->weight) {
crm_trace("%s < %s: colocated score", resource1->id, resource2->id);
rc = 1;
break;
} else if (node1->weight > node2->weight) {
crm_trace("%s > %s: colocated score", resource1->id, resource2->id);
rc = -1;
break;
}
}
/* Order by reverse uname - same as sort_node_weight() does? */
out:
g_hash_table_destroy(hash1); /* Free mem */
g_hash_table_destroy(hash2); /* Free mem */
g_list_free(list1);
g_list_free(list2);
if (rc != 0) {
return rc;
}
}
rc = strcmp(resource1->id, resource2->id);
crm_trace("%s %c %s: default", resource1->id, rc < 0 ? '<' : '>', resource2->id);
return rc;
}
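/* Check whether an instance can run on a given node without exceeding the
 * per-node instance limit; if node is NULL, check every allowed node.
 * Returns the parent's copy of the node on success; otherwise bans the
 * instance from the node (-INFINITY) and returns NULL.
 */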
static node_t *
can_run_instance(resource_t * rsc, node_t * node, int limit)
{
node_t *local_node = NULL;
if (node == NULL && rsc->allowed_nodes) {
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&local_node)) {
can_run_instance(rsc, local_node, limit);
}
return NULL;
}
if (can_run_resources(node) == FALSE) {
goto bail;
} else if (is_set(rsc->flags, pe_rsc_orphan)) {
goto bail;
}
local_node = parent_node_instance(rsc, node);
if (local_node == NULL) {
crm_warn("%s cannot run on %s: node not allowed", rsc->id, node->details->uname);
goto bail;
} else if (local_node->weight < 0) {
common_update_score(rsc, node->details->id, local_node->weight);
pe_rsc_trace(rsc, "%s cannot run on %s: Parent node weight doesn't allow it.",
rsc->id, node->details->uname);
} else if (local_node->count < limit) {
pe_rsc_trace(rsc, "%s can run on %s (already running %d)",
rsc->id, node->details->uname, local_node->count);
return local_node;
} else {
pe_rsc_trace(rsc, "%s cannot run on %s: node full (%d >= %d)",
rsc->id, node->details->uname, local_node->count, limit);
}
bail:
if (node) {
common_update_score(rsc, node->details->id, -INFINITY);
}
return NULL;
}
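/* Allocate a single clone instance, preferring the given node if set.
 * If the instance cannot be pre-allocated to the preferred node, its
 * allowed-node scores are rolled back and NULL is returned.
 */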
static node_t *
color_instance(resource_t * rsc, node_t * prefer, gboolean all_coloc, int limit, pe_working_set_t * data_set)
{
node_t *chosen = NULL;
GHashTable *backup = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Checking allocation of %s (preferring %s, using %s parent colocations)",
rsc->id, (prefer? prefer->details->uname: "none"),
(all_coloc? "all" : "some"));
if (is_not_set(rsc->flags, pe_rsc_provisional)) {
return rsc->fns->location(rsc, NULL, FALSE);
} else if (is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
/* Only include positive colocation preferences of dependent resources
* if not every node will get a copy of the clone
*/
append_parent_colocation(rsc->parent, rsc, all_coloc);
if (prefer) {
node_t *local_prefer = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (local_prefer == NULL || local_prefer->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating %s to %s - unavailable", rsc->id,
prefer->details->uname);
return NULL;
}
}
can_run_instance(rsc, NULL, limit);
backup = node_hash_dup(rsc->allowed_nodes);
chosen = rsc->cmds->allocate(rsc, prefer, data_set);
if (chosen) {
node_t *local_node = parent_node_instance(rsc, chosen);
if (prefer && (chosen->details != prefer->details)) {
crm_notice("Pre-allocation failed: got %s instead of %s",
chosen->details->uname, prefer->details->uname);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = backup;
native_deallocate(rsc);
chosen = NULL;
backup = NULL;
} else if (local_node) {
local_node->count++;
} else if (is_set(rsc->flags, pe_rsc_managed)) {
/* what to do? we can't enforce per-node limits in this case */
crm_config_err("%s not found in %s (list=%d)",
chosen->details->id, rsc->parent->id,
g_hash_table_size(rsc->parent->allowed_nodes));
}
}
if(backup) {
g_hash_table_destroy(backup);
}
return chosen;
}
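/* Copy a clone's colocation constraints to one of its children: all of them
 * when 'all' is TRUE, otherwise only negative ones (and, for the clone's own
 * dependencies, mandatory +INFINITY ones)
 */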
static void
append_parent_colocation(resource_t * rsc, resource_t * child, gboolean all)
{
GListPtr gIter = NULL;
gIter = rsc->rsc_cons;
for (; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data;
if (all || cons->score < 0 || cons->score == INFINITY) {
child->rsc_cons = g_list_prepend(child->rsc_cons, cons);
}
}
gIter = rsc->rsc_cons_lhs;
for (; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *cons = (rsc_colocation_t *) gIter->data;
if (all || cons->score < 0) {
child->rsc_cons_lhs = g_list_prepend(child->rsc_cons_lhs, cons);
}
}
}
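/* Allocate up to max instances of a clone among the given nodes, with at
 * most per_host_max instances per node, keeping instances on their current
 * nodes where possible
 */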
void
distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes,
int max, int per_host_max, pe_working_set_t * data_set);
void
distribute_children(resource_t *rsc, GListPtr children, GListPtr nodes,
int max, int per_host_max, pe_working_set_t * data_set)
{
int loop_max = 0;
int allocated = 0;
int available_nodes = 0;
/* count now tracks the number of clones currently allocated */
for(GListPtr nIter = nodes; nIter != NULL; nIter = nIter->next) {
pe_node_t *node = nIter->data;
node->count = 0;
if (can_run_resources(node)) {
available_nodes++;
}
}
if(available_nodes) {
loop_max = max / available_nodes;
}
if (loop_max < 1) {
loop_max = 1;
}
pe_rsc_debug(rsc, "Allocating up to %d %s instances to a possible %d nodes (at most %d per host, %d optimal)",
max, rsc->id, available_nodes, per_host_max, loop_max);
/* Pre-allocate as many instances as we can to their current location */
for (GListPtr gIter = children; gIter != NULL && allocated < max; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
if (child->running_on && is_set(child->flags, pe_rsc_provisional)
&& is_not_set(child->flags, pe_rsc_failed)) {
node_t *child_node = pe__current_node(child);
node_t *local_node = parent_node_instance(child, child_node);
pe_rsc_trace(rsc, "Checking pre-allocation of %s to %s (%d remaining of %d)",
child->id, child_node->details->uname, max - allocated, max);
if (can_run_resources(child_node) == FALSE || child_node->weight < 0) {
pe_rsc_trace(rsc, "Not pre-allocating because %s can not run %s",
child_node->details->uname, child->id);
} else if(local_node && local_node->count >= loop_max) {
pe_rsc_trace(rsc,
"Not pre-allocating because %s already allocated optimal instances",
child_node->details->uname);
} else if (color_instance(child, child_node, max < available_nodes, per_host_max, data_set)) {
pe_rsc_trace(rsc, "Pre-allocated %s to %s", child->id,
child_node->details->uname);
allocated++;
}
}
}
pe_rsc_trace(rsc, "Done pre-allocating (%d of %d)", allocated, max);
for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
if (child->running_on != NULL) {
node_t *child_node = pe__current_node(child);
node_t *local_node = parent_node_instance(child, child_node);
if (local_node == NULL) {
crm_err("%s is running on %s which isn't allowed",
child->id, child_node->details->uname);
}
}
if (is_not_set(child->flags, pe_rsc_provisional)) {
} else if (allocated >= max) {
pe_rsc_debug(rsc, "Child %s not allocated - limit reached %d %d", child->id, allocated, max);
resource_location(child, NULL, -INFINITY, "clone_color:limit_reached", data_set);
} else {
if (color_instance(child, NULL, max < available_nodes, per_host_max, data_set)) {
allocated++;
}
}
}
pe_rsc_debug(rsc, "Allocated %d %s instances of a possible %d",
allocated, rsc->id, max);
}
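/* Allocate all of a clone's instances: apply colocations to the clone's
 * allowed-node scores, sort instances into allocation order, then distribute
 * them among the nodes
 */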
node_t *
clone_color(resource_t *rsc, node_t *prefer, pe_working_set_t *data_set)
{
GListPtr nodes = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (is_not_set(rsc->flags, pe_rsc_provisional)) {
return NULL;
} else if (is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
if (is_set(rsc->flags, pe_rsc_promotable)) {
apply_master_prefs(rsc);
}
set_bit(rsc->flags, pe_rsc_allocating);
pe_rsc_trace(rsc, "Processing %s", rsc->id);
/* this information is used by sort_clone_instance() when deciding in which
* order to allocate clone instances
*/
for (GListPtr gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
pe_rsc_trace(rsc, "%s: Coloring %s first", rsc->id, constraint->rsc_rh->id);
constraint->rsc_rh->cmds->allocate(constraint->rsc_rh, prefer, data_set);
}
for (GListPtr gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
rsc->allowed_nodes =
constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
(float)constraint->score / INFINITY,
(pe_weights_rollback | pe_weights_positive));
}
dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__, rsc->allowed_nodes);
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = g_list_sort_with_data(nodes, sort_node_weight, NULL);
rsc->children = g_list_sort_with_data(rsc->children, sort_clone_instance, data_set);
distribute_children(rsc, rsc->children, nodes, clone_data->clone_max, clone_data->clone_node_max, data_set);
g_list_free(nodes);
if (is_set(rsc->flags, pe_rsc_promotable)) {
color_promotable(rsc, data_set);
}
clear_bit(rsc->flags, pe_rsc_provisional);
clear_bit(rsc->flags, pe_rsc_allocating);
pe_rsc_trace(rsc, "Done allocating %s", rsc->id);
return NULL;
}
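/* Recursively check a clone child's actions, recording whether the child is
 * active and whether any non-optional start or stop is scheduled for it
 */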
static void
clone_update_pseudo_status(resource_t * rsc, gboolean * stopping, gboolean * starting,
gboolean * active)
{
GListPtr gIter = NULL;
if (rsc->children) {
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
clone_update_pseudo_status(child, stopping, starting, active);
}
return;
}
CRM_ASSERT(active != NULL);
CRM_ASSERT(starting != NULL);
CRM_ASSERT(stopping != NULL);
if (rsc->running_on) {
*active = TRUE;
}
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (*starting && *stopping) {
return;
} else if (is_set(action->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "Skipping optional: %s", action->uuid);
continue;
} else if (is_set(action->flags, pe_action_pseudo) == FALSE
&& is_set(action->flags, pe_action_runnable) == FALSE) {
pe_rsc_trace(rsc, "Skipping unrunnable: %s", action->uuid);
continue;
} else if (safe_str_eq(RSC_STOP, action->task)) {
pe_rsc_trace(rsc, "Stopping due to: %s", action->uuid);
*stopping = TRUE;
} else if (safe_str_eq(RSC_START, action->task)) {
if (is_set(action->flags, pe_action_runnable) == FALSE) {
pe_rsc_trace(rsc, "Skipping pseudo-op: %s run=%d, pseudo=%d",
action->uuid, is_set(action->flags, pe_action_runnable),
is_set(action->flags, pe_action_pseudo));
} else {
pe_rsc_trace(rsc, "Starting due to: %s", action->uuid);
pe_rsc_trace(rsc, "%s run=%d, pseudo=%d",
action->uuid, is_set(action->flags, pe_action_runnable),
is_set(action->flags, pe_action_pseudo));
*starting = TRUE;
}
}
}
}
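/* Find the single action of rsc matching key (considering only non-optional
 * actions if active_only); returns NULL unless exactly one action matches.
 * If list is non-NULL, it is set to all matches found.
 */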
static action_t *
find_rsc_action(resource_t * rsc, const char *key, gboolean active_only, GListPtr * list)
{
action_t *match = NULL;
GListPtr possible = NULL;
GListPtr active = NULL;
possible = find_actions(rsc->actions, key, NULL);
if (active_only) {
GListPtr gIter = possible;
for (; gIter != NULL; gIter = gIter->next) {
action_t *op = (action_t *) gIter->data;
if (is_set(op->flags, pe_action_optional) == FALSE) {
active = g_list_prepend(active, op);
}
}
if (active && g_list_length(active) == 1) {
match = g_list_nth_data(active, 0);
}
if (list) {
*list = active;
active = NULL;
}
} else if (possible && g_list_length(possible) == 1) {
match = g_list_nth_data(possible, 0);
}
if (list) {
*list = possible;
possible = NULL;
}
if (possible) {
g_list_free(possible);
}
if (active) {
g_list_free(active);
}
return match;
}
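/* For ordered clones, order each child's start after the previous child's
 * start, and each child's stop before the previous child's stop
 */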
static void
child_ordering_constraints(resource_t * rsc, pe_working_set_t * data_set)
{
char *key = NULL;
action_t *stop = NULL;
action_t *start = NULL;
action_t *last_stop = NULL;
action_t *last_start = NULL;
GListPtr gIter = NULL;
gboolean active_only = TRUE; /* change to false to get the old behavior */
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
if (clone_data->ordered == FALSE) {
return;
}
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child = (resource_t *) gIter->data;
key = stop_key(child);
stop = find_rsc_action(child, key, active_only, NULL);
free(key);
key = start_key(child);
start = find_rsc_action(child, key, active_only, NULL);
free(key);
if (stop) {
if (last_stop) {
/* child/child relative stop */
order_actions(stop, last_stop, pe_order_optional);
}
last_stop = stop;
}
if (start) {
if (last_start) {
/* child/child relative start */
order_actions(last_start, start, pe_order_optional);
}
last_start = start;
}
}
}
void
clone_create_actions(resource_t *rsc, pe_working_set_t *data_set)
{
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
clone_create_pseudo_actions(rsc, rsc->children, &clone_data->start_notify, &clone_data->stop_notify,data_set);
child_ordering_constraints(rsc, data_set);
if (is_set(rsc->flags, pe_rsc_promotable)) {
create_promotable_actions(rsc, data_set);
}
}
void
clone_create_pseudo_actions(
resource_t * rsc, GListPtr children, notify_data_t **start_notify, notify_data_t **stop_notify, pe_working_set_t * data_set)
{
gboolean child_active = FALSE;
gboolean child_starting = FALSE;
gboolean child_stopping = FALSE;
gboolean allow_dependent_migrations = TRUE;
action_t *stop = NULL;
action_t *stopped = NULL;
action_t *start = NULL;
action_t *started = NULL;
pe_rsc_trace(rsc, "Creating actions for %s", rsc->id);
for (GListPtr gIter = children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
gboolean starting = FALSE;
gboolean stopping = FALSE;
child_rsc->cmds->create_actions(child_rsc, data_set);
clone_update_pseudo_status(child_rsc, &stopping, &starting, &child_active);
if (stopping && starting) {
allow_dependent_migrations = FALSE;
}
child_stopping |= stopping;
child_starting |= starting;
}
/* start */
start = create_pseudo_resource_op(rsc, RSC_START, !child_starting, TRUE, data_set);
started = create_pseudo_resource_op(rsc, RSC_STARTED, !child_starting, FALSE, data_set);
started->priority = INFINITY;
if (child_active || child_starting) {
update_action_flags(started, pe_action_runnable, __FUNCTION__, __LINE__);
}
if (start_notify != NULL && *start_notify == NULL) {
*start_notify = create_notification_boundaries(rsc, RSC_START, start, started, data_set);
}
/* stop */
stop = create_pseudo_resource_op(rsc, RSC_STOP, !child_stopping, TRUE, data_set);
stopped = create_pseudo_resource_op(rsc, RSC_STOPPED, !child_stopping, TRUE, data_set);
stopped->priority = INFINITY;
if (allow_dependent_migrations) {
update_action_flags(stop, pe_action_migrate_runnable, __FUNCTION__, __LINE__);
}
if (stop_notify != NULL && *stop_notify == NULL) {
*stop_notify = create_notification_boundaries(rsc, RSC_STOP, stop, stopped, data_set);
if (start_notify && *start_notify && *stop_notify) {
order_actions((*stop_notify)->post_done, (*start_notify)->pre, pe_order_optional);
}
}
}
void
clone_internal_constraints(resource_t *rsc, pe_working_set_t *data_set)
{
resource_t *last_rsc = NULL;
GListPtr gIter;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
pe_rsc_trace(rsc, "Internal constraints for %s", rsc->id);
new_rsc_order(rsc, RSC_STOPPED, rsc, RSC_START, pe_order_optional, data_set);
new_rsc_order(rsc, RSC_START, rsc, RSC_STARTED, pe_order_runnable_left, data_set);
new_rsc_order(rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_runnable_left, data_set);
if (is_set(rsc->flags, pe_rsc_promotable)) {
new_rsc_order(rsc, RSC_DEMOTED, rsc, RSC_STOP, pe_order_optional, data_set);
new_rsc_order(rsc, RSC_STARTED, rsc, RSC_PROMOTE, pe_order_runnable_left, data_set);
}
if (clone_data->ordered) {
/* we have to maintain a consistent sorted child list when building order constraints */
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
child_rsc->cmds->internal_constraints(child_rsc, data_set);
order_start_start(rsc, child_rsc, pe_order_runnable_left | pe_order_implies_first_printed);
new_rsc_order(child_rsc, RSC_START, rsc, RSC_STARTED, pe_order_implies_then_printed,
data_set);
if (clone_data->ordered && last_rsc) {
order_start_start(last_rsc, child_rsc, pe_order_optional);
}
order_stop_stop(rsc, child_rsc, pe_order_implies_first_printed);
new_rsc_order(child_rsc, RSC_STOP, rsc, RSC_STOPPED, pe_order_implies_then_printed,
data_set);
if (clone_data->ordered && last_rsc) {
order_stop_stop(child_rsc, last_rsc, pe_order_optional);
}
last_rsc = child_rsc;
}
if (is_set(rsc->flags, pe_rsc_promotable)) {
promotable_constraints(rsc, data_set);
}
}
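/* Assign a resource (and any of its children) to a node, returning TRUE if
 * any of them had already been allocated
 */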
bool
assign_node(resource_t * rsc, node_t * node, gboolean force)
{
bool changed = FALSE;
if (rsc->children) {
for (GListPtr gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
changed |= assign_node(child_rsc, node, force);
}
return changed;
}
if (rsc->allocated_to != NULL) {
changed = true;
}
native_assign_node(rsc, NULL, node, force);
return changed;
}
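/* Check whether a clone instance is suitable for pairing with the given
 * node: it must not be blocked, must match the role filter, and must be
 * located on that node
 */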
gboolean
is_child_compatible(resource_t *child_rsc, node_t * local_node, enum rsc_role_e filter, gboolean current)
{
node_t *node = NULL;
enum rsc_role_e next_role = child_rsc->fns->state(child_rsc, current);
CRM_CHECK(child_rsc && local_node, return FALSE);
if (is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
/* We only want instances that haven't failed */
node = child_rsc->fns->location(child_rsc, NULL, current);
}
if (filter != RSC_ROLE_UNKNOWN && next_role != filter) {
crm_trace("Filtered %s", child_rsc->id);
return FALSE;
}
if (node && (node->details == local_node->details)) {
return TRUE;
} else if (node) {
crm_trace("%s - %s vs %s", child_rsc->id, node->details->uname,
local_node->details->uname);
} else {
crm_trace("%s - not allocated %d", child_rsc->id, current);
}
return FALSE;
}
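/* Find an instance of rsc that local_child can be paired with: on
 * local_child's own node if it is located somewhere, otherwise on the best
 * of its allowed nodes
 */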
resource_t *
find_compatible_child(resource_t * local_child, resource_t * rsc, enum rsc_role_e filter, gboolean current)
{
resource_t *pair = NULL;
GListPtr gIter = NULL;
GListPtr scratch = NULL;
node_t *local_node = NULL;
local_node = local_child->fns->location(local_child, NULL, current);
if (local_node) {
return find_compatible_child_by_node(local_child, local_node, rsc, filter, current);
}
scratch = g_hash_table_get_values(local_child->allowed_nodes);
scratch = g_list_sort_with_data(scratch, sort_node_weight, NULL);
gIter = scratch;
for (; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
pair = find_compatible_child_by_node(local_child, node, rsc, filter, current);
if (pair) {
goto done;
}
}
pe_rsc_debug(rsc, "Can't pair %s with %s", local_child->id, rsc->id);
done:
g_list_free(scratch);
return pair;
}
void
clone_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
{
/* -- Never called --
*
* Instead we add the colocation constraints to the child and call from there
*/
CRM_ASSERT(FALSE);
}
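/* Apply a colocation constraint whose right-hand side is this clone: pair
 * the dependent with a compatible instance when interleaving, restrict it to
 * nodes with an instance when the constraint is mandatory, or else pass the
 * constraint down to each instance
 */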
void
clone_rsc_colocation_rh(resource_t *rsc_lh, resource_t *rsc_rh,
rsc_colocation_t *constraint)
{
GListPtr gIter = NULL;
gboolean do_interleave = FALSE;
const char *interleave_s = NULL;
CRM_CHECK(constraint != NULL, return);
CRM_CHECK(rsc_lh != NULL, pe_err("rsc_lh was NULL for %s", constraint->id); return);
CRM_CHECK(rsc_rh != NULL, pe_err("rsc_rh was NULL for %s", constraint->id); return);
CRM_CHECK(rsc_lh->variant == pe_native, return);
pe_rsc_trace(rsc_rh, "Processing constraint %s: %s -> %s %d",
constraint->id, rsc_lh->id, rsc_rh->id, constraint->score);
if (is_set(rsc_rh->flags, pe_rsc_promotable)) {
if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id);
return;
} else if (constraint->role_rh == RSC_ROLE_UNKNOWN) {
pe_rsc_trace(rsc_rh, "Handling %s as a clone colocation", constraint->id);
} else {
promotable_colocation_rh(rsc_lh, rsc_rh, constraint);
return;
}
}
/* only the LHS needs to be labeled as interleave */
interleave_s = g_hash_table_lookup(constraint->rsc_lh->meta, XML_RSC_ATTR_INTERLEAVE);
if(crm_is_true(interleave_s) && constraint->rsc_lh->variant > pe_group) {
// TODO: Do we actually care about multiple RH copies sharing a LH copy anymore?
if (copies_per_node(constraint->rsc_lh) != copies_per_node(constraint->rsc_rh)) {
crm_config_err("Cannot interleave %s and %s because"
" they do not support the same number of copies per node",
constraint->rsc_lh->id, constraint->rsc_rh->id);
} else {
do_interleave = TRUE;
}
}
if (is_set(rsc_rh->flags, pe_rsc_provisional)) {
pe_rsc_trace(rsc_rh, "%s is still provisional", rsc_rh->id);
return;
} else if (do_interleave) {
resource_t *rh_child = NULL;
rh_child = find_compatible_child(rsc_lh, rsc_rh, RSC_ROLE_UNKNOWN, FALSE);
if (rh_child) {
pe_rsc_debug(rsc_rh, "Pairing %s with %s", rsc_lh->id, rh_child->id);
rsc_lh->cmds->rsc_colocation_lh(rsc_lh, rh_child, constraint);
} else if (constraint->score >= INFINITY) {
crm_notice("Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id);
assign_node(rsc_lh, NULL, TRUE);
} else {
pe_rsc_debug(rsc_rh, "Cannot pair %s with instance of %s", rsc_lh->id, rsc_rh->id);
}
return;
} else if (constraint->score >= INFINITY) {
GListPtr rhs = NULL;
gIter = rsc_rh->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
node_t *chosen = child_rsc->fns->location(child_rsc, NULL, FALSE);
if (chosen != NULL && is_set_recursive(child_rsc, pe_rsc_block, TRUE) == FALSE) {
pe_rsc_trace(rsc_rh, "Allowing %s: %s %d", constraint->id, chosen->details->uname, chosen->weight);
rhs = g_list_prepend(rhs, chosen);
}
}
node_list_exclude(rsc_lh->allowed_nodes, rhs, FALSE);
g_list_free(rhs);
return;
}
gIter = rsc_rh->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
child_rsc->cmds->rsc_colocation_rh(rsc_lh, child_rsc, constraint);
}
}
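/* Map a clone action to the corresponding task type for its children,
 * extracting the underlying operation from notify/notified action keys
 */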
enum action_tasks
clone_child_action(action_t * action)
{
enum action_tasks result = no_action;
resource_t *child = (resource_t *) action->rsc->children->data;
if (safe_str_eq(action->task, "notify")
|| safe_str_eq(action->task, "notified")) {
/* Find the action we're notifying about instead */
int stop = 0;
char *key = action->uuid;
int lpc = strlen(key);
for (; lpc > 0; lpc--) {
if (key[lpc] == '_' && stop == 0) {
stop = lpc;
} else if (key[lpc] == '_') {
char *task_mutable = NULL;
lpc++;
task_mutable = strdup(key + lpc);
task_mutable[stop - lpc] = 0;
crm_trace("Extracted action '%s' from '%s'", task_mutable, key);
result = get_complex_task(child, task_mutable, TRUE);
free(task_mutable);
break;
}
}
} else {
result = get_complex_task(child, action->task, TRUE);
}
return result;
}
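/* Derive a clone action's flags from its children's matching actions:
 * mandatory if any child's action is mandatory, runnable if any child's
 * action is runnable
 */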
enum pe_action_flags
summary_action_flags(action_t * action, GListPtr children, node_t * node)
{
GListPtr gIter = NULL;
gboolean any_runnable = FALSE;
gboolean check_runnable = TRUE;
enum action_tasks task = clone_child_action(action);
enum pe_action_flags flags = (pe_action_optional | pe_action_runnable | pe_action_pseudo);
const char *task_s = task2text(task);
for (gIter = children; gIter != NULL; gIter = gIter->next) {
action_t *child_action = NULL;
resource_t *child = (resource_t *) gIter->data;
child_action = find_first_action(child->actions, NULL, task_s, child->children ? NULL : node);
pe_rsc_trace(action->rsc, "Checking for %s in %s on %s (%s)", task_s, child->id,
node ? node->details->uname : "none", child_action?child_action->uuid:"NA");
if (child_action) {
enum pe_action_flags child_flags = child->cmds->action_flags(child_action, node);
if (is_set(flags, pe_action_optional)
&& is_set(child_flags, pe_action_optional) == FALSE) {
pe_rsc_trace(child, "%s is mandatory because of %s", action->uuid,
child_action->uuid);
flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_optional);
pe_clear_action_bit(action, pe_action_optional);
}
if (is_set(child_flags, pe_action_runnable)) {
any_runnable = TRUE;
}
}
}
if (check_runnable && any_runnable == FALSE) {
pe_rsc_trace(action->rsc, "%s is not runnable because no children are", action->uuid);
flags = crm_clear_bit(__FUNCTION__, __LINE__, action->rsc->id, flags, pe_action_runnable);
if (node == NULL) {
pe_clear_action_bit(action, pe_action_runnable);
}
}
return flags;
}
enum pe_action_flags
clone_action_flags(action_t * action, node_t * node)
{
return summary_action_flags(action, action->rsc->children, node);
}
void
clone_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
{
GListPtr gIter = rsc->children;
pe_rsc_trace(rsc, "Processing location constraint %s for %s", constraint->id, rsc->id);
native_rsc_location(rsc, constraint);
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
child_rsc->cmds->rsc_location(child_rsc, constraint);
}
}
void
clone_expand(resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
gIter = rsc->actions;
for (; gIter != NULL; gIter = gIter->next) {
action_t *op = (action_t *) gIter->data;
rsc->cmds->action_flags(op, NULL);
}
if (clone_data->start_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->start_notify);
expand_notification_data(rsc, clone_data->start_notify, data_set);
create_notifications(rsc, clone_data->start_notify, data_set);
}
if (clone_data->stop_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->stop_notify);
expand_notification_data(rsc, clone_data->stop_notify, data_set);
create_notifications(rsc, clone_data->stop_notify, data_set);
}
if (clone_data->promote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->promote_notify);
expand_notification_data(rsc, clone_data->promote_notify, data_set);
create_notifications(rsc, clone_data->promote_notify, data_set);
}
if (clone_data->demote_notify) {
collect_notification_data(rsc, TRUE, TRUE, clone_data->demote_notify);
expand_notification_data(rsc, clone_data->demote_notify, data_set);
create_notifications(rsc, clone_data->demote_notify, data_set);
}
/* Now that the notifications have been created we can expand the children */
gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
native_expand(rsc, data_set);
/* The notifications are in the graph now, we can destroy the notify_data */
free_notification_data(clone_data->demote_notify);
clone_data->demote_notify = NULL;
free_notification_data(clone_data->stop_notify);
clone_data->stop_notify = NULL;
free_notification_data(clone_data->start_notify);
clone_data->start_notify = NULL;
free_notification_data(clone_data->promote_notify);
clone_data->promote_notify = NULL;
}
-node_t *
-rsc_known_on(resource_t * rsc, GListPtr * list)
+// Check whether a resource or any of its children is known on node
+static bool
+rsc_known_on(pe_resource_t *rsc, pe_node_t *node)
{
- GListPtr gIter = NULL;
- node_t *one = NULL;
- GListPtr result = NULL;
-
if (rsc->children) {
+ for (GList *child_iter = rsc->children; child_iter != NULL;
+ child_iter = child_iter->next) {
- gIter = rsc->children;
- for (; gIter != NULL; gIter = gIter->next) {
- resource_t *child = (resource_t *) gIter->data;
+ resource_t *child = (resource_t *) child_iter->data;
- rsc_known_on(child, &result);
+ if (rsc_known_on(child, node)) {
+ return TRUE;
+ }
}
} else if (rsc->known_on) {
- result = g_hash_table_get_values(rsc->known_on);
- }
+ GHashTableIter iter;
+ node_t *known_node = NULL;
- if (result && g_list_length(result) == 1) {
- one = g_list_nth_data(result, 0);
+ g_hash_table_iter_init(&iter, rsc->known_on);
+ while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &known_node)) {
+ if (node->details == known_node->details) {
+ return TRUE;
+ }
+ }
}
+ return FALSE;
+}
- if (list) {
- GListPtr gIter = NULL;
-
- gIter = result;
- for (; gIter != NULL; gIter = gIter->next) {
- node_t *node = (node_t *) gIter->data;
+// Look for an instance of clone that is known on node
+static pe_resource_t *
+find_instance_on(pe_resource_t *clone, pe_node_t *node)
+{
+ for (GList *gIter = clone->children; gIter != NULL; gIter = gIter->next) {
+ resource_t *child = (resource_t *) gIter->data;
- if (*list == NULL || pe_find_node_id(*list, node->details->id) == NULL) {
- *list = g_list_prepend(*list, node);
- }
+ if (rsc_known_on(child, node)) {
+ return child;
}
}
-
- g_list_free(result);
- return one;
+ return NULL;
}
-static resource_t *
-find_instance_on(resource_t * rsc, node_t * node)
+// For unique clones, probe each instance separately
+static gboolean
+probe_unique_clone(pe_resource_t *rsc, pe_node_t *node, pe_action_t *complete,
+ gboolean force, pe_working_set_t *data_set)
{
- GListPtr gIter = NULL;
+ gboolean any_created = FALSE;
- gIter = rsc->children;
- for (; gIter != NULL; gIter = gIter->next) {
- GListPtr gIter2 = NULL;
- GListPtr known_list = NULL;
- resource_t *child = (resource_t *) gIter->data;
+ for (GList *child_iter = rsc->children; child_iter != NULL;
+ child_iter = child_iter->next) {
- rsc_known_on(child, &known_list);
+ resource_t *child = (resource_t *) child_iter->data;
- gIter2 = known_list;
- for (; gIter2 != NULL; gIter2 = gIter2->next) {
- node_t *known = (node_t *) gIter2->data;
+ any_created |= child->cmds->create_probe(child, node, complete, force,
+ data_set);
+ }
+ return any_created;
+}
- if (node->details == known->details) {
- g_list_free(known_list);
- return child;
+// For anonymous clones, only a single instance needs to be probed
+static gboolean
+probe_anonymous_clone(pe_resource_t *rsc, pe_node_t *node,
+ pe_action_t *complete, gboolean force,
+ pe_working_set_t *data_set)
+{
+ // First, check if we probed an instance on this node last time
+ pe_resource_t *child = find_instance_on(rsc, node);
+
+ // Otherwise, check if we plan to start an instance on this node
+ if (child == NULL) {
+ for (GList *child_iter = rsc->children; child_iter && !child;
+ child_iter = child_iter->next) {
+
+ node_t *local_node = NULL;
+ resource_t *child_rsc = (resource_t *) child_iter->data;
+
+ local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
+ if (local_node && (local_node->details == node->details)) {
+ child = child_rsc;
}
}
- g_list_free(known_list);
}
- return NULL;
+ // Otherwise, use the first clone instance
+ if (child == NULL) {
+ child = rsc->children->data;
+ }
+ return child->cmds->create_probe(child, node, complete, force, data_set);
}
gboolean
clone_create_probe(resource_t * rsc, node_t * node, action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
- GListPtr gIter = NULL;
gboolean any_created = FALSE;
- clone_variant_data_t *clone_data = NULL;
CRM_ASSERT(rsc);
- get_clone_variant_data(clone_data, rsc);
rsc->children = g_list_sort(rsc->children, sort_rsc_id);
if (rsc->children == NULL) {
pe_warn("Clone %s has no children", rsc->id);
return FALSE;
}
if (rsc->exclusive_discover) {
node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (allowed && allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on
*
* remove the node from allowed_nodes so that the
* notification contains only nodes that we might ever run
* on
*/
g_hash_table_remove(rsc->allowed_nodes, node->details->id);
/* Bit of a shortcut - might as well take it */
return FALSE;
}
}
- if (is_not_set(rsc->flags, pe_rsc_unique)
- && clone_data->clone_node_max == 1) {
- /* only look for one copy */
- resource_t *child = NULL;
-
- /* Try whoever we probed last time */
- child = find_instance_on(rsc, node);
- if (child) {
- return child->cmds->create_probe(child, node, complete, force, data_set);
- }
-
- /* Try whoever we plan on starting there */
- gIter = rsc->children;
- for (; gIter != NULL; gIter = gIter->next) {
- node_t *local_node = NULL;
- resource_t *child_rsc = (resource_t *) gIter->data;
-
- CRM_ASSERT(child_rsc);
- local_node = child_rsc->fns->location(child_rsc, NULL, FALSE);
- if (local_node == NULL) {
- continue;
- }
-
- if (local_node->details == node->details) {
- return child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set);
- }
- }
-
- /* Fall back to the first clone instance */
- CRM_ASSERT(rsc->children);
- child = rsc->children->data;
- return child->cmds->create_probe(child, node, complete, force, data_set);
- }
-
- gIter = rsc->children;
- for (; gIter != NULL; gIter = gIter->next) {
- resource_t *child_rsc = (resource_t *) gIter->data;
-
- if (child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)) {
- any_created = TRUE;
- }
-
- if (any_created && is_not_set(rsc->flags, pe_rsc_unique)
- && clone_data->clone_node_max == 1) {
- /* only look for one copy (clone :0) */
- break;
- }
+ if (is_set(rsc->flags, pe_rsc_unique)) {
+ any_created = probe_unique_clone(rsc, node, complete, force, data_set);
+ } else {
+ any_created = probe_anonymous_clone(rsc, node, complete, force,
+ data_set);
}
-
return any_created;
}
void
clone_append_meta(resource_t * rsc, xmlNode * xml)
{
char *name = NULL;
clone_variant_data_t *clone_data = NULL;
get_clone_variant_data(clone_data, rsc);
name = crm_meta_name(XML_RSC_ATTR_UNIQUE);
crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_unique) ? "true" : "false");
free(name);
name = crm_meta_name(XML_RSC_ATTR_NOTIFY);
crm_xml_add(xml, name, is_set(rsc->flags, pe_rsc_notify) ? "true" : "false");
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_MAX);
crm_xml_add_int(xml, name, clone_data->clone_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_INCARNATION_NODEMAX);
crm_xml_add_int(xml, name, clone_data->clone_node_max);
free(name);
if (is_set(rsc->flags, pe_rsc_promotable)) {
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_MAX);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_PROMOTED_NODEMAX);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
/* @COMPAT Maintain backward compatibility with resource agents that
* expect the old names (deprecated since 2.0.0).
*/
name = crm_meta_name(XML_RSC_ATTR_MASTER_MAX);
crm_xml_add_int(xml, name, clone_data->promoted_max);
free(name);
name = crm_meta_name(XML_RSC_ATTR_MASTER_NODEMAX);
crm_xml_add_int(xml, name, clone_data->promoted_node_max);
free(name);
}
}
GHashTable *
clone_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
float factor, enum pe_weights flags)
{
return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags);
}
diff --git a/daemons/schedulerd/sched_native.c b/daemons/schedulerd/sched_native.c
index af3a072761..4f79bf349f 100644
--- a/daemons/schedulerd/sched_native.c
+++ b/daemons/schedulerd/sched_native.c
@@ -1,3375 +1,3253 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include <crm_internal.h>
#include <crm/pengine/rules.h>
#include <crm/msg_xml.h>
#include <allocate.h>
#include <notif.h>
#include <utils.h>
#include <crm/services.h>
// The controller removes the resource from the CIB, making this redundant
// #define DELETE_THEN_REFRESH 1
#define INFINITY_HACK (INFINITY * -100)
#define VARIANT_NATIVE 1
#include <lib/pengine/variant.h>
gboolean update_action(action_t * then);
void native_rsc_colocation_rh_must(resource_t * rsc_lh, gboolean update_lh,
resource_t * rsc_rh, gboolean update_rh);
void native_rsc_colocation_rh_mustnot(resource_t * rsc_lh, gboolean update_lh,
resource_t * rsc_rh, gboolean update_rh);
static void Recurring(resource_t *rsc, action_t *start, node_t *node,
pe_working_set_t *data_set);
static void RecurringOp(resource_t *rsc, action_t *start, node_t *node,
xmlNode *operation, pe_working_set_t *data_set);
static void Recurring_Stopped(resource_t *rsc, action_t *start, node_t *node,
pe_working_set_t *data_set);
static void RecurringOp_Stopped(resource_t *rsc, action_t *start, node_t *node,
xmlNode *operation, pe_working_set_t *data_set);
void ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set);
gboolean DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set);
gboolean StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean PromoteRsc(resource_t * rsc, node_t * next, gboolean optional,
pe_working_set_t * data_set);
gboolean RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
gboolean NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set);
/* *INDENT-OFF* */
enum rsc_role_e rsc_state_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX] = {
/* Current State */
/* Next State: Unknown Stopped Started Slave Master */
/* Unknown */ { RSC_ROLE_UNKNOWN, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, },
/* Stopped */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, },
/* Started */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STARTED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, },
/* Slave */ { RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, },
/* Master */ { RSC_ROLE_STOPPED, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_SLAVE, RSC_ROLE_MASTER, },
};
gboolean (*rsc_action_matrix[RSC_ROLE_MAX][RSC_ROLE_MAX])(resource_t*,node_t*,gboolean,pe_working_set_t*) = {
/* Current State */
/* Next State: Unknown Stopped Started Slave Master */
/* Unknown */ { RoleError, StopRsc, RoleError, RoleError, RoleError, },
/* Stopped */ { RoleError, NullOp, StartRsc, StartRsc, RoleError, },
/* Started */ { RoleError, StopRsc, NullOp, NullOp, PromoteRsc, },
/* Slave */ { RoleError, StopRsc, StopRsc, NullOp, PromoteRsc, },
/* Master */ { RoleError, DemoteRsc, DemoteRsc, DemoteRsc, NullOp, },
};
/* *INDENT-ON* */
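/* Choose a node for a primitive from its allowed-nodes table, favoring the
 * preferred node when it scores at least as well as the best candidate;
 * returns TRUE if the resource ends up allocated to a node
 */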
static gboolean
native_choose_node(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
{
GListPtr nodes = NULL;
node_t *chosen = NULL;
node_t *best = NULL;
int multiple = 1;
int length = 0;
gboolean result = FALSE;
process_utilization(rsc, &prefer, data_set);
if (is_not_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to ? TRUE : FALSE;
}
// Sort allowed nodes by weight
if (rsc->allowed_nodes) {
length = g_hash_table_size(rsc->allowed_nodes);
}
if (length > 0) {
nodes = g_hash_table_get_values(rsc->allowed_nodes);
nodes = g_list_sort_with_data(nodes, sort_node_weight,
pe__current_node(rsc));
// First node in sorted list has the best score
best = g_list_nth_data(nodes, 0);
}
if (prefer && nodes) {
chosen = g_hash_table_lookup(rsc->allowed_nodes, prefer->details->id);
if (chosen == NULL) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unknown",
prefer->details->uname, rsc->id);
/* Favor the preferred node as long as its weight is at least as good as
* the best allowed node's.
*
* An alternative would be to favor the preferred node even if the best
* node is better, when the best node's weight is less than INFINITY.
*/
} else if ((chosen->weight < 0) || (chosen->weight < best->weight)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unsuitable",
chosen->details->uname, rsc->id);
chosen = NULL;
} else if (!can_run_resources(chosen)) {
pe_rsc_trace(rsc, "Preferred node %s for %s was unavailable",
chosen->details->uname, rsc->id);
chosen = NULL;
} else {
pe_rsc_trace(rsc,
"Chose preferred node %s for %s (ignoring %d candidates)",
chosen->details->uname, rsc->id, length);
}
}
if ((chosen == NULL) && nodes) {
/* Either there is no preferred node, or the preferred node is not
* available, but there are other nodes allowed to run the resource.
*/
chosen = best;
pe_rsc_trace(rsc, "Chose node %s for %s from %d candidates",
chosen ? chosen->details->uname : "<none>", rsc->id, length);
if (!pe_rsc_is_unique_clone(rsc->parent)
&& chosen && (chosen->weight > 0) && can_run_resources(chosen)) {
/* If the resource is already running on a node, prefer that node if
* it is just as good as the chosen node.
*
* We don't do this for unique clone instances, because
* distribute_children() has already assigned instances to their
* running nodes when appropriate, and if we get here, we don't want
* remaining unallocated instances to prefer a node that's already
* running another instance.
*/
node_t *running = pe__current_node(rsc);
if (running && (can_run_resources(running) == FALSE)) {
pe_rsc_trace(rsc, "Current node for %s (%s) can't run resources",
rsc->id, running->details->uname);
} else if (running) {
for (GList *iter = nodes->next; iter; iter = iter->next) {
node_t *tmp = (node_t *) iter->data;
if (tmp->weight != chosen->weight) {
// The nodes are sorted by weight, so no more are equal
break;
}
if (tmp->details == running->details) {
// Scores are equal, so prefer the current node
chosen = tmp;
}
multiple++;
}
}
}
}
if (multiple > 1) {
static char score[33];
int log_level = (chosen->weight >= INFINITY)? LOG_WARNING : LOG_INFO;
score2char_stack(chosen->weight, score, sizeof(score));
do_crm_log(log_level,
"Chose node %s for %s from %d nodes with score %s",
chosen->details->uname, rsc->id, multiple, score);
}
result = native_assign_node(rsc, nodes, chosen, FALSE);
g_list_free(nodes);
return result;
}
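/* Return the best weight among nodes in the table whose value for attr
 * matches the given value; nodes that cannot run resources count as
 * -INFINITY
 */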
static int
node_list_attr_score(GHashTable * list, const char *attr, const char *value)
{
GHashTableIter iter;
node_t *node = NULL;
int best_score = -INFINITY;
const char *best_node = NULL;
if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
g_hash_table_iter_init(&iter, list);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
int weight = node->weight;
if (can_run_resources(node) == FALSE) {
weight = -INFINITY;
}
if (weight > best_score || best_node == NULL) {
const char *tmp = pe_node_attribute_raw(node, attr);
if (safe_str_eq(value, tmp)) {
best_score = weight;
best_node = node->details->uname;
}
}
}
if (safe_str_neq(attr, CRM_ATTR_UNAME)) {
crm_info("Best score for %s=%s was %s with %d",
attr, value, best_node ? best_node : "<none>", best_score);
}
return best_score;
}
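/* Merge the attribute-matched scores of list2 into list1, scaled by factor;
 * with only_positive, a positive node score is never allowed to turn
 * negative
 */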
static void
node_hash_update(GHashTable * list1, GHashTable * list2, const char *attr, float factor,
gboolean only_positive)
{
int score = 0;
int new_score = 0;
GHashTableIter iter;
node_t *node = NULL;
if (attr == NULL) {
attr = CRM_ATTR_UNAME;
}
g_hash_table_iter_init(&iter, list1);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
float weight_f = 0;
int weight = 0;
CRM_LOG_ASSERT(node != NULL);
if(node == NULL) { continue; };
score = node_list_attr_score(list2, attr, pe_node_attribute_raw(node, attr));
weight_f = factor * score;
/* Round the number */
/* http://c-faq.com/fp/round.html */
weight = (int)(weight_f < 0 ? weight_f - 0.5 : weight_f + 0.5);
new_score = merge_weights(weight, node->weight);
if (factor < 0 && score < 0) {
/* Negative preference for a node with a negative score
* should not become a positive preference
*
* TODO - Decide if we want to filter only if weight == -INFINITY
*
*/
crm_trace("%s: Filtering %d + %f*%d (factor * score)",
node->details->uname, node->weight, factor, score);
} else if (node->weight == INFINITY_HACK) {
crm_trace("%s: Filtering %d + %f*%d (node < 0)",
node->details->uname, node->weight, factor, score);
} else if (only_positive && new_score < 0 && node->weight > 0) {
node->weight = INFINITY_HACK;
crm_trace("%s: Filtering %d + %f*%d (score > 0)",
node->details->uname, node->weight, factor, score);
} else if (only_positive && new_score < 0 && node->weight == 0) {
crm_trace("%s: Filtering %d + %f*%d (score == 0)",
node->details->uname, node->weight, factor, score);
} else {
crm_trace("%s: %d + %f*%d", node->details->uname, node->weight, factor, score);
node->weight = new_score;
}
}
}
GHashTable *
node_hash_dup(GHashTable * hash)
{
/* Hack! */
GListPtr list = g_hash_table_get_values(hash);
GHashTable *result = node_hash_from_list(list);
g_list_free(list);
return result;
}
GHashTable *
native_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
float factor, enum pe_weights flags)
{
return rsc_merge_weights(rsc, rhs, nodes, attr, factor, flags);
}
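/* Recursively merge a resource's allowed-node scores, and those of its
 * colocated peers (scaled by constraint score / INFINITY), into the given
 * node table; with pe_weights_rollback, return the original table if the
 * result would leave nowhere to run
 */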
GHashTable *
rsc_merge_weights(resource_t * rsc, const char *rhs, GHashTable * nodes, const char *attr,
float factor, enum pe_weights flags)
{
GHashTable *work = NULL;
int multiplier = 1;
if (factor < 0) {
multiplier = -1;
}
if (is_set(rsc->flags, pe_rsc_merging)) {
pe_rsc_info(rsc, "%s: Breaking dependency loop at %s", rhs, rsc->id);
return nodes;
}
set_bit(rsc->flags, pe_rsc_merging);
if (is_set(flags, pe_weights_init)) {
if (rsc->variant == pe_group && rsc->children) {
GListPtr last = rsc->children;
while (last->next != NULL) {
last = last->next;
}
pe_rsc_trace(rsc, "Merging %s as a group %p %p", rsc->id, rsc->children, last);
work = rsc_merge_weights(last->data, rhs, NULL, attr, factor, flags);
} else {
work = node_hash_dup(rsc->allowed_nodes);
}
clear_bit(flags, pe_weights_init);
} else if (rsc->variant == pe_group && rsc->children) {
GListPtr iter = rsc->children;
pe_rsc_trace(rsc, "%s: Combining scores from %d children of %s", rhs, g_list_length(iter), rsc->id);
work = node_hash_dup(nodes);
for(iter = rsc->children; iter->next != NULL; iter = iter->next) {
work = rsc_merge_weights(iter->data, rhs, work, attr, factor, flags);
}
} else {
pe_rsc_trace(rsc, "%s: Combining scores from %s", rhs, rsc->id);
work = node_hash_dup(nodes);
node_hash_update(work, rsc->allowed_nodes, attr, factor,
is_set(flags, pe_weights_positive));
}
if (is_set(flags, pe_weights_rollback) && can_run_any(work) == FALSE) {
pe_rsc_info(rsc, "%s: Rolling back scores from %s", rhs, rsc->id);
g_hash_table_destroy(work);
clear_bit(rsc->flags, pe_rsc_merging);
return nodes;
}
if (can_run_any(work)) {
GListPtr gIter = NULL;
if (is_set(flags, pe_weights_forward)) {
gIter = rsc->rsc_cons;
crm_trace("Checking %d additional colocation constraints", g_list_length(gIter));
} else if(rsc->variant == pe_group && rsc->children) {
GListPtr last = rsc->children;
while (last->next != NULL) {
last = last->next;
}
gIter = ((resource_t*)last->data)->rsc_cons_lhs;
crm_trace("Checking %d additional optional group colocation constraints from %s",
g_list_length(gIter), ((resource_t*)last->data)->id);
} else {
gIter = rsc->rsc_cons_lhs;
crm_trace("Checking %d additional optional colocation constraints %s", g_list_length(gIter), rsc->id);
}
for (; gIter != NULL; gIter = gIter->next) {
resource_t *other = NULL;
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
if (is_set(flags, pe_weights_forward)) {
other = constraint->rsc_rh;
} else {
other = constraint->rsc_lh;
}
pe_rsc_trace(rsc, "Applying %s (%s)", constraint->id, other->id);
work = rsc_merge_weights(other, rhs, work, constraint->node_attribute,
multiplier * (float)constraint->score / INFINITY, flags|pe_weights_rollback);
dump_node_scores(LOG_TRACE, NULL, rhs, work);
}
}
if (is_set(flags, pe_weights_positive)) {
node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->weight == INFINITY_HACK) {
node->weight = 1;
}
}
}
if (nodes) {
g_hash_table_destroy(nodes);
}
clear_bit(rsc->flags, pe_rsc_merging);
return work;
}
static inline bool
node_has_been_unfenced(node_t *node)
{
const char *unfenced = pe_node_attribute_raw(node, CRM_ATTR_UNFENCED);
return unfenced && strcmp("0", unfenced);
}
static inline bool
is_unfence_device(resource_t *rsc, pe_working_set_t *data_set)
{
return is_set(rsc->flags, pe_rsc_fence_device)
&& is_set(data_set->flags, pe_flag_enable_unfencing);
}
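/* Allocate a primitive resource to a node: factor colocation constraints
 * into its allowed-node scores, then choose a node (unmanaged resources are
 * simply assigned to their current node)
 */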
node_t *
native_color(resource_t * rsc, node_t * prefer, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int alloc_details = scores_log_level + 1;
if (rsc->parent && is_not_set(rsc->parent->flags, pe_rsc_allocating)) {
/* never allocate children on their own */
pe_rsc_debug(rsc, "Escalating allocation of %s to its parent: %s", rsc->id,
rsc->parent->id);
rsc->parent->cmds->allocate(rsc->parent, prefer, data_set);
}
if (is_not_set(rsc->flags, pe_rsc_provisional)) {
return rsc->allocated_to;
}
if (is_set(rsc->flags, pe_rsc_allocating)) {
pe_rsc_debug(rsc, "Dependency loop detected involving %s", rsc->id);
return NULL;
}
set_bit(rsc->flags, pe_rsc_allocating);
print_resource(alloc_details, "Allocating: ", rsc, FALSE);
dump_node_scores(alloc_details, rsc, "Pre-alloc", rsc->allowed_nodes);
for (gIter = rsc->rsc_cons; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
GHashTable *archive = NULL;
resource_t *rsc_rh = constraint->rsc_rh;
pe_rsc_trace(rsc, "%s: Pre-Processing %s (%s, %d, %s)",
rsc->id, constraint->id, rsc_rh->id,
constraint->score, role2text(constraint->role_lh));
if (constraint->role_lh >= RSC_ROLE_MASTER
|| (constraint->score < 0 && constraint->score > -INFINITY)) {
archive = node_hash_dup(rsc->allowed_nodes);
}
rsc_rh->cmds->allocate(rsc_rh, NULL, data_set);
rsc->cmds->rsc_colocation_lh(rsc, rsc_rh, constraint);
if (archive && can_run_any(rsc->allowed_nodes) == FALSE) {
pe_rsc_info(rsc, "%s: Rolling back scores from %s", rsc->id, rsc_rh->id);
g_hash_table_destroy(rsc->allowed_nodes);
rsc->allowed_nodes = archive;
archive = NULL;
}
if (archive) {
g_hash_table_destroy(archive);
}
}
dump_node_scores(alloc_details, rsc, "Post-coloc", rsc->allowed_nodes);
for (gIter = rsc->rsc_cons_lhs; gIter != NULL; gIter = gIter->next) {
rsc_colocation_t *constraint = (rsc_colocation_t *) gIter->data;
rsc->allowed_nodes =
constraint->rsc_lh->cmds->merge_weights(constraint->rsc_lh, rsc->id, rsc->allowed_nodes,
constraint->node_attribute,
(float)constraint->score / INFINITY,
pe_weights_rollback);
}
print_resource(LOG_TRACE, "Allocating: ", rsc, FALSE);
if (rsc->next_role == RSC_ROLE_STOPPED) {
pe_rsc_trace(rsc, "Making sure %s doesn't get allocated", rsc->id);
/* make sure it doesn't come up again */
resource_location(rsc, NULL, -INFINITY, XML_RSC_ATTR_TARGET_ROLE, data_set);
} else if(rsc->next_role > rsc->role
&& is_set(data_set->flags, pe_flag_have_quorum) == FALSE
&& data_set->no_quorum_policy == no_quorum_freeze) {
crm_notice("Resource %s cannot be elevated from %s to %s: no-quorum-policy=freeze",
rsc->id, role2text(rsc->role), role2text(rsc->next_role));
rsc->next_role = rsc->role;
}
dump_node_scores(show_scores ? 0 : scores_log_level, rsc, __FUNCTION__,
rsc->allowed_nodes);
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
clear_bit(rsc->flags, pe_rsc_managed);
}
if (is_not_set(rsc->flags, pe_rsc_managed)) {
const char *reason = NULL;
node_t *assign_to = NULL;
rsc->next_role = rsc->role;
assign_to = pe__current_node(rsc);
if (assign_to == NULL) {
reason = "inactive";
} else if (rsc->role == RSC_ROLE_MASTER) {
reason = "master";
} else if (is_set(rsc->flags, pe_rsc_failed)) {
reason = "failed";
} else {
reason = "active";
}
pe_rsc_info(rsc, "Unmanaged resource %s allocated to %s: %s", rsc->id,
(assign_to? assign_to->details->uname : "no node"), reason);
native_assign_node(rsc, NULL, assign_to, TRUE);
} else if (is_set(data_set->flags, pe_flag_stop_everything)) {
pe_rsc_debug(rsc, "Forcing %s to stop", rsc->id);
native_assign_node(rsc, NULL, NULL, TRUE);
} else if (is_set(rsc->flags, pe_rsc_provisional)
&& native_choose_node(rsc, prefer, data_set)) {
pe_rsc_trace(rsc, "Allocated resource %s to %s", rsc->id,
rsc->allocated_to->details->uname);
} else if (rsc->allocated_to == NULL) {
if (is_not_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_info(rsc, "Resource %s cannot run anywhere", rsc->id);
} else if (rsc->running_on != NULL) {
pe_rsc_info(rsc, "Stopping orphan resource %s", rsc->id);
}
} else {
pe_rsc_debug(rsc, "Pre-Allocated resource %s to %s", rsc->id,
rsc->allocated_to->details->uname);
}
clear_bit(rsc->flags, pe_rsc_allocating);
print_resource(LOG_TRACE, "Allocated ", rsc, TRUE);
if (rsc->is_remote_node) {
node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
CRM_ASSERT(remote_node != NULL);
if (rsc->allocated_to && rsc->next_role != RSC_ROLE_STOPPED) {
crm_trace("Setting Pacemaker Remote node %s to ONLINE",
remote_node->details->id);
remote_node->details->online = TRUE;
            /* We shouldn't consider an unseen remote node unclean if we are
             * going to try to connect to it. Otherwise we get an unnecessary
             * fence. */
if (remote_node->details->unseen == TRUE) {
remote_node->details->unclean = FALSE;
}
} else {
crm_trace("Setting Pacemaker Remote node %s to SHUTDOWN (next role %s, %sallocated)",
remote_node->details->id, role2text(rsc->next_role),
(rsc->allocated_to? "" : "un"));
remote_node->details->shutdown = TRUE;
}
}
return rsc->allocated_to;
}
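/*!
 * \internal
 * \brief Check whether a resource defines the same operation more than once
 *
 * Log a configuration error if the resource configures the same
 * (name, interval) combination more than once.
 *
 * \return TRUE if a duplicate definition exists, FALSE otherwise
 */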
static gboolean
is_op_dup(resource_t *rsc, const char *name, guint interval_ms)
{
gboolean dup = FALSE;
const char *id = NULL;
const char *value = NULL;
xmlNode *operation = NULL;
guint interval2_ms = 0;
CRM_ASSERT(rsc);
for (operation = __xml_first_child(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
value = crm_element_value(operation, "name");
if (safe_str_neq(value, name)) {
continue;
}
value = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval2_ms = crm_parse_interval_spec(value);
if (interval_ms != interval2_ms) {
continue;
}
if (id == NULL) {
id = ID(operation);
} else {
crm_config_err("Operation %s is a duplicate of %s", ID(operation), id);
crm_config_err
("Do not use the same (name, interval) combination more than once per resource");
dup = TRUE;
}
}
}
return dup;
}
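/*!
 * \internal
 * \brief Check whether an action name may never be used as a recurring operation
 *
 * \return true if \p name is stop, start, demote, or promote
 */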
static bool
op_cannot_recur(const char *name)
{
return safe_str_eq(name, RSC_STOP)
|| safe_str_eq(name, RSC_START)
|| safe_str_eq(name, RSC_DEMOTE)
|| safe_str_eq(name, RSC_PROMOTE);
}
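/*!
 * \internal
 * \brief Schedule a recurring operation for a resource's active role
 *
 * Create the recurring action defined by \p operation on the node where the
 * resource will be active, cancelling any existing instance whose role no
 * longer matches, and order it after the resource's start, reload, promote,
 * or demote as appropriate.
 */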
static void
RecurringOp(resource_t * rsc, action_t * start, node_t * node,
xmlNode * operation, pe_working_set_t * data_set)
{
char *key = NULL;
const char *name = NULL;
const char *role = NULL;
const char *interval_spec = NULL;
const char *node_uname = node? node->details->uname : "n/a";
guint interval_ms = 0;
action_t *mon = NULL;
gboolean is_optional = TRUE;
GListPtr possible_matches = NULL;
CRM_ASSERT(rsc);
/* Only process for the operations without role="Stopped" */
role = crm_element_value(operation, "role");
if (role && text2role(role) == RSC_ROLE_STOPPED) {
return;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms == 0) {
return;
}
name = crm_element_value(operation, "name");
if (is_op_dup(rsc, name, interval_ms)) {
crm_trace("Not creating duplicate recurring action %s for %dms %s",
ID(operation), interval_ms, name);
return;
}
if (op_cannot_recur(name)) {
crm_config_err("Ignoring %s because action '%s' cannot be recurring",
ID(operation), name);
return;
}
key = generate_op_key(rsc->id, name, interval_ms);
if (find_rsc_op_entry(rsc, key) == NULL) {
crm_trace("Not creating recurring action %s for disabled resource %s",
ID(operation), rsc->id);
free(key);
return;
}
pe_rsc_trace(rsc, "Creating recurring action %s for %s in role %s on %s",
ID(operation), rsc->id, role2text(rsc->next_role), node_uname);
if (start != NULL) {
pe_rsc_trace(rsc, "Marking %s %s due to %s",
key, is_set(start->flags, pe_action_optional) ? "optional" : "mandatory",
start->uuid);
is_optional = (rsc->cmds->action_flags(start, NULL) & pe_action_optional);
} else {
pe_rsc_trace(rsc, "Marking %s optional", key);
is_optional = TRUE;
}
/* start a monitor for an already active resource */
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches == NULL) {
is_optional = FALSE;
pe_rsc_trace(rsc, "Marking %s mandatory: not active", key);
} else {
GListPtr gIter = NULL;
for (gIter = possible_matches; gIter != NULL; gIter = gIter->next) {
action_t *op = (action_t *) gIter->data;
if (is_set(op->flags, pe_action_reschedule)) {
is_optional = FALSE;
break;
}
}
g_list_free(possible_matches);
}
if ((rsc->next_role == RSC_ROLE_MASTER && role == NULL)
|| (role != NULL && text2role(role) != rsc->next_role)) {
int log_level = LOG_TRACE;
const char *result = "Ignoring";
if (is_optional) {
char *after_key = NULL;
action_t *cancel_op = NULL;
// It's running, so cancel it
log_level = LOG_INFO;
result = "Cancelling";
cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set);
switch (rsc->role) {
case RSC_ROLE_SLAVE:
case RSC_ROLE_STARTED:
if (rsc->next_role == RSC_ROLE_MASTER) {
after_key = promote_key(rsc);
} else if (rsc->next_role == RSC_ROLE_STOPPED) {
after_key = stop_key(rsc);
}
break;
case RSC_ROLE_MASTER:
after_key = demote_key(rsc);
break;
default:
break;
}
if (after_key) {
custom_action_order(rsc, NULL, cancel_op, rsc, after_key, NULL,
pe_order_runnable_left, data_set);
}
}
do_crm_log(log_level, "%s action %s (%s vs. %s)",
result, key, role ? role : role2text(RSC_ROLE_SLAVE),
role2text(rsc->next_role));
free(key);
return;
}
mon = custom_action(rsc, key, name, node, is_optional, TRUE, data_set);
key = mon->uuid;
if (is_optional) {
pe_rsc_trace(rsc, "%s\t %s (optional)", node_uname, mon->uuid);
}
if (start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : start un-runnable)",
node_uname, mon->uuid);
update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
} else if (node == NULL || node->details->online == FALSE || node->details->unclean) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)",
node_uname, mon->uuid);
update_action_flags(mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
} else if (is_set(mon->flags, pe_action_optional) == FALSE) {
pe_rsc_info(rsc, " Start recurring %s (%us) for %s on %s",
mon->task, interval_ms / 1000, rsc->id, node_uname);
}
if (rsc->next_role == RSC_ROLE_MASTER) {
char *running_master = crm_itoa(PCMK_OCF_RUNNING_MASTER);
add_hash_param(mon->meta, XML_ATTR_TE_TARGET_RC, running_master);
free(running_master);
}
if (node == NULL || is_set(rsc->flags, pe_rsc_managed)) {
custom_action_order(rsc, start_key(rsc), NULL,
NULL, strdup(key), mon,
pe_order_implies_then | pe_order_runnable_left, data_set);
custom_action_order(rsc, reload_key(rsc), NULL,
NULL, strdup(key), mon,
pe_order_implies_then | pe_order_runnable_left, data_set);
if (rsc->next_role == RSC_ROLE_MASTER) {
custom_action_order(rsc, promote_key(rsc), NULL,
rsc, NULL, mon,
pe_order_optional | pe_order_runnable_left, data_set);
} else if (rsc->role == RSC_ROLE_MASTER) {
custom_action_order(rsc, demote_key(rsc), NULL,
rsc, NULL, mon,
pe_order_optional | pe_order_runnable_left, data_set);
}
}
}
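/*!
 * \internal
 * \brief Schedule recurring operations for a resource's active role
 *
 * Process each configured operation, unless the resource or node is in
 * maintenance mode.
 */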
static void
Recurring(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set)
{
if (is_not_set(rsc->flags, pe_rsc_maintenance) &&
(node == NULL || node->details->maintenance == FALSE)) {
xmlNode *operation = NULL;
for (operation = __xml_first_child(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
RecurringOp(rsc, start, node, operation, data_set);
}
}
}
}
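/*!
 * \internal
 * \brief Schedule a role="Stopped" recurring operation for a resource
 *
 * Create the monitor defined by \p operation on every node where the
 * resource must not be running, and cancel it on the node (if any) where
 * the resource will be active.
 */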
static void
RecurringOp_Stopped(resource_t * rsc, action_t * start, node_t * node,
xmlNode * operation, pe_working_set_t * data_set)
{
char *key = NULL;
const char *name = NULL;
const char *role = NULL;
const char *interval_spec = NULL;
const char *node_uname = node? node->details->uname : "n/a";
guint interval_ms = 0;
GListPtr possible_matches = NULL;
GListPtr gIter = NULL;
/* Only process for the operations with role="Stopped" */
role = crm_element_value(operation, "role");
if (role == NULL || text2role(role) != RSC_ROLE_STOPPED) {
return;
}
interval_spec = crm_element_value(operation, XML_LRM_ATTR_INTERVAL);
interval_ms = crm_parse_interval_spec(interval_spec);
if (interval_ms == 0) {
return;
}
name = crm_element_value(operation, "name");
if (is_op_dup(rsc, name, interval_ms)) {
crm_trace("Not creating duplicate recurring action %s for %dms %s",
ID(operation), interval_ms, name);
return;
}
if (op_cannot_recur(name)) {
crm_config_err("Invalid recurring action %s wth name: '%s'", ID(operation), name);
return;
}
key = generate_op_key(rsc->id, name, interval_ms);
if (find_rsc_op_entry(rsc, key) == NULL) {
crm_trace("Not creating recurring action %s for disabled resource %s",
ID(operation), rsc->id);
free(key);
return;
}
// @TODO add support
if (is_set(rsc->flags, pe_rsc_unique) == FALSE) {
crm_notice("Ignoring %s (recurring monitors for role=Stopped are not supported for anonyous clones)",
ID(operation));
return;
}
pe_rsc_trace(rsc,
"Creating recurring action %s for %s in role %s on nodes where it should not be running",
ID(operation), rsc->id, role2text(rsc->next_role));
/* if the monitor exists on the node where the resource will be running, cancel it */
if (node != NULL) {
possible_matches = find_actions_exact(rsc->actions, key, node);
if (possible_matches) {
action_t *cancel_op = NULL;
g_list_free(possible_matches);
cancel_op = pe_cancel_op(rsc, name, interval_ms, node, data_set);
if (rsc->next_role == RSC_ROLE_STARTED || rsc->next_role == RSC_ROLE_SLAVE) {
/* rsc->role == RSC_ROLE_STOPPED: cancel the monitor before start */
/* rsc->role == RSC_ROLE_STARTED: for a migration, cancel the monitor on the target node before start */
custom_action_order(rsc, NULL, cancel_op, rsc, start_key(rsc), NULL,
pe_order_runnable_left, data_set);
}
pe_rsc_info(rsc, "Cancel action %s (%s vs. %s) on %s",
key, role, role2text(rsc->next_role), node_uname);
}
}
for (gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *stop_node = (node_t *) gIter->data;
const char *stop_node_uname = stop_node->details->uname;
gboolean is_optional = TRUE;
gboolean probe_is_optional = TRUE;
gboolean stop_is_optional = TRUE;
action_t *stopped_mon = NULL;
char *rc_inactive = NULL;
GListPtr stop_ops = NULL;
GListPtr local_gIter = NULL;
char *stop_op_key = NULL;
if (node && safe_str_eq(stop_node_uname, node_uname)) {
continue;
}
pe_rsc_trace(rsc, "Creating recurring action %s for %s on %s",
ID(operation), rsc->id, crm_str(stop_node_uname));
/* start a monitor for an already stopped resource */
possible_matches = find_actions_exact(rsc->actions, key, stop_node);
if (possible_matches == NULL) {
pe_rsc_trace(rsc, "Marking %s mandatory on %s: not active", key,
crm_str(stop_node_uname));
is_optional = FALSE;
} else {
pe_rsc_trace(rsc, "Marking %s optional on %s: already active", key,
crm_str(stop_node_uname));
is_optional = TRUE;
g_list_free(possible_matches);
}
stopped_mon = custom_action(rsc, strdup(key), name, stop_node, is_optional, TRUE, data_set);
rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
add_hash_param(stopped_mon->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
free(rc_inactive);
if (is_set(rsc->flags, pe_rsc_managed)) {
char *probe_key = generate_op_key(rsc->id, CRMD_ACTION_STATUS, 0);
GListPtr probes = find_actions(rsc->actions, probe_key, stop_node);
GListPtr pIter = NULL;
for (pIter = probes; pIter != NULL; pIter = pIter->next) {
action_t *probe = (action_t *) pIter->data;
order_actions(probe, stopped_mon, pe_order_runnable_left);
crm_trace("%s then %s on %s", probe->uuid, stopped_mon->uuid, stop_node->details->uname);
}
g_list_free(probes);
free(probe_key);
}
stop_op_key = stop_key(rsc);
stop_ops = find_actions_exact(rsc->actions, stop_op_key, stop_node);
for (local_gIter = stop_ops; local_gIter != NULL; local_gIter = local_gIter->next) {
action_t *stop = (action_t *) local_gIter->data;
if (is_set(stop->flags, pe_action_optional) == FALSE) {
stop_is_optional = FALSE;
}
if (is_set(stop->flags, pe_action_runnable) == FALSE) {
crm_debug("%s\t %s (cancelled : stop un-runnable)",
crm_str(stop_node_uname), stopped_mon->uuid);
update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
if (is_set(rsc->flags, pe_rsc_managed)) {
custom_action_order(rsc, strdup(stop_op_key), stop,
NULL, strdup(key), stopped_mon,
pe_order_implies_then | pe_order_runnable_left, data_set);
}
}
if (stop_ops) {
g_list_free(stop_ops);
}
free(stop_op_key);
if (is_optional == FALSE && probe_is_optional && stop_is_optional
&& is_set(rsc->flags, pe_rsc_managed) == FALSE) {
pe_rsc_trace(rsc, "Marking %s optional on %s due to unmanaged",
key, crm_str(stop_node_uname));
update_action_flags(stopped_mon, pe_action_optional, __FUNCTION__, __LINE__);
}
if (is_set(stopped_mon->flags, pe_action_optional)) {
pe_rsc_trace(rsc, "%s\t %s (optional)", crm_str(stop_node_uname), stopped_mon->uuid);
}
if (stop_node->details->online == FALSE || stop_node->details->unclean) {
pe_rsc_debug(rsc, "%s\t %s (cancelled : no node available)",
crm_str(stop_node_uname), stopped_mon->uuid);
update_action_flags(stopped_mon, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
if (is_set(stopped_mon->flags, pe_action_runnable)
&& is_set(stopped_mon->flags, pe_action_optional) == FALSE) {
crm_notice(" Start recurring %s (%us) for %s on %s", stopped_mon->task,
interval_ms / 1000, rsc->id, crm_str(stop_node_uname));
}
}
free(key);
}
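/*!
 * \internal
 * \brief Schedule recurring operations for a resource's Stopped role
 *
 * Process each configured operation, unless the resource or node is in
 * maintenance mode.
 */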
static void
Recurring_Stopped(resource_t * rsc, action_t * start, node_t * node, pe_working_set_t * data_set)
{
if (is_not_set(rsc->flags, pe_rsc_maintenance) &&
(node == NULL || node->details->maintenance == FALSE)) {
xmlNode *operation = NULL;
for (operation = __xml_first_child(rsc->ops_xml); operation != NULL;
operation = __xml_next_element(operation)) {
if (crm_str_eq((const char *)operation->name, "op", TRUE)) {
RecurringOp_Stopped(rsc, start, node, operation, data_set);
}
}
}
}
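/*!
 * \internal
 * \brief Schedule the actions needed to live-migrate a resource
 *
 * Create migrate_to (skipped when continuing a partial migration) and
 * migrate_from actions, convert the start into a pseudo-action, and order
 * probe, migrate_to, and migrate_from so that migrate_from completes before
 * the stop on the source node and the pseudo start on the target node.
 */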
static void
handle_migration_actions(resource_t * rsc, node_t *current, node_t *chosen, pe_working_set_t * data_set)
{
action_t *migrate_to = NULL;
action_t *migrate_from = NULL;
action_t *start = NULL;
action_t *stop = NULL;
gboolean partial = rsc->partial_migration_target ? TRUE : FALSE;
pe_rsc_trace(rsc, "Processing migration actions %s moving from %s to %s . partial migration = %s",
rsc->id, current->details->id, chosen->details->id, partial ? "TRUE" : "FALSE");
start = start_action(rsc, chosen, TRUE);
stop = stop_action(rsc, current, TRUE);
if (partial == FALSE) {
migrate_to = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), RSC_MIGRATE, current, TRUE, TRUE, data_set);
}
migrate_from = custom_action(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), RSC_MIGRATED, chosen, TRUE, TRUE, data_set);
if ((migrate_to && migrate_from) || (migrate_from && partial)) {
set_bit(start->flags, pe_action_migrate_runnable);
set_bit(stop->flags, pe_action_migrate_runnable);
update_action_flags(start, pe_action_pseudo, __FUNCTION__, __LINE__); /* easier than trying to delete it from the graph */
/* order probes before migrations */
if (partial) {
set_bit(migrate_from->flags, pe_action_migrate_runnable);
migrate_from->needs = start->needs;
custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional, data_set);
} else {
set_bit(migrate_from->flags, pe_action_migrate_runnable);
set_bit(migrate_to->flags, pe_action_migrate_runnable);
migrate_to->needs = start->needs;
custom_action_order(rsc, generate_op_key(rsc->id, RSC_STATUS, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL, pe_order_optional, data_set);
custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set);
}
custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL, pe_order_optional | pe_order_implies_first_migratable, data_set);
custom_action_order(rsc, generate_op_key(rsc->id, RSC_MIGRATED, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, pe_order_optional | pe_order_implies_first_migratable | pe_order_pseudo_left, data_set);
}
if (migrate_to) {
add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
add_hash_param(migrate_to->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
/* Pacemaker Remote connections don't require pending to be recorded in
* the CIB. We can reduce CIB writes by not setting PENDING for them.
*/
if (rsc->is_remote_node == FALSE) {
/* migrate_to takes place on the source node, but can
* have an effect on the target node depending on how
* the agent is written. Because of this, we have to maintain
* a record that the migrate_to occurred, in case the source node
* loses membership while the migrate_to action is still in-flight.
*/
add_hash_param(migrate_to->meta, XML_OP_ATTR_PENDING, "true");
}
}
if (migrate_from) {
add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_SOURCE, current->details->uname);
add_hash_param(migrate_from->meta, XML_LRM_ATTR_MIGRATE_TARGET, chosen->details->uname);
}
}
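/*!
 * \internal
 * \brief Create all actions needed for a primitive resource's next transition
 *
 * Walk the role state matrices from the resource's current role to its next
 * role, creating the stop, start, demote, and promote actions required,
 * along with recurring monitors, cleanup of dangling migrations, recovery of
 * multiply active resources, and (when still possible) live migration.
 */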
void
native_create_actions(resource_t * rsc, pe_working_set_t * data_set)
{
action_t *start = NULL;
node_t *chosen = NULL;
node_t *current = NULL;
gboolean need_stop = FALSE;
gboolean is_moving = FALSE;
gboolean allow_migrate = is_set(rsc->flags, pe_rsc_allow_migrate) ? TRUE : FALSE;
GListPtr gIter = NULL;
unsigned int num_all_active = 0;
unsigned int num_clean_active = 0;
bool multiply_active = FALSE;
enum rsc_role_e role = RSC_ROLE_UNKNOWN;
enum rsc_role_e next_role = RSC_ROLE_UNKNOWN;
CRM_ASSERT(rsc);
chosen = rsc->allocated_to;
if (chosen != NULL && rsc->next_role == RSC_ROLE_UNKNOWN) {
rsc->next_role = RSC_ROLE_STARTED;
pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role));
} else if (rsc->next_role == RSC_ROLE_UNKNOWN) {
rsc->next_role = RSC_ROLE_STOPPED;
pe_rsc_trace(rsc, "Fixed next_role: unknown -> %s", role2text(rsc->next_role));
}
pe_rsc_trace(rsc, "Processing state transition for %s %p: %s->%s", rsc->id, rsc,
role2text(rsc->role), role2text(rsc->next_role));
current = pe__find_active_on(rsc, &num_all_active, &num_clean_active);
for (gIter = rsc->dangling_migrations; gIter != NULL; gIter = gIter->next) {
node_t *dangling_source = (node_t *) gIter->data;
action_t *stop = stop_action(rsc, dangling_source, FALSE);
set_bit(stop->flags, pe_action_dangle);
pe_rsc_trace(rsc, "Forcing a cleanup of %s on %s",
rsc->id, dangling_source->details->uname);
if (is_set(data_set->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, dangling_source, FALSE, data_set);
}
}
if ((num_all_active == 2) && (num_clean_active == 2) && chosen
&& rsc->partial_migration_source && rsc->partial_migration_target
&& (current->details == rsc->partial_migration_source->details)
&& (chosen->details == rsc->partial_migration_target->details)) {
/* The chosen node is still the migration target from a partial
* migration. Attempt to continue the migration instead of recovering
* by stopping the resource everywhere and starting it on a single node.
*/
pe_rsc_trace(rsc,
"Will attempt to continue with a partial migration to target %s from %s",
rsc->partial_migration_target->details->id,
rsc->partial_migration_source->details->id);
} else if (is_not_set(rsc->flags, pe_rsc_needs_fencing)) {
/* If a resource has "requires" set to nothing or quorum, don't consider
* it active on unclean nodes (similar to how all resources behave when
* stonith-enabled is false). We can start such resources elsewhere
* before fencing completes, and if we considered the resource active on
* the failed node, we would attempt recovery for being active on
* multiple nodes.
*/
multiply_active = (num_clean_active > 1);
} else {
multiply_active = (num_all_active > 1);
}
if (multiply_active) {
if (rsc->partial_migration_target && rsc->partial_migration_source) {
// Migration was in progress, but we've chosen a different target
crm_notice("Resource %s can no longer migrate to %s. Stopping on %s too",
rsc->id, rsc->partial_migration_target->details->uname,
rsc->partial_migration_source->details->uname);
} else {
// Resource was incorrectly multiply active
pe_proc_err("Resource %s is active on %u nodes (%s)",
rsc->id, num_all_active,
recovery2text(rsc->recovery_type));
crm_notice("See https://wiki.clusterlabs.org/wiki/FAQ#Resource_is_Too_Active for more information");
}
if (rsc->recovery_type == recovery_stop_start) {
need_stop = TRUE;
}
        /* If a partial migration is in progress but the migration target no
         * longer matches the chosen node, clear all partial migration data.
         */
rsc->partial_migration_source = rsc->partial_migration_target = NULL;
allow_migrate = FALSE;
}
if (is_set(rsc->flags, pe_rsc_start_pending)) {
start = start_action(rsc, chosen, TRUE);
set_bit(start->flags, pe_action_print_always);
}
if (current && chosen && current->details != chosen->details) {
pe_rsc_trace(rsc, "Moving %s", rsc->id);
is_moving = TRUE;
need_stop = TRUE;
} else if (is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Recovering %s", rsc->id);
need_stop = TRUE;
} else if (is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "Block %s", rsc->id);
need_stop = TRUE;
} else if (rsc->role > RSC_ROLE_STARTED && current != NULL && chosen != NULL) {
/* Recovery of a promoted resource */
start = start_action(rsc, chosen, TRUE);
if (is_set(start->flags, pe_action_optional) == FALSE) {
pe_rsc_trace(rsc, "Forced start %s", rsc->id);
need_stop = TRUE;
}
}
pe_rsc_trace(rsc, "Creating actions for %s: %s->%s", rsc->id,
role2text(rsc->role), role2text(rsc->next_role));
/* Create any additional actions required when bringing resource down and
* back up to same level.
*/
role = rsc->role;
while (role != RSC_ROLE_STOPPED) {
next_role = rsc_state_matrix[role][RSC_ROLE_STOPPED];
pe_rsc_trace(rsc, "Down: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role),
rsc->id, need_stop ? " required" : "");
if (rsc_action_matrix[role][next_role] (rsc, current, !need_stop, data_set) == FALSE) {
break;
}
role = next_role;
}
while (rsc->role <= rsc->next_role && role != rsc->role && is_not_set(rsc->flags, pe_rsc_block)) {
next_role = rsc_state_matrix[role][rsc->role];
pe_rsc_trace(rsc, "Up: Executing: %s->%s (%s)%s", role2text(role), role2text(next_role),
rsc->id, need_stop ? " required" : "");
if (rsc_action_matrix[role][next_role] (rsc, chosen, !need_stop, data_set) == FALSE) {
break;
}
role = next_role;
}
role = rsc->role;
/* Required steps from this role to the next */
while (role != rsc->next_role) {
next_role = rsc_state_matrix[role][rsc->next_role];
pe_rsc_trace(rsc, "Role: Executing: %s->%s = (%s on %s)", role2text(role), role2text(next_role), rsc->id, chosen?chosen->details->uname:"NA");
if (rsc_action_matrix[role][next_role] (rsc, chosen, FALSE, data_set) == FALSE) {
break;
}
role = next_role;
}
if(is_set(rsc->flags, pe_rsc_block)) {
pe_rsc_trace(rsc, "No monitor additional ops for blocked resource");
} else if (rsc->next_role != RSC_ROLE_STOPPED || is_set(rsc->flags, pe_rsc_managed) == FALSE) {
pe_rsc_trace(rsc, "Monitor ops for active resource");
start = start_action(rsc, chosen, TRUE);
Recurring(rsc, start, chosen, data_set);
Recurring_Stopped(rsc, start, chosen, data_set);
} else {
pe_rsc_trace(rsc, "Monitor ops for inactive resource");
Recurring_Stopped(rsc, NULL, NULL, data_set);
}
    /* If we are stuck in a partial migration and the target of that
     * migration no longer matches the chosen node, a full stop/start
     * is required.
     */
if (rsc->partial_migration_target && (chosen == NULL || rsc->partial_migration_target->details != chosen->details)) {
pe_rsc_trace(rsc, "Not allowing partial migration to continue. %s", rsc->id);
allow_migrate = FALSE;
} else if (is_moving == FALSE ||
is_not_set(rsc->flags, pe_rsc_managed) ||
is_set(rsc->flags, pe_rsc_failed) ||
is_set(rsc->flags, pe_rsc_start_pending) ||
(current && current->details->unclean) ||
rsc->next_role < RSC_ROLE_STARTED) {
allow_migrate = FALSE;
}
if (allow_migrate) {
handle_migration_actions(rsc, current, chosen, data_set);
}
}
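/*!
 * \internal
 * \brief Ban a resource from all Pacemaker Remote nodes
 */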
static void
rsc_avoids_remote_nodes(resource_t *rsc)
{
GHashTableIter iter;
node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->details->remote_rsc) {
node->weight = -INFINITY;
}
}
}
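/*!
 * \internal
 * \brief Create the implicit constraints a primitive resource needs
 *
 * Order stop before start (plus demote before stop and start before promote
 * for promotable clones), order required unfencing around stops and starts,
 * create "load_stopped" orderings when a placement strategy is in effect,
 * and order/colocate the resource relative to its container, if any.
 */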
void
native_internal_constraints(resource_t * rsc, pe_working_set_t * data_set)
{
/* This function is on the critical path and worth optimizing as much as possible */
resource_t *top = uber_parent(rsc);
int type = pe_order_optional | pe_order_implies_then | pe_order_restart;
gboolean is_stonith = is_set(rsc->flags, pe_rsc_fence_device);
custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_START, 0), NULL, type, data_set);
if (is_set(top->flags, pe_rsc_promotable) || (rsc->role > RSC_ROLE_SLAVE)) {
custom_action_order(rsc, generate_op_key(rsc->id, RSC_DEMOTE, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
pe_order_implies_first_master, data_set);
custom_action_order(rsc, generate_op_key(rsc->id, RSC_START, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_PROMOTE, 0), NULL,
pe_order_runnable_left, data_set);
}
if (is_stonith == FALSE
&& is_set(data_set->flags, pe_flag_enable_unfencing)
&& is_set(rsc->flags, pe_rsc_needs_unfencing)) {
/* Check if the node needs to be unfenced first */
node_t *node = NULL;
GHashTableIter iter;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set);
crm_debug("Ordering any stops of %s before %s, and any starts after",
rsc->id, unfence->uuid);
/*
* It would be more efficient to order clone resources once,
* rather than order each instance, but ordering the instance
* allows us to avoid unnecessary dependencies that might conflict
* with user constraints.
*
* @TODO: This constraint can still produce a transition loop if the
* resource has a stop scheduled on the node being unfenced, and
* there is a user ordering constraint to start some other resource
* (which will be ordered after the unfence) before stopping this
* resource. An example is "start some slow-starting cloned service
* before stopping an associated virtual IP that may be moving to
* it":
* stop this -> unfencing -> start that -> stop this
*/
custom_action_order(rsc, stop_key(rsc), NULL,
NULL, strdup(unfence->uuid), unfence,
pe_order_optional|pe_order_same_node, data_set);
custom_action_order(NULL, strdup(unfence->uuid), unfence,
rsc, start_key(rsc), NULL,
pe_order_implies_then_on_node|pe_order_same_node,
data_set);
}
}
if (is_not_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id);
return;
}
{
action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
custom_action_order(rsc, stop_key(rsc), NULL,
NULL, strdup(all_stopped->task), all_stopped,
pe_order_implies_then | pe_order_runnable_left, data_set);
}
if (g_hash_table_size(rsc->utilization) > 0
&& safe_str_neq(data_set->placement_strategy, "default")) {
GHashTableIter iter;
node_t *next = NULL;
GListPtr gIter = NULL;
pe_rsc_trace(rsc, "Creating utilization constraints for %s - strategy: %s",
rsc->id, data_set->placement_strategy);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
node_t *current = (node_t *) gIter->data;
char *load_stopped_task = crm_concat(LOAD_STOPPED, current->details->uname, '_');
action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
if (load_stopped->node == NULL) {
load_stopped->node = node_copy(current);
update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
}
custom_action_order(rsc, stop_key(rsc), NULL,
NULL, load_stopped_task, load_stopped, pe_order_load, data_set);
}
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&next)) {
char *load_stopped_task = crm_concat(LOAD_STOPPED, next->details->uname, '_');
action_t *load_stopped = get_pseudo_op(load_stopped_task, data_set);
if (load_stopped->node == NULL) {
load_stopped->node = node_copy(next);
update_action_flags(load_stopped, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
}
custom_action_order(NULL, strdup(load_stopped_task), load_stopped,
rsc, start_key(rsc), NULL, pe_order_load, data_set);
custom_action_order(NULL, strdup(load_stopped_task), load_stopped,
rsc, generate_op_key(rsc->id, RSC_MIGRATE, 0), NULL,
pe_order_load, data_set);
free(load_stopped_task);
}
}
if (rsc->container) {
resource_t *remote_rsc = NULL;
/* A user can specify that a resource must start on a Pacemaker Remote
* node by explicitly configuring it with the container=NODENAME
* meta-attribute. This is of questionable merit, since location
* constraints can accomplish the same thing. But we support it, so here
* we check whether a resource (that is not itself a remote connection)
* has container set to a remote node or guest node resource.
*/
if (rsc->container->is_remote_node) {
remote_rsc = rsc->container;
} else if (rsc->is_remote_node == FALSE) {
remote_rsc = rsc_contains_remote_node(data_set, rsc->container);
}
if (remote_rsc) {
/* The container represents a Pacemaker Remote node, so force the
* resource on the Pacemaker Remote node instead of colocating the
* resource with the container resource.
*/
GHashTableIter iter;
node_t *node = NULL;
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
if (node->details->remote_rsc != remote_rsc) {
node->weight = -INFINITY;
}
}
} else {
/* This resource is either a filler for a container that does NOT
* represent a Pacemaker Remote node, or a Pacemaker Remote
* connection resource for a guest node or bundle.
*/
int score;
crm_trace("Order and colocate %s relative to its container %s",
rsc->id, rsc->container->id);
custom_action_order(rsc->container, generate_op_key(rsc->container->id, RSC_START, 0), NULL,
rsc, generate_op_key(rsc->id, RSC_START, 0), NULL,
pe_order_implies_then | pe_order_runnable_left, data_set);
custom_action_order(rsc, generate_op_key(rsc->id, RSC_STOP, 0), NULL,
rsc->container, generate_op_key(rsc->container->id, RSC_STOP, 0), NULL,
pe_order_implies_first, data_set);
if (is_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
score = 10000; /* Highly preferred but not essential */
} else {
score = INFINITY; /* Force them to run on the same host */
}
rsc_colocation_new("resource-with-container", NULL, score, rsc,
rsc->container, NULL, NULL, data_set);
}
}
if (rsc->is_remote_node || is_stonith) {
        /* Don't allow remote nodes to run stonith devices
         * or remote connection resources */
rsc_avoids_remote_nodes(rsc);
}
/* If this is a guest node's implicit remote connection, do not allow the
* guest resource to live on a Pacemaker Remote node, to avoid nesting
* remotes. However, allow bundles to run on remote nodes.
*/
if (rsc->is_remote_node && rsc->container
&& is_not_set(rsc->flags, pe_rsc_allow_remote_remotes)) {
rsc_avoids_remote_nodes(rsc->container);
}
}
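/*!
 * \internal
 * \brief Apply a colocation constraint with a primitive on the left-hand side
 *
 * Primitives simply delegate to the right-hand resource's colocation method.
 */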
void
native_rsc_colocation_lh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
{
if (rsc_lh == NULL) {
pe_err("rsc_lh was NULL for %s", constraint->id);
return;
} else if (constraint->rsc_rh == NULL) {
pe_err("rsc_rh was NULL for %s", constraint->id);
return;
}
pe_rsc_trace(rsc_lh, "Processing colocation constraint between %s and %s", rsc_lh->id,
rsc_rh->id);
rsc_rh->cmds->rsc_colocation_rh(rsc_lh, rsc_rh, constraint);
}
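/*!
 * \internal
 * \brief Determine how (if at all) a colocation constraint should be applied
 *
 * \return influence_rsc_location if the constraint should affect where the
 *         left-hand resource runs, influence_rsc_priority if it should only
 *         affect its promotion priority, or influence_nothing otherwise
 */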
enum filter_colocation_res
filter_colocation_constraint(resource_t * rsc_lh, resource_t * rsc_rh,
rsc_colocation_t * constraint, gboolean preview)
{
if (constraint->score == 0) {
return influence_nothing;
}
/* rh side must be allocated before we can process constraint */
if (preview == FALSE && is_set(rsc_rh->flags, pe_rsc_provisional)) {
return influence_nothing;
}
if ((constraint->role_lh >= RSC_ROLE_SLAVE) &&
rsc_lh->parent && is_set(rsc_lh->parent->flags, pe_rsc_promotable)
&& is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
/* LH and RH resources have already been allocated, place the correct
* priority on LH rsc for the given promotable clone resource role */
return influence_rsc_priority;
}
if (preview == FALSE && is_not_set(rsc_lh->flags, pe_rsc_provisional)) {
/* error check */
struct pe_node_shared_s *details_lh;
struct pe_node_shared_s *details_rh;
if ((constraint->score > -INFINITY) && (constraint->score < INFINITY)) {
return influence_nothing;
}
details_rh = rsc_rh->allocated_to ? rsc_rh->allocated_to->details : NULL;
details_lh = rsc_lh->allocated_to ? rsc_lh->allocated_to->details : NULL;
if (constraint->score == INFINITY && details_lh != details_rh) {
crm_err("%s and %s are both allocated"
" but to different nodes: %s vs. %s",
rsc_lh->id, rsc_rh->id,
details_lh ? details_lh->uname : "n/a", details_rh ? details_rh->uname : "n/a");
} else if (constraint->score == -INFINITY && details_lh == details_rh) {
crm_err("%s and %s are both allocated"
" but to the SAME node: %s",
rsc_lh->id, rsc_rh->id, details_rh ? details_rh->uname : "n/a");
}
return influence_nothing;
}
if (constraint->score > 0
&& constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh != rsc_lh->next_role) {
crm_trace("LH: Skipping constraint: \"%s\" state filter nextrole is %s",
role2text(constraint->role_lh), role2text(rsc_lh->next_role));
return influence_nothing;
}
if (constraint->score > 0
&& constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh != rsc_rh->next_role) {
crm_trace("RH: Skipping constraint: \"%s\" state filter", role2text(constraint->role_rh));
        return influence_nothing;
}
if (constraint->score < 0
&& constraint->role_lh != RSC_ROLE_UNKNOWN && constraint->role_lh == rsc_lh->next_role) {
crm_trace("LH: Skipping negative constraint: \"%s\" state filter",
role2text(constraint->role_lh));
return influence_nothing;
}
if (constraint->score < 0
&& constraint->role_rh != RSC_ROLE_UNKNOWN && constraint->role_rh == rsc_rh->next_role) {
crm_trace("RH: Skipping negative constraint: \"%s\" state filter",
role2text(constraint->role_rh));
return influence_nothing;
}
return influence_rsc_location;
}
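/*!
 * \internal
 * \brief Apply a colocation constraint to a resource's promotion priority
 *
 * If both resources are allocated to nodes sharing the relevant attribute
 * value, merge the constraint score into the left-hand resource's priority
 * (negated when the constraint applies to the Slave role).
 */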
static void
influence_priority(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
{
const char *rh_value = NULL;
const char *lh_value = NULL;
const char *attribute = CRM_ATTR_ID;
int score_multiplier = 1;
if (constraint->node_attribute != NULL) {
attribute = constraint->node_attribute;
}
if (!rsc_rh->allocated_to || !rsc_lh->allocated_to) {
return;
}
lh_value = pe_node_attribute_raw(rsc_lh->allocated_to, attribute);
rh_value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute);
if (!safe_str_eq(lh_value, rh_value)) {
if(constraint->score == INFINITY && constraint->role_lh == RSC_ROLE_MASTER) {
rsc_lh->priority = -INFINITY;
}
return;
}
if (constraint->role_rh && (constraint->role_rh != rsc_rh->next_role)) {
return;
}
if (constraint->role_lh == RSC_ROLE_SLAVE) {
score_multiplier = -1;
}
rsc_lh->priority = merge_weights(score_multiplier * constraint->score, rsc_lh->priority);
}
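/*!
 * \internal
 * \brief Apply a colocation constraint to a resource's allowed node weights
 *
 * Raise the weight of allowed nodes matching the right-hand resource's node
 * attribute value and lower the others, rolling the changes back if an
 * optional constraint would leave the resource with nowhere to run.
 */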
static void
colocation_match(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
{
const char *tmp = NULL;
const char *value = NULL;
const char *attribute = CRM_ATTR_ID;
GHashTable *work = NULL;
gboolean do_check = FALSE;
GHashTableIter iter;
node_t *node = NULL;
if (constraint->node_attribute != NULL) {
attribute = constraint->node_attribute;
}
if (rsc_rh->allocated_to) {
value = pe_node_attribute_raw(rsc_rh->allocated_to, attribute);
do_check = TRUE;
} else if (constraint->score < 0) {
/* nothing to do:
* anti-colocation with something that is not running
*/
return;
}
work = node_hash_dup(rsc_lh->allowed_nodes);
g_hash_table_iter_init(&iter, work);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
tmp = pe_node_attribute_raw(node, attribute);
if (do_check && safe_str_eq(tmp, value)) {
if (constraint->score < INFINITY) {
pe_rsc_trace(rsc_lh, "%s: %s.%s += %d", constraint->id, rsc_lh->id,
node->details->uname, constraint->score);
node->weight = merge_weights(constraint->score, node->weight);
}
} else if (do_check == FALSE || constraint->score >= INFINITY) {
pe_rsc_trace(rsc_lh, "%s: %s.%s -= %d (%s)", constraint->id, rsc_lh->id,
node->details->uname, constraint->score,
do_check ? "failed" : "unallocated");
node->weight = merge_weights(-constraint->score, node->weight);
}
}
if (can_run_any(work)
|| constraint->score <= -INFINITY || constraint->score >= INFINITY) {
g_hash_table_destroy(rsc_lh->allowed_nodes);
rsc_lh->allowed_nodes = work;
work = NULL;
} else {
static char score[33];
score2char_stack(constraint->score, score, sizeof(score));
pe_rsc_info(rsc_lh, "%s: Rolling back scores from %s (%d, %s)",
rsc_lh->id, rsc_rh->id, do_check, score);
}
if (work) {
g_hash_table_destroy(work);
}
}
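/*!
 * \internal
 * \brief Apply a colocation constraint with a primitive on the right-hand side
 *
 * Depending on how the constraint filters, update either the left-hand
 * resource's priority or its allowed node weights.
 */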
void
native_rsc_colocation_rh(resource_t * rsc_lh, resource_t * rsc_rh, rsc_colocation_t * constraint)
{
enum filter_colocation_res filter_results;
CRM_ASSERT(rsc_lh);
CRM_ASSERT(rsc_rh);
filter_results = filter_colocation_constraint(rsc_lh, rsc_rh, constraint, FALSE);
pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d, filter=%d)",
constraint->score >= 0 ? "" : "Anti-",
rsc_lh->id, rsc_rh->id, constraint->id, constraint->score, filter_results);
switch (filter_results) {
case influence_rsc_priority:
influence_priority(rsc_lh, rsc_rh, constraint);
break;
case influence_rsc_location:
pe_rsc_trace(rsc_lh, "%sColocating %s with %s (%s, weight=%d)",
constraint->score >= 0 ? "" : "Anti-",
rsc_lh->id, rsc_rh->id, constraint->id, constraint->score);
colocation_match(rsc_lh, rsc_rh, constraint);
break;
case influence_nothing:
default:
return;
}
}
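/*!
 * \internal
 * \brief Check whether a ticket constraint applies to a resource's current role
 *
 * \return FALSE if the constraint is limited to a role the resource is not
 *         currently in, TRUE otherwise
 */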
static gboolean
filter_rsc_ticket(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket)
{
if (rsc_ticket->role_lh != RSC_ROLE_UNKNOWN && rsc_ticket->role_lh != rsc_lh->role) {
pe_rsc_trace(rsc_lh, "LH: Skipping constraint: \"%s\" state filter",
role2text(rsc_ticket->role_lh));
return FALSE;
}
return TRUE;
}
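/*!
 * \internal
 * \brief Enforce a ticket constraint on a resource (and its children)
 *
 * If the ticket is revoked or in standby, keep the resource from running,
 * applying the configured loss policy: stop, demote, fence the hosting
 * nodes, or freeze (block) the resource in place.
 */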
void
rsc_ticket_constraint(resource_t * rsc_lh, rsc_ticket_t * rsc_ticket, pe_working_set_t * data_set)
{
if (rsc_ticket == NULL) {
pe_err("rsc_ticket was NULL");
return;
}
if (rsc_lh == NULL) {
pe_err("rsc_lh was NULL for %s", rsc_ticket->id);
return;
}
if (rsc_ticket->ticket->granted && rsc_ticket->ticket->standby == FALSE) {
return;
}
if (rsc_lh->children) {
GListPtr gIter = rsc_lh->children;
pe_rsc_trace(rsc_lh, "Processing ticket dependencies from %s", rsc_lh->id);
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
rsc_ticket_constraint(child_rsc, rsc_ticket, data_set);
}
return;
}
pe_rsc_trace(rsc_lh, "%s: Processing ticket dependency on %s (%s, %s)",
rsc_lh->id, rsc_ticket->ticket->id, rsc_ticket->id,
role2text(rsc_ticket->role_lh));
if ((rsc_ticket->ticket->granted == FALSE)
&& (rsc_lh->running_on != NULL)) {
GListPtr gIter = NULL;
switch (rsc_ticket->loss_policy) {
case loss_ticket_stop:
resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
break;
case loss_ticket_demote:
// Promotion score will be set to -INFINITY in promotion_order()
if (rsc_ticket->role_lh != RSC_ROLE_MASTER) {
resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
}
break;
case loss_ticket_fence:
if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) {
return;
}
resource_location(rsc_lh, NULL, -INFINITY, "__loss_of_ticket__", data_set);
for (gIter = rsc_lh->running_on; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
pe_fence_node(data_set, node, "deadman ticket was lost");
}
break;
case loss_ticket_freeze:
if (filter_rsc_ticket(rsc_lh, rsc_ticket) == FALSE) {
return;
}
if (rsc_lh->running_on != NULL) {
clear_bit(rsc_lh->flags, pe_rsc_managed);
set_bit(rsc_lh->flags, pe_rsc_block);
}
break;
}
} else if (rsc_ticket->ticket->granted == FALSE) {
if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) {
resource_location(rsc_lh, NULL, -INFINITY, "__no_ticket__", data_set);
}
} else if (rsc_ticket->ticket->standby) {
if (rsc_ticket->role_lh != RSC_ROLE_MASTER || rsc_ticket->loss_policy == loss_ticket_stop) {
resource_location(rsc_lh, NULL, -INFINITY, "__ticket_standby__", data_set);
}
}
}
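/*!
 * \internal
 * \brief Return a primitive's action flags (no per-node adjustment needed)
 */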
enum pe_action_flags
native_action_flags(action_t * action, node_t * node)
{
return action->flags;
}
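/*!
 * \internal
 * \brief Propagate flag changes between two ordered actions
 *
 * Based on the ordering type, update the optional, runnable, and
 * migrate-runnable flags of \p first and \p then so the ordering stays
 * consistent.
 *
 * \return Flags indicating which of the two actions (if either) changed
 */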
enum pe_graph_flags
native_update_actions(action_t * first, action_t * then, node_t * node, enum pe_action_flags flags,
enum pe_action_flags filter, enum pe_ordering type)
{
/* flags == get_action_flags(first, then_node) called from update_action() */
enum pe_graph_flags changed = pe_graph_none;
enum pe_action_flags then_flags = then->flags;
enum pe_action_flags first_flags = first->flags;
crm_trace( "Testing %s on %s (0x%.6x) with %s 0x%.6x",
first->uuid, first->node ? first->node->details->uname : "[none]",
first->flags, then->uuid, then->flags);
if (type & pe_order_asymmetrical) {
resource_t *then_rsc = then->rsc;
enum rsc_role_e then_rsc_role = then_rsc ? then_rsc->fns->state(then_rsc, TRUE) : 0;
if (!then_rsc) {
/* ignore */
} else if ((then_rsc_role == RSC_ROLE_STOPPED) && safe_str_eq(then->task, RSC_STOP)) {
/* ignore... if 'then' is supposed to be stopped after 'first', but
* then is already stopped, there is nothing to be done when non-symmetrical. */
} else if ((then_rsc_role >= RSC_ROLE_STARTED)
&& safe_str_eq(then->task, RSC_START)
&& then->node
&& g_list_length(then_rsc->running_on) == 1
&& then->node->details == ((node_t *) then_rsc->running_on->data)->details) {
/* ignore... if 'then' is supposed to be started after 'first', but
* then is already started, there is nothing to be done when non-symmetrical. */
} else if (!(first->flags & pe_action_runnable)) {
/* prevent 'then' action from happening if 'first' is not runnable and
* 'then' has not yet occurred. */
pe_action_implies(then, first, pe_action_optional);
pe_action_implies(then, first, pe_action_runnable);
pe_rsc_trace(then->rsc, "Unset optional and runnable on %s", then->uuid);
} else {
/* ignore... then is allowed to start/stop if it wants to. */
}
}
if (type & pe_order_implies_first) {
if (is_set(filter, pe_action_optional) && is_not_set(flags /* Should be then_flags? */, pe_action_optional)) {
// Needs is_set(first_flags, pe_action_optional) too?
pe_rsc_trace(first->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_optional);
}
if (is_set(flags, pe_action_migrate_runnable) &&
is_set(then->flags, pe_action_migrate_runnable) == FALSE &&
is_set(then->flags, pe_action_optional) == FALSE) {
pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s",
first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_migrate_runnable);
}
}
if (type & pe_order_implies_first_master) {
if ((filter & pe_action_optional) &&
((then->flags & pe_action_optional) == FALSE) &&
then->rsc && (then->rsc->role == RSC_ROLE_MASTER)) {
pe_action_implies(first, then, pe_action_optional);
if (is_set(first->flags, pe_action_migrate_runnable) &&
is_set(then->flags, pe_action_migrate_runnable) == FALSE) {
pe_rsc_trace(first->rsc, "Unset migrate runnable on %s because of %s", first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_migrate_runnable);
}
pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", first->uuid, then->uuid);
}
}
if ((type & pe_order_implies_first_migratable)
&& is_set(filter, pe_action_optional)) {
if (((then->flags & pe_action_migrate_runnable) == FALSE) ||
((then->flags & pe_action_runnable) == FALSE)) {
pe_rsc_trace(then->rsc, "Unset runnable on %s because %s is neither runnable or migratable", first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_runnable);
}
if ((then->flags & pe_action_optional) == 0) {
pe_rsc_trace(then->rsc, "Unset optional on %s because %s is not optional", first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_optional);
}
}
if ((type & pe_order_pseudo_left)
&& is_set(filter, pe_action_optional)) {
if ((first->flags & pe_action_runnable) == FALSE) {
pe_action_implies(then, first, pe_action_migrate_runnable);
pe_clear_action_bit(then, pe_action_pseudo);
pe_rsc_trace(then->rsc, "Unset pseudo on %s because %s is not runnable", then->uuid, first->uuid);
}
}
if (is_set(type, pe_order_runnable_left)
&& is_set(filter, pe_action_runnable)
&& is_set(then->flags, pe_action_runnable)
&& is_set(flags, pe_action_runnable) == FALSE) {
pe_rsc_trace(then->rsc, "Unset runnable on %s because of %s", then->uuid, first->uuid);
pe_action_implies(then, first, pe_action_runnable);
pe_action_implies(then, first, pe_action_migrate_runnable);
}
if (is_set(type, pe_order_implies_then)
&& is_set(filter, pe_action_optional)
&& is_set(then->flags, pe_action_optional)
&& is_set(flags, pe_action_optional) == FALSE) {
/* in this case, treat migrate_runnable as if first is optional */
if (is_set(first->flags, pe_action_migrate_runnable) == FALSE) {
pe_rsc_trace(then->rsc, "Unset optional on %s because of %s", then->uuid, first->uuid);
pe_action_implies(then, first, pe_action_optional);
}
}
if (is_set(type, pe_order_restart)) {
const char *reason = NULL;
CRM_ASSERT(first->rsc && first->rsc->variant == pe_native);
CRM_ASSERT(then->rsc && then->rsc->variant == pe_native);
if ((filter & pe_action_runnable)
&& (then->flags & pe_action_runnable) == 0
&& (then->rsc->flags & pe_rsc_managed)) {
reason = "shutdown";
}
if ((filter & pe_action_optional) && (then->flags & pe_action_optional) == 0) {
reason = "recover";
}
if (reason && is_set(first->flags, pe_action_optional)) {
if (is_set(first->flags, pe_action_runnable)
|| is_not_set(then->flags, pe_action_optional)) {
pe_rsc_trace(first->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid);
pe_action_implies(first, then, pe_action_optional);
}
}
if (reason && is_not_set(first->flags, pe_action_optional)
&& is_not_set(first->flags, pe_action_runnable)) {
pe_rsc_trace(then->rsc, "Handling %s: %s -> %s", reason, first->uuid, then->uuid);
pe_action_implies(then, first, pe_action_runnable);
}
if (reason &&
is_not_set(first->flags, pe_action_optional) &&
is_set(first->flags, pe_action_migrate_runnable) &&
is_not_set(then->flags, pe_action_migrate_runnable)) {
pe_action_implies(first, then, pe_action_migrate_runnable);
}
}
if (then_flags != then->flags) {
changed |= pe_graph_updated_then;
pe_rsc_trace(then->rsc,
"Then: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x",
then->uuid, then->node ? then->node->details->uname : "[none]", then->flags,
then_flags, first->uuid, first->flags);
if(then->rsc && then->rsc->parent) {
/* "X_stop then X_start" doesn't get handled for cloned groups unless we do this */
update_action(then);
}
}
if (first_flags != first->flags) {
changed |= pe_graph_updated_first;
pe_rsc_trace(first->rsc,
"First: Flags for %s on %s are now 0x%.6x (was 0x%.6x) because of %s 0x%.6x",
first->uuid, first->node ? first->node->details->uname : "[none]",
first->flags, first_flags, then->uuid, then->flags);
}
return changed;
}
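/*!
 * \internal
 * \brief Apply a location constraint to a primitive's allowed node weights
 *
 * Merge the constraint's node weights into the resource's allowed nodes,
 * adding any nodes not yet in the table, and raise each node's probe
 * (discovery) mode if the constraint specifies a higher one.
 */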
void
native_rsc_location(resource_t * rsc, rsc_to_node_t * constraint)
{
GListPtr gIter = NULL;
GHashTableIter iter;
node_t *node = NULL;
if (constraint == NULL) {
pe_err("Constraint is NULL");
return;
} else if (rsc == NULL) {
pe_err("LHS of rsc_to_node (%s) is NULL", constraint->id);
return;
}
pe_rsc_trace(rsc, "Applying %s (%s) to %s", constraint->id,
role2text(constraint->role_filter), rsc->id);
/* take "lifetime" into account */
if (constraint->role_filter > RSC_ROLE_UNKNOWN && constraint->role_filter != rsc->next_role) {
pe_rsc_debug(rsc, "Constraint (%s) is not active (role : %s vs. %s)",
constraint->id, role2text(constraint->role_filter), role2text(rsc->next_role));
return;
} else if (is_active(constraint) == FALSE) {
pe_rsc_trace(rsc, "Constraint (%s) is not active", constraint->id);
return;
}
if (constraint->node_list_rh == NULL) {
pe_rsc_trace(rsc, "RHS of constraint %s is NULL", constraint->id);
return;
}
for (gIter = constraint->node_list_rh; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
node_t *other_node = NULL;
other_node = (node_t *) pe_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (other_node != NULL) {
pe_rsc_trace(rsc, "%s + %s: %d + %d",
node->details->uname,
other_node->details->uname, node->weight, other_node->weight);
other_node->weight = merge_weights(other_node->weight, node->weight);
} else {
other_node = node_copy(node);
pe_rsc_trace(rsc, "%s: %d (insert %d)", other_node->details->uname, other_node->weight, constraint->discover_mode);
g_hash_table_insert(rsc->allowed_nodes, (gpointer) other_node->details->id, other_node);
}
if (other_node->rsc_discover_mode < constraint->discover_mode) {
if (constraint->discover_mode == pe_discover_exclusive) {
rsc->exclusive_discover = TRUE;
}
/* exclusive > never > always... always is default */
other_node->rsc_discover_mode = constraint->discover_mode;
}
}
g_hash_table_iter_init(&iter, rsc->allowed_nodes);
while (g_hash_table_iter_next(&iter, NULL, (void **)&node)) {
pe_rsc_trace(rsc, "%s + %s : %d", rsc->id, node->details->uname, node->weight);
}
}
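/*!
 * \internal
 * \brief Add a resource's actions (and its children's) to the transition graph
 */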
void
native_expand(resource_t * rsc, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Processing actions from %s", rsc->id);
for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
crm_trace("processing action %d for rsc=%s", action->id, rsc->id);
graph_element_from_action(action, data_set);
}
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
child_rsc->cmds->expand(child_rsc, data_set);
}
}
#define log_change(a, fmt, args...) do { \
if(a && a->reason && terminal) { \
printf(" * "fmt" \tdue to %s\n", ##args, a->reason); \
} else if(a && a->reason) { \
crm_notice(fmt" \tdue to %s", ##args, a->reason); \
} else if(terminal) { \
printf(" * "fmt"\n", ##args); \
} else { \
crm_notice(fmt, ##args); \
} \
} while(0)
#define STOP_SANITY_ASSERT(lineno) do { \
if(current && current->details->unclean) { \
/* It will be a pseudo op */ \
} else if(stop == NULL) { \
crm_err("%s:%d: No stop action exists for %s", __FUNCTION__, lineno, rsc->id); \
CRM_ASSERT(stop != NULL); \
} else if(is_set(stop->flags, pe_action_optional)) { \
crm_err("%s:%d: Action %s is still optional", __FUNCTION__, lineno, stop->uuid); \
CRM_ASSERT(is_not_set(stop->flags, pe_action_optional)); \
} \
} while(0)
static int rsc_width = 5;
static int detail_width = 5;
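/*!
 * \internal
 * \brief Log (or print, in terminal mode) one planned change for a resource
 *
 * Format an aligned line containing the type of change, the resource name,
 * the roles and nodes involved, and the reason for the action, if any.
 */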
static void
LogAction(const char *change, resource_t *rsc, pe_node_t *origin, pe_node_t *destination, pe_action_t *action, pe_action_t *source, gboolean terminal)
{
int len = 0;
char *reason = NULL;
char *details = NULL;
bool same_host = FALSE;
bool same_role = FALSE;
bool need_role = FALSE;
CRM_ASSERT(action);
CRM_ASSERT(destination != NULL || origin != NULL);
if(source == NULL) {
source = action;
}
len = strlen(rsc->id);
if(len > rsc_width) {
rsc_width = len + 2;
}
if(rsc->role > RSC_ROLE_STARTED || rsc->next_role > RSC_ROLE_SLAVE) {
need_role = TRUE;
}
if(origin != NULL && destination != NULL && origin->details == destination->details) {
same_host = TRUE;
}
if(rsc->role == rsc->next_role) {
same_role = TRUE;
}
if(need_role && origin == NULL) {
/* Promoting from Stopped */
details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), destination->details->uname);
} else if(need_role && destination == NULL) {
/* Demoting a Master or Stopping a Slave */
details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname);
} else if(origin == NULL || destination == NULL) {
/* Starting or stopping a resource */
details = crm_strdup_printf("%s", origin?origin->details->uname:destination->details->uname);
} else if(need_role && same_role && same_host) {
/* Recovering or restarting a promotable clone resource */
details = crm_strdup_printf("%s %s", role2text(rsc->role), origin->details->uname);
} else if(same_role && same_host) {
/* Recovering or Restarting a normal resource */
details = crm_strdup_printf("%s", origin->details->uname);
} else if(same_role && need_role) {
/* Moving a promotable clone resource */
details = crm_strdup_printf("%s -> %s %s", origin->details->uname, destination->details->uname, role2text(rsc->role));
} else if(same_role) {
/* Moving a normal resource */
details = crm_strdup_printf("%s -> %s", origin->details->uname, destination->details->uname);
} else if(same_host) {
/* Promoting or demoting a promotable clone resource */
details = crm_strdup_printf("%s -> %s %s", role2text(rsc->role), role2text(rsc->next_role), origin->details->uname);
} else {
/* Moving and promoting/demoting */
details = crm_strdup_printf("%s %s -> %s %s", role2text(rsc->role), origin->details->uname, role2text(rsc->next_role), destination->details->uname);
}
len = strlen(details);
if(len > detail_width) {
detail_width = len;
}
if(source->reason && is_not_set(action->flags, pe_action_runnable)) {
reason = crm_strdup_printf(" due to %s (blocked)", source->reason);
} else if(source->reason) {
reason = crm_strdup_printf(" due to %s", source->reason);
} else if(is_not_set(action->flags, pe_action_runnable)) {
reason = strdup(" blocked");
} else {
reason = strdup("");
}
if(terminal) {
printf(" * %-8s %-*s ( %*s ) %s\n", change, rsc_width, rsc->id, detail_width, details, reason);
} else {
crm_notice(" * %-8s %-*s ( %*s ) %s", change, rsc_width, rsc->id, detail_width, details, reason);
}
free(details);
free(reason);
}
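/* For illustration only (not part of the patch): with the column widths
 * grown to, say, rsc_width=10 and detail_width=16, terminal output from
 * LogAction() looks roughly like:
 *
 *  * Move       rsc1       (  node1 -> node2 )
 *  * Recover    rsc2       (           node1 )  due to monitor failure
 *
 * (hypothetical resource names and reason). The widths are static so that
 * successive lines stay aligned.
 */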
void
LogActions(resource_t * rsc, pe_working_set_t * data_set, gboolean terminal)
{
node_t *next = NULL;
node_t *current = NULL;
action_t *stop = NULL;
action_t *start = NULL;
action_t *demote = NULL;
action_t *promote = NULL;
char *key = NULL;
gboolean moving = FALSE;
GListPtr possible_matches = NULL;
if(rsc->variant == pe_container) {
container_LogActions(rsc, data_set, terminal);
return;
}
if (rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
LogActions(child_rsc, data_set, terminal);
}
return;
}
next = rsc->allocated_to;
if (rsc->running_on) {
current = pe__current_node(rsc);
if (rsc->role == RSC_ROLE_STOPPED) {
/*
* This can occur when resources are being recovered;
* we fiddle with the current role in native_create_actions()
*/
rsc->role = RSC_ROLE_STARTED;
}
}
if (current == NULL && is_set(rsc->flags, pe_rsc_orphan)) {
/* Don't log stopped orphans */
return;
}
if (is_not_set(rsc->flags, pe_rsc_managed)
|| (current == NULL && next == NULL)) {
pe_rsc_info(rsc, "Leave %s\t(%s%s)",
rsc->id, role2text(rsc->role), is_not_set(rsc->flags,
pe_rsc_managed) ? " unmanaged" : "");
return;
}
if (current != NULL && next != NULL && safe_str_neq(current->details->id, next->details->id)) {
moving = TRUE;
}
key = start_key(rsc);
possible_matches = find_actions(rsc->actions, key, next);
free(key);
if (possible_matches) {
start = possible_matches->data;
g_list_free(possible_matches);
}
key = stop_key(rsc);
if(start == NULL || is_set(start->flags, pe_action_runnable) == FALSE) {
possible_matches = find_actions(rsc->actions, key, NULL);
} else {
possible_matches = find_actions(rsc->actions, key, current);
}
if (possible_matches) {
stop = possible_matches->data;
g_list_free(possible_matches);
}
free(key);
key = promote_key(rsc);
possible_matches = find_actions(rsc->actions, key, next);
free(key);
if (possible_matches) {
promote = possible_matches->data;
g_list_free(possible_matches);
}
key = demote_key(rsc);
possible_matches = find_actions(rsc->actions, key, next);
free(key);
if (possible_matches) {
demote = possible_matches->data;
g_list_free(possible_matches);
}
if (rsc->role == rsc->next_role) {
action_t *migrate_op = NULL;
key = generate_op_key(rsc->id, RSC_MIGRATED, 0);
possible_matches = find_actions(rsc->actions, key, next);
free(key);
if (possible_matches) {
migrate_op = possible_matches->data;
}
CRM_CHECK(next != NULL,);
if (next == NULL) {
} else if (migrate_op && is_set(migrate_op->flags, pe_action_runnable) && current) {
LogAction("Migrate", rsc, current, next, start, NULL, terminal);
} else if (is_set(rsc->flags, pe_rsc_reload)) {
LogAction("Reload", rsc, current, next, start, NULL, terminal);
} else if (start == NULL || is_set(start->flags, pe_action_optional)) {
pe_rsc_info(rsc, "Leave %s\t(%s %s)", rsc->id, role2text(rsc->role),
next->details->uname);
} else if (start && is_set(start->flags, pe_action_runnable) == FALSE) {
LogAction("Stop", rsc, current, NULL, stop,
(stop && stop->reason)? stop : start, terminal);
STOP_SANITY_ASSERT(__LINE__);
} else if (moving && current) {
LogAction(is_set(rsc->flags, pe_rsc_failed) ? "Recover" : "Move",
rsc, current, next, stop, NULL, terminal);
} else if (is_set(rsc->flags, pe_rsc_failed)) {
LogAction("Recover", rsc, current, NULL, stop, NULL, terminal);
STOP_SANITY_ASSERT(__LINE__);
} else {
LogAction("Restart", rsc, current, next, start, NULL, terminal);
/* STOP_SANITY_ASSERT(__LINE__); False positive for migrate-fail-7 */
}
g_list_free(possible_matches);
return;
}
if(stop
&& (rsc->next_role == RSC_ROLE_STOPPED
|| (start && is_not_set(start->flags, pe_action_runnable)))) {
GListPtr gIter = NULL;
key = stop_key(rsc);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
node_t *node = (node_t *) gIter->data;
action_t *stop_op = NULL;
possible_matches = find_actions(rsc->actions, key, node);
if (possible_matches) {
stop_op = possible_matches->data;
g_list_free(possible_matches);
}
if (stop_op && (stop_op->flags & pe_action_runnable)) {
STOP_SANITY_ASSERT(__LINE__);
}
LogAction("Stop", rsc, node, NULL, stop_op,
(stop_op && stop_op->reason)? stop_op : start, terminal);
}
free(key);
} else if (stop && is_set(rsc->flags, pe_rsc_failed)) {
/* 'stop' may be NULL if the failure was ignored */
LogAction("Recover", rsc, current, next, stop, start, terminal);
STOP_SANITY_ASSERT(__LINE__);
} else if (moving) {
LogAction("Move", rsc, current, next, stop, NULL, terminal);
STOP_SANITY_ASSERT(__LINE__);
} else if (is_set(rsc->flags, pe_rsc_reload)) {
LogAction("Reload", rsc, current, next, start, NULL, terminal);
} else if (stop != NULL && is_not_set(stop->flags, pe_action_optional)) {
LogAction("Restart", rsc, current, next, start, NULL, terminal);
STOP_SANITY_ASSERT(__LINE__);
} else if (rsc->role == RSC_ROLE_MASTER) {
CRM_LOG_ASSERT(current != NULL);
LogAction("Demote", rsc, current, next, demote, NULL, terminal);
} else if(rsc->next_role == RSC_ROLE_MASTER) {
CRM_LOG_ASSERT(next);
LogAction("Promote", rsc, current, next, promote, NULL, terminal);
} else if (rsc->role == RSC_ROLE_STOPPED && rsc->next_role > RSC_ROLE_STOPPED) {
LogAction("Start", rsc, current, next, start, NULL, terminal);
}
}
gboolean
StopRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s", rsc->id);
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
node_t *current = (node_t *) gIter->data;
action_t *stop;
if (rsc->partial_migration_target) {
if (rsc->partial_migration_target->details == current->details) {
pe_rsc_trace(rsc, "Filtered %s -> %s %s", current->details->uname,
next->details->uname, rsc->id);
continue;
} else {
pe_rsc_trace(rsc, "Forced on %s %s", current->details->uname, rsc->id);
optional = FALSE;
}
}
pe_rsc_trace(rsc, "%s on %s", rsc->id, current->details->uname);
stop = stop_action(rsc, current, optional);
if(rsc->allocated_to == NULL) {
pe_action_set_reason(stop, "node availability", TRUE);
}
if (is_not_set(rsc->flags, pe_rsc_managed)) {
update_action_flags(stop, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
if (is_set(data_set->flags, pe_flag_remove_after_stop)) {
DeleteRsc(rsc, current, optional, data_set);
}
if(is_set(rsc->flags, pe_rsc_needs_unfencing)) {
action_t *unfence = pe_fence_op(current, "on", TRUE, NULL, data_set);
order_actions(stop, unfence, pe_order_implies_first);
if (!node_has_been_unfenced(current)) {
pe_proc_err("Stopping %s until %s can be unfenced", rsc->id, current->details->uname);
}
}
}
return TRUE;
}
static void
order_after_unfencing(resource_t *rsc, pe_node_t *node, action_t *action,
enum pe_ordering order, pe_working_set_t *data_set)
{
/* When unfencing is in use, we order unfence actions before any probe or
* start of resources that require unfencing, and also of fence devices.
*
* This might seem to violate the principle that fence devices require
* only quorum. However, fence agents that unfence often don't have enough
* information to even probe or start unless the node is first unfenced.
*/
if (is_unfence_device(rsc, data_set)
|| is_set(rsc->flags, pe_rsc_needs_unfencing)) {
/* Start with an optional ordering. Requiring unfencing would result in
* the node being unfenced, and all its resources being stopped,
* whenever a new resource is added -- which would be highly suboptimal.
*/
action_t *unfence = pe_fence_op(node, "on", TRUE, NULL, data_set);
order_actions(unfence, action, order);
if (!node_has_been_unfenced(node)) {
// But unfencing is required if it has never been done
char *reason = crm_strdup_printf("required by %s %s",
rsc->id, action->task);
trigger_unfencing(NULL, node, reason, NULL, data_set);
free(reason);
}
}
}
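/* Illustrative sketch (not part of the patch) of the edge this creates for
 * a resource that requires unfencing:
 *
 *   unfence(node)  ---(caller-supplied order flags)--->  probe/start(rsc)
 *
 * If the node has never been unfenced, trigger_unfencing() also schedules
 * the unfence action itself, so the ordering actually takes effect in this
 * transition.
 */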
gboolean
StartRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
action_t *start = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s on %s %d %d", rsc->id, next ? next->details->uname : "N/A", optional, next ? next->weight : 0);
start = start_action(rsc, next, TRUE);
order_after_unfencing(rsc, next, start, pe_order_implies_then, data_set);
if (is_set(start->flags, pe_action_runnable) && optional == FALSE) {
update_action_flags(start, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
}
return TRUE;
}
gboolean
PromoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
char *key = NULL;
GListPtr gIter = NULL;
gboolean runnable = TRUE;
GListPtr action_list = NULL;
CRM_ASSERT(rsc);
CRM_CHECK(next != NULL, return FALSE);
pe_rsc_trace(rsc, "%s on %s", rsc->id, next->details->uname);
key = start_key(rsc);
action_list = find_actions_exact(rsc->actions, key, next);
free(key);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
action_t *start = (action_t *) gIter->data;
if (is_set(start->flags, pe_action_runnable) == FALSE) {
runnable = FALSE;
}
}
g_list_free(action_list);
if (runnable) {
promote_action(rsc, next, optional);
return TRUE;
}
pe_rsc_debug(rsc, "%s\tPromote %s (canceled)", next->details->uname, rsc->id);
key = promote_key(rsc);
action_list = find_actions_exact(rsc->actions, key, next);
free(key);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
action_t *promote = (action_t *) gIter->data;
update_action_flags(promote, pe_action_runnable | pe_action_clear, __FUNCTION__, __LINE__);
}
g_list_free(action_list);
return TRUE;
}
gboolean
DemoteRsc(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s", rsc->id);
/* CRM_CHECK(rsc->next_role == RSC_ROLE_SLAVE, return FALSE); */
for (gIter = rsc->running_on; gIter != NULL; gIter = gIter->next) {
node_t *current = (node_t *) gIter->data;
pe_rsc_trace(rsc, "%s on %s", rsc->id, next ? next->details->uname : "N/A");
demote_action(rsc, current, optional);
}
return TRUE;
}
gboolean
RoleError(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
CRM_ASSERT(rsc);
crm_err("%s on %s", rsc->id, next ? next->details->uname : "N/A");
CRM_CHECK(FALSE, return FALSE);
return FALSE;
}
gboolean
NullOp(resource_t * rsc, node_t * next, gboolean optional, pe_working_set_t * data_set)
{
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s", rsc->id);
return FALSE;
}
gboolean
DeleteRsc(resource_t * rsc, node_t * node, gboolean optional, pe_working_set_t * data_set)
{
if (is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: failed", rsc->id, node->details->uname);
return FALSE;
} else if (node == NULL) {
pe_rsc_trace(rsc, "Resource %s not deleted: NULL node", rsc->id);
return FALSE;
} else if (node->details->unclean || node->details->online == FALSE) {
pe_rsc_trace(rsc, "Resource %s not deleted from %s: unrunnable", rsc->id,
node->details->uname);
return FALSE;
}
crm_notice("Removing %s from %s", rsc->id, node->details->uname);
delete_action(rsc, node, optional);
new_rsc_order(rsc, RSC_STOP, rsc, RSC_DELETE,
optional ? pe_order_implies_then : pe_order_optional, data_set);
new_rsc_order(rsc, RSC_DELETE, rsc, RSC_START,
optional ? pe_order_implies_then : pe_order_optional, data_set);
return TRUE;
}
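/* Net ordering created above (illustrative): rsc.stop -> rsc.delete ->
 * rsc.start, so the resource's history is removed only while it is down.
 */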
-#include <../lib/pengine/unpack.h>
-#define set_char(x) last_rsc_id[lpc] = x; complete = TRUE;
-static char *
-increment_clone(char *last_rsc_id)
-{
- int lpc = 0;
- int len = 0;
- char *tmp = NULL;
- gboolean complete = FALSE;
-
- CRM_CHECK(last_rsc_id != NULL, return NULL);
- len = strlen(last_rsc_id);
- lpc = len - 1;
- while (complete == FALSE && lpc > 0) {
- switch (last_rsc_id[lpc]) {
- case 0:
- lpc--;
- break;
- case '0':
- set_char('1');
- break;
- case '1':
- set_char('2');
- break;
- case '2':
- set_char('3');
- break;
- case '3':
- set_char('4');
- break;
- case '4':
- set_char('5');
- break;
- case '5':
- set_char('6');
- break;
- case '6':
- set_char('7');
- break;
- case '7':
- set_char('8');
- break;
- case '8':
- set_char('9');
- break;
- case '9':
- last_rsc_id[lpc] = '0';
- lpc--;
- break;
- case ':':
- tmp = last_rsc_id;
- last_rsc_id = crm_strdup_printf("%s:10", tmp);
- complete = TRUE;
- free(tmp);
- break;
- default:
- crm_err("Unexpected char: %c (%d)", last_rsc_id[lpc], lpc);
- return NULL;
- break;
- }
- }
- return last_rsc_id;
-}
-
-static node_t *
-probe_anon_group_member(resource_t *rsc, node_t *node,
- pe_working_set_t *data_set)
-{
- resource_t *top = uber_parent(rsc);
-
- if (is_not_set(top->flags, pe_rsc_unique)) {
- /* Annoyingly we also need to check any other clone instances
- * Clumsy, but it will work.
- *
- * An alternative would be to update known_on for every peer
- * during process_rsc_state()
- *
- * This code desperately needs optimization
- * ptest -x with 100 nodes, 100 clones and clone-max=10:
- * No probes O(25s)
- * Detection without clone loop O(3m)
- * Detection with clone loop O(8m)
-
- ptest[32211]: 2010/02/18_14:27:55 CRIT: stage5: Probing for unknown resources
- ptest[32211]: 2010/02/18_14:33:39 CRIT: stage5: Done
- ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Updating action states
- ptest[32211]: 2010/02/18_14:35:05 CRIT: stage7: Done
-
- */
- char *clone_id = clone_zero(rsc->id);
- resource_t *peer = pe_find_resource(top->children, clone_id);
- node_t *running = NULL;
-
- while (peer) {
- running = pe_hash_table_lookup(peer->known_on, node->details->id);
- if (running != NULL) {
- /* we already know the status of the resource on this node */
- pe_rsc_trace(rsc, "Skipping active clone: %s", rsc->id);
- free(clone_id);
- return running;
- }
- clone_id = increment_clone(clone_id);
- peer = pe_find_resource(data_set->resources, clone_id);
- }
-
- free(clone_id);
- }
- return NULL;
-}
-
gboolean
native_create_probe(resource_t * rsc, node_t * node, action_t * complete,
gboolean force, pe_working_set_t * data_set)
{
enum pe_ordering flags = pe_order_optional;
char *key = NULL;
action_t *probe = NULL;
node_t *running = NULL;
node_t *allowed = NULL;
resource_t *top = uber_parent(rsc);
static const char *rc_master = NULL;
static const char *rc_inactive = NULL;
if (rc_inactive == NULL) {
rc_inactive = crm_itoa(PCMK_OCF_NOT_RUNNING);
rc_master = crm_itoa(PCMK_OCF_RUNNING_MASTER);
}
CRM_CHECK(node != NULL, return FALSE);
if (force == FALSE && is_not_set(data_set->flags, pe_flag_startup_probes)) {
pe_rsc_trace(rsc, "Skipping active resource detection for %s", rsc->id);
return FALSE;
}
if (is_remote_node(node)) {
const char *class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
if (safe_str_eq(class, PCMK_RESOURCE_CLASS_STONITH)) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because Pacemaker Remote nodes cannot run stonith agents",
rsc->id, node->details->id);
return FALSE;
} else if (is_container_remote_node(node) && rsc_contains_remote_node(data_set, rsc)) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because guest nodes cannot run resources containing guest nodes",
rsc->id, node->details->id);
return FALSE;
} else if (rsc->is_remote_node) {
pe_rsc_trace(rsc,
"Skipping probe for %s on %s because Pacemaker Remote nodes cannot host remote connections",
rsc->id, node->details->id);
return FALSE;
}
}
if (rsc->children) {
GListPtr gIter = NULL;
gboolean any_created = FALSE;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
any_created = child_rsc->cmds->create_probe(child_rsc, node, complete, force, data_set)
|| any_created;
}
return any_created;
} else if ((rsc->container) && (!rsc->is_remote_node)) {
pe_rsc_trace(rsc, "Skipping %s: it is within container %s", rsc->id, rsc->container->id);
return FALSE;
}
if (is_set(rsc->flags, pe_rsc_orphan)) {
pe_rsc_trace(rsc, "Skipping orphan: %s", rsc->id);
return FALSE;
}
- running = g_hash_table_lookup(rsc->known_on, node->details->id);
- if (running == NULL && is_set(rsc->flags, pe_rsc_unique) == FALSE) {
- /* Anonymous clones */
- if (rsc->parent == top) {
- running = g_hash_table_lookup(rsc->parent->known_on, node->details->id);
-
- } else {
- // Members of anonymous-cloned groups need special handling
- running = probe_anon_group_member(rsc, node, data_set);
- }
- }
-
- if (force == FALSE && running != NULL) {
- /* we already know the status of the resource on this node */
+ // Check whether resource is already known on node
+ if (!force && g_hash_table_lookup(rsc->known_on, node->details->id)) {
pe_rsc_trace(rsc, "Skipping known: %s on %s", rsc->id, node->details->uname);
return FALSE;
}
allowed = g_hash_table_lookup(rsc->allowed_nodes, node->details->id);
if (rsc->exclusive_discover || top->exclusive_discover) {
if (allowed == NULL) {
/* exclusive discover is enabled and this node is not in the allowed list. */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, A", rsc->id, node->details->id);
return FALSE;
} else if (allowed->rsc_discover_mode != pe_discover_exclusive) {
/* exclusive discover is enabled and this node is not marked
* as a node this resource should be discovered on */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, B", rsc->id, node->details->id);
return FALSE;
}
}
if(allowed == NULL && node->rsc_discover_mode == pe_discover_never) {
/* If this node was allowed to host this resource it would
* have been explicitly added to the 'allowed_nodes' list.
* However it wasn't and the node has discovery disabled, so
* no need to probe for this resource.
*/
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, C", rsc->id, node->details->id);
return FALSE;
}
if (allowed && allowed->rsc_discover_mode == pe_discover_never) {
/* this resource is marked as not needing to be discovered on this node */
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, discovery mode", rsc->id, node->details->id);
return FALSE;
}
if(is_container_remote_node(node)) {
resource_t *remote = node->details->remote_rsc->container;
if(remote->role == RSC_ROLE_STOPPED) {
/* If the container is stopped, then we know anything that
* might have been inside it is also stopped and there is
* no need to probe.
*
* If we don't know the container's state on the target
* node either, then:
*
* - the container is running, the transition will abort
* and we'll end up in a different case next time, or
*
* - the container is stopped
*
* Either way there is no need to probe.
*
*/
if(remote->allocated_to
&& g_hash_table_lookup(remote->known_on, remote->allocated_to->details->id) == NULL) {
/* For safety, we order the 'rsc' start after 'remote'
* has been probed.
*
* Using 'top' helps for groups, but we may need to
* follow the start's ordering chain backwards.
*/
custom_action_order(remote, generate_op_key(remote->id, RSC_STATUS, 0), NULL,
top, generate_op_key(top->id, RSC_START, 0), NULL,
pe_order_optional, data_set);
}
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopped",
rsc->id, node->details->id, remote->id);
return FALSE;
/* Here we really want to check whether remote->stop is required,
* but that information doesn't exist yet
*/
} else if(node->details->remote_requires_reset
|| node->details->unclean
|| is_set(remote->flags, pe_rsc_failed)
|| remote->next_role == RSC_ROLE_STOPPED
|| (remote->allocated_to
&& pe_find_node(remote->running_on, remote->allocated_to->details->uname) == NULL)
) {
/* The container is stopping or restarting, don't start
* 'rsc' until 'remote' stops as this also implies that
* 'rsc' is stopped - avoiding the need to probe
*/
custom_action_order(remote, generate_op_key(remote->id, RSC_STOP, 0), NULL,
top, generate_op_key(top->id, RSC_START, 0), NULL,
pe_order_optional, data_set);
pe_rsc_trace(rsc, "Skipping probe for %s on node %s, %s is stopping, restarting or moving",
rsc->id, node->details->id, remote->id);
return FALSE;
/* } else {
* The container is running so there is no problem probing it
*/
}
}
key = generate_op_key(rsc->id, RSC_STATUS, 0);
probe = custom_action(rsc, key, RSC_STATUS, node, FALSE, TRUE, data_set);
update_action_flags(probe, pe_action_optional | pe_action_clear, __FUNCTION__, __LINE__);
order_after_unfencing(rsc, node, probe, pe_order_optional, data_set);
/*
* We need to know if it's running_on (not just known_on) this node
* to correctly determine the target rc.
*/
running = pe_find_node_id(rsc->running_on, node->details->id);
if (running == NULL) {
add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_inactive);
} else if (rsc->role == RSC_ROLE_MASTER) {
add_hash_param(probe->meta, XML_ATTR_TE_TARGET_RC, rc_master);
}
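/* Illustrative expected results (assuming standard OCF exit codes): a probe
 * on a node where the resource should be inactive carries target-rc 7
 * (PCMK_OCF_NOT_RUNNING); where it is master, target-rc 8
 * (PCMK_OCF_RUNNING_MASTER). A probe result that differs from its
 * target-rc is treated as unexpected.
 */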
crm_debug("Probing %s on %s (%s) %d %p", rsc->id, node->details->uname, role2text(rsc->role),
is_set(probe->flags, pe_action_runnable), rsc->running_on);
if (is_unfence_device(rsc, data_set) || !pe_rsc_is_clone(top)) {
top = rsc;
} else {
crm_trace("Probing %s on %s (%s) as %s", rsc->id, node->details->uname, role2text(rsc->role), top->id);
}
if(is_not_set(probe->flags, pe_action_runnable) && rsc->running_on == NULL) {
/* Prevent the start from occurring if rsc isn't active, but
* don't cause it to stop if it was active already
*/
flags |= pe_order_runnable_left;
}
custom_action_order(rsc, NULL, probe,
top, generate_op_key(top->id, RSC_START, 0), NULL,
flags, data_set);
/* Before any reloads, if they exist */
custom_action_order(rsc, NULL, probe,
top, reload_key(rsc), NULL,
pe_order_optional, data_set);
#if 0
// complete is always null currently
if (!is_unfence_device(rsc, data_set)) {
/* Normally rsc.start depends on probe complete which depends
* on rsc.probe. But this can't be the case for fence devices
* with unfencing, as it would create graph loops.
*
* So instead we explicitly order 'rsc.probe then rsc.start'
*/
order_actions(probe, complete, pe_order_implies_then);
}
#endif
return TRUE;
}
/*!
* \internal
* \brief Check whether a resource is known on a particular node
*
* \param[in] rsc Resource to check
* \param[in] node Node to check
*
* \return TRUE if resource (or parent if an anonymous clone) is known
*/
static bool
rsc_is_known_on(pe_resource_t *rsc, const pe_node_t *node)
{
if (pe_hash_table_lookup(rsc->known_on, node->details->id)) {
return TRUE;
} else if ((rsc->variant == pe_native)
&& pe_rsc_is_anon_clone(rsc->parent)
&& pe_hash_table_lookup(rsc->parent->known_on, node->details->id)) {
/* We check only the parent, not the uber-parent, because we cannot
* assume that the resource is known if it is in an anonymously cloned
* group (which may be only partially known).
*/
return TRUE;
}
return FALSE;
}
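/* Example (hypothetical names): for an anonymous clone instance "ping:0"
 * whose parent clone "ping-clone" has been probed on node1, this returns
 * TRUE on node1 even though the instance's own known_on table may be empty,
 * because anonymous instances are interchangeable.
 */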
static void
native_start_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
{
node_t *target;
GListPtr gIter = NULL;
action_t *all_stopped = get_pseudo_op(ALL_STOPPED, data_set);
action_t *stonith_done = get_pseudo_op(STONITH_DONE, data_set);
CRM_CHECK(stonith_op && stonith_op->node, return);
target = stonith_op->node;
for (gIter = rsc->actions; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if(action->needs == rsc_req_nothing) {
/* Anything other than start or promote requires nothing */
} else if (action->needs == rsc_req_stonith) {
order_actions(stonith_done, action, pe_order_optional);
} else if (safe_str_eq(action->task, RSC_START)
&& NULL != pe_hash_table_lookup(rsc->allowed_nodes, target->details->id)
&& !rsc_is_known_on(rsc, target)) {
/* If the resource is not known on the target, we cannot
* be sure whether it is active on the node
* we're about to shoot
*
* in this case, regardless of action->needs,
* the only safe option is to wait until
* the node is shot before doing anything
* with the resource
*
* it's analogous to waiting for all the probes
* for rscX to complete before starting rscX
*
* the most likely explanation is that the
* DC died and took its status with it
*/
pe_rsc_debug(rsc, "Ordering %s after %s recovery", action->uuid,
target->details->uname);
order_actions(all_stopped, action, pe_order_optional | pe_order_runnable_left);
}
}
}
static void
native_stop_constraints(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
{
char *key = NULL;
GListPtr gIter = NULL;
GListPtr action_list = NULL;
bool order_implicit = FALSE;
resource_t *top = uber_parent(rsc);
node_t *target;
CRM_CHECK(stonith_op && stonith_op->node, return);
target = stonith_op->node;
/* Get a list of stop actions potentially implied by the fencing */
key = stop_key(rsc);
action_list = find_actions(rsc->actions, key, target);
free(key);
// If resource requires fencing, implicit actions must occur after fencing
if (is_set(rsc->flags, pe_rsc_needs_fencing)) {
order_implicit = TRUE;
}
/* Implied stops and demotes of resources running on guest nodes are always
* ordered after fencing, even if the resource does not require fencing,
* because guest node "fencing" is actually just a resource stop.
*/
if (is_container_remote_node(target)) {
order_implicit = TRUE;
}
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
// The stop would never complete, so convert it into a pseudo-action.
update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__);
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
if (order_implicit) {
enum pe_ordering flags = pe_order_optional;
action_t *parent_stop = find_first_action(top->actions, NULL, RSC_STOP, NULL);
if (is_set(rsc->flags, pe_rsc_failed)) {
crm_notice("Stop of failed resource %s is implicit after %s is fenced",
rsc->id, target->details->uname);
} else {
crm_info("%s is implicit after %s is fenced",
action->uuid, target->details->uname);
}
update_action_flags(action, pe_action_implied_by_stonith,
__FUNCTION__, __LINE__);
if (target->details->remote_rsc) {
/* User constraints must not order a resource in a guest node
* relative to the guest node container resource. This flag
* marks constraints as generated by the cluster and thus
* immune to that check.
*/
flags |= pe_order_preserve;
}
if (pe_rsc_is_bundled(rsc) == FALSE) {
order_actions(stonith_op, action, flags);
}
order_actions(stonith_op, parent_stop, flags);
} else {
if (is_set(rsc->flags, pe_rsc_failed)) {
crm_notice("Stop of failed resource %s is implicit because %s will be fenced",
rsc->id, target->details->uname);
} else {
crm_info("%s is implicit because %s will be fenced",
action->uuid, target->details->uname);
}
}
if (is_set(rsc->flags, pe_rsc_notify)) {
/* Create a second notification that will be delivered
* immediately after the node is fenced
*
* Basic problem:
* - C is a clone active on the node to be shot and stopping on another
* - R is a resource that depends on C
*
* + C.stop depends on R.stop
* + C.stopped depends on STONITH
* + C.notify depends on C.stopped
* + C.healthy depends on C.notify
* + R.stop depends on C.healthy
*
* The extra notification here changes
* + C.healthy depends on C.notify
* into:
* + C.healthy depends on C.notify'
* + C.notify' depends on STONITH'
* thus breaking the loop
*/
create_secondary_notification(action, rsc, stonith_op, data_set);
}
/* From Bug #1601, successful fencing must be an input to a failed resource's stop action.
However given group(rA, rB) running on nodeX and B.stop has failed,
A := stop healthy resource (rA.stop)
B := stop failed resource (pseudo operation B.stop)
C := stonith nodeX
A requires B, B requires C, C requires A
This loop would prevent the cluster from making progress.
This block creates the "C requires A" dependency and therefore must (at least
for now) be disabled.
Instead, run the block above and treat all resources on nodeX as B would be
(marked as a pseudo op depending on the STONITH).
TODO: Break the "A requires B" dependency in update_action() and re-enable this block
} else if(is_stonith == FALSE) {
crm_info("Moving healthy resource %s"
" off %s before fencing",
rsc->id, node->details->uname);
* stop healthy resources before the
* stonith op
*
custom_action_order(
rsc, stop_key(rsc), NULL,
NULL,strdup(CRM_OP_FENCE),stonith_op,
pe_order_optional, data_set);
*/
}
g_list_free(action_list);
/* Get a list of demote actions potentially implied by the fencing */
key = demote_key(rsc);
action_list = find_actions(rsc->actions, key, target);
free(key);
for (gIter = action_list; gIter != NULL; gIter = gIter->next) {
action_t *action = (action_t *) gIter->data;
if (action->node->details->online == FALSE || action->node->details->unclean == TRUE
|| is_set(rsc->flags, pe_rsc_failed)) {
if (is_set(rsc->flags, pe_rsc_failed)) {
pe_rsc_info(rsc,
"Demote of failed resource %s is implicit after %s is fenced",
rsc->id, target->details->uname);
} else {
pe_rsc_info(rsc, "%s is implicit after %s is fenced",
action->uuid, target->details->uname);
}
/* The demote would never complete and is now implied by the
* fencing, so convert it into a pseudo-action.
*/
update_action_flags(action, pe_action_pseudo, __FUNCTION__, __LINE__);
update_action_flags(action, pe_action_runnable, __FUNCTION__, __LINE__);
if (pe_rsc_is_bundled(rsc)) {
/* Do nothing, let the recovery be ordered after the parent's implied stop */
} else if (order_implicit) {
order_actions(stonith_op, action, pe_order_preserve|pe_order_optional);
}
}
}
g_list_free(action_list);
}
void
rsc_stonith_ordering(resource_t * rsc, action_t * stonith_op, pe_working_set_t * data_set)
{
if (rsc->children) {
GListPtr gIter = NULL;
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
rsc_stonith_ordering(child_rsc, stonith_op, data_set);
}
} else if (is_not_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "Skipping fencing constraints for unmanaged resource: %s", rsc->id);
} else {
native_start_constraints(rsc, stonith_op, data_set);
native_stop_constraints(rsc, stonith_op, data_set);
}
}
enum stack_activity {
stack_stable = 0,
stack_starting = 1,
stack_stopping = 2,
stack_middle = 4,
};
void
ReloadRsc(resource_t * rsc, node_t *node, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
action_t *reload = NULL;
if (rsc->children) {
for (gIter = rsc->children; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
ReloadRsc(child_rsc, node, data_set);
}
return;
} else if (rsc->variant > pe_native) {
/* Complex resource with no children */
return;
} else if (is_not_set(rsc->flags, pe_rsc_managed)) {
pe_rsc_trace(rsc, "%s: unmanaged", rsc->id);
return;
} else if (is_set(rsc->flags, pe_rsc_failed) || is_set(rsc->flags, pe_rsc_start_pending)) {
pe_rsc_trace(rsc, "%s: general resource state: flags=0x%.16llx", rsc->id, rsc->flags);
stop_action(rsc, node, FALSE); /* Force a full restart, overkill? */
return;
} else if (node == NULL) {
pe_rsc_trace(rsc, "%s: not active", rsc->id);
return;
}
pe_rsc_trace(rsc, "Processing %s", rsc->id);
set_bit(rsc->flags, pe_rsc_reload);
reload = custom_action(
rsc, reload_key(rsc), CRMD_ACTION_RELOAD, node, FALSE, TRUE, data_set);
pe_action_set_reason(reload, "resource definition change", FALSE);
custom_action_order(NULL, NULL, reload, rsc, stop_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
data_set);
custom_action_order(NULL, NULL, reload, rsc, demote_key(rsc), NULL,
pe_order_optional|pe_order_then_cancels_first,
data_set);
}
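/* Net effect of the orderings above (illustrative): because of
 * pe_order_then_cancels_first, a scheduled stop or demote cancels the
 * reload, so a resource that is about to stop or demote is never reloaded.
 */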
void
native_append_meta(resource_t * rsc, xmlNode * xml)
{
char *value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_INCARNATION);
resource_t *parent;
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_INCARNATION);
crm_xml_add(xml, name, value);
free(name);
}
value = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_REMOTE_NODE);
if (value) {
char *name = NULL;
name = crm_meta_name(XML_RSC_ATTR_REMOTE_NODE);
crm_xml_add(xml, name, value);
free(name);
}
for (parent = rsc; parent != NULL; parent = parent->parent) {
if (parent->container) {
crm_xml_add(xml, CRM_META"_"XML_RSC_ATTR_CONTAINER, parent->container->id);
}
}
}
diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h
index beeb86c850..c13fc6f3d9 100644
--- a/include/crm/pengine/internal.h
+++ b/include/crm/pengine/internal.h
@@ -1,324 +1,323 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#ifndef PE_INTERNAL__H
# define PE_INTERNAL__H
# include
# include
# include
# define pe_rsc_info(rsc, fmt, args...) crm_log_tag(LOG_INFO, rsc ? rsc->id : "", fmt, ##args)
# define pe_rsc_debug(rsc, fmt, args...) crm_log_tag(LOG_DEBUG, rsc ? rsc->id : "", fmt, ##args)
# define pe_rsc_trace(rsc, fmt, args...) crm_log_tag(LOG_TRACE, rsc ? rsc->id : "", fmt, ##args)
# define pe_err(fmt...) { was_processing_error = TRUE; crm_config_error = TRUE; crm_err(fmt); }
# define pe_warn(fmt...) { was_processing_warning = TRUE; crm_config_warning = TRUE; crm_warn(fmt); }
# define pe_proc_err(fmt...) { was_processing_error = TRUE; crm_err(fmt); }
# define pe_proc_warn(fmt...) { was_processing_warning = TRUE; crm_warn(fmt); }
# define pe_set_action_bit(action, bit) action->flags = crm_set_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit)
# define pe_clear_action_bit(action, bit) action->flags = crm_clear_bit(__FUNCTION__, __LINE__, action->uuid, action->flags, bit)
typedef struct notify_data_s {
GHashTable *keys;
const char *action;
action_t *pre;
action_t *post;
action_t *pre_done;
action_t *post_done;
GListPtr active; /* notify_entry_t* */
GListPtr inactive; /* notify_entry_t* */
GListPtr start; /* notify_entry_t* */
GListPtr stop; /* notify_entry_t* */
GListPtr demote; /* notify_entry_t* */
GListPtr promote; /* notify_entry_t* */
GListPtr master; /* notify_entry_t* */
GListPtr slave; /* notify_entry_t* */
GHashTable *allowed_nodes;
} notify_data_t;
bool pe_can_fence(pe_working_set_t *data_set, node_t *node);
int merge_weights(int w1, int w2);
void add_hash_param(GHashTable * hash, const char *name, const char *value);
char *native_parameter(resource_t * rsc, node_t * node, gboolean create, const char *name,
pe_working_set_t * data_set);
node_t *native_location(resource_t * rsc, GListPtr * list, gboolean current);
void pe_metadata(void);
void verify_pe_options(GHashTable * options);
void common_update_score(resource_t * rsc, const char *id, int score);
void native_add_running(resource_t * rsc, node_t * node, pe_working_set_t * data_set);
-node_t *rsc_known_on(resource_t * rsc, GListPtr * list);
gboolean native_unpack(resource_t * rsc, pe_working_set_t * data_set);
gboolean group_unpack(resource_t * rsc, pe_working_set_t * data_set);
gboolean clone_unpack(resource_t * rsc, pe_working_set_t * data_set);
gboolean container_unpack(resource_t * rsc, pe_working_set_t * data_set);
resource_t *native_find_rsc(resource_t *rsc, const char *id, const node_t *node,
int flags);
gboolean native_active(resource_t * rsc, gboolean all);
gboolean group_active(resource_t * rsc, gboolean all);
gboolean clone_active(resource_t * rsc, gboolean all);
gboolean container_active(resource_t * rsc, gboolean all);
void native_print(resource_t * rsc, const char *pre_text, long options, void *print_data);
void group_print(resource_t * rsc, const char *pre_text, long options, void *print_data);
void clone_print(resource_t * rsc, const char *pre_text, long options, void *print_data);
void container_print(resource_t * rsc, const char *pre_text, long options, void *print_data);
void native_free(resource_t * rsc);
void group_free(resource_t * rsc);
void clone_free(resource_t * rsc);
void container_free(resource_t * rsc);
enum rsc_role_e native_resource_state(const resource_t * rsc, gboolean current);
enum rsc_role_e group_resource_state(const resource_t * rsc, gboolean current);
enum rsc_role_e clone_resource_state(const resource_t * rsc, gboolean current);
enum rsc_role_e container_resource_state(const resource_t * rsc, gboolean current);
gboolean common_unpack(xmlNode * xml_obj, resource_t ** rsc, resource_t * parent,
pe_working_set_t * data_set);
void common_free(resource_t * rsc);
extern pe_working_set_t *pe_dataset;
extern node_t *node_copy(const node_t *this_node);
extern time_t get_effective_time(pe_working_set_t * data_set);
/* Failure handling utilities (from failcounts.c) */
// bit flags for fail count handling options
enum pe_fc_flags_e {
pe_fc_default = 0x00,
pe_fc_effective = 0x01, // don't count expired failures
pe_fc_fillers = 0x02, // if container, include filler failures in count
};
int pe_get_failcount(node_t *node, resource_t *rsc, time_t *last_failure,
uint32_t flags, xmlNode *xml_op,
pe_working_set_t *data_set);
/* Functions for finding/counting a resource's active nodes */
pe_node_t *pe__find_active_on(const pe_resource_t *rsc,
unsigned int *count_all,
unsigned int *count_clean);
pe_node_t *pe__find_active_requires(const pe_resource_t *rsc,
unsigned int *count);
static inline pe_node_t *
pe__current_node(const pe_resource_t *rsc)
{
return pe__find_active_on(rsc, NULL, NULL);
}
/* Binary like operators for lists of nodes */
extern void node_list_exclude(GHashTable * list, GListPtr list2, gboolean merge_scores);
extern GListPtr node_list_dup(GListPtr list, gboolean reset, gboolean filter);
extern GHashTable *node_hash_from_list(GListPtr list);
static inline gpointer
pe_hash_table_lookup(GHashTable * hash, gconstpointer key)
{
if (hash) {
return g_hash_table_lookup(hash, key);
}
return NULL;
}
extern action_t *get_pseudo_op(const char *name, pe_working_set_t * data_set);
extern gboolean order_actions(action_t * lh_action, action_t * rh_action, enum pe_ordering order);
GHashTable *node_hash_dup(GHashTable * hash);
/* Printing functions for debug */
extern void print_node(const char *pre_text, node_t * node, gboolean details);
extern void print_resource(int log_level, const char *pre_text, resource_t * rsc, gboolean details);
extern void dump_node_scores_worker(int level, const char *file, const char *function, int line,
resource_t * rsc, const char *comment, GHashTable * nodes);
extern void dump_node_capacity(int level, const char *comment, node_t * node);
extern void dump_rsc_utilization(int level, const char *comment, resource_t * rsc, node_t * node);
# define dump_node_scores(level, rsc, text, nodes) do { \
dump_node_scores_worker(level, __FILE__, __FUNCTION__, __LINE__, rsc, text, nodes); \
} while(0)
/* Sorting functions */
extern gint sort_rsc_priority(gconstpointer a, gconstpointer b);
extern gint sort_rsc_index(gconstpointer a, gconstpointer b);
extern xmlNode *find_rsc_op_entry(resource_t * rsc, const char *key);
extern action_t *custom_action(resource_t * rsc, char *key, const char *task, node_t * on_node,
gboolean optional, gboolean foo, pe_working_set_t * data_set);
# define delete_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DELETE, 0)
# define delete_action(rsc, node, optional) custom_action( \
rsc, delete_key(rsc), CRMD_ACTION_DELETE, node, \
optional, TRUE, data_set);
# define stopped_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOPPED, 0)
# define stopped_action(rsc, node, optional) custom_action( \
rsc, stopped_key(rsc), CRMD_ACTION_STOPPED, node, \
optional, TRUE, data_set);
# define stop_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STOP, 0)
# define stop_action(rsc, node, optional) custom_action( \
rsc, stop_key(rsc), CRMD_ACTION_STOP, node, \
optional, TRUE, data_set);
# define reload_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_RELOAD, 0)
# define start_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_START, 0)
# define start_action(rsc, node, optional) custom_action( \
rsc, start_key(rsc), CRMD_ACTION_START, node, \
optional, TRUE, data_set)
# define started_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_STARTED, 0)
# define started_action(rsc, node, optional) custom_action( \
rsc, started_key(rsc), CRMD_ACTION_STARTED, node, \
optional, TRUE, data_set)
# define promote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTE, 0)
# define promote_action(rsc, node, optional) custom_action( \
rsc, promote_key(rsc), CRMD_ACTION_PROMOTE, node, \
optional, TRUE, data_set)
# define promoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_PROMOTED, 0)
# define promoted_action(rsc, node, optional) custom_action( \
rsc, promoted_key(rsc), CRMD_ACTION_PROMOTED, node, \
optional, TRUE, data_set)
# define demote_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTE, 0)
# define demote_action(rsc, node, optional) custom_action( \
rsc, demote_key(rsc), CRMD_ACTION_DEMOTE, node, \
optional, TRUE, data_set)
# define demoted_key(rsc) generate_op_key(rsc->id, CRMD_ACTION_DEMOTED, 0)
# define demoted_action(rsc, node, optional) custom_action( \
rsc, demoted_key(rsc), CRMD_ACTION_DEMOTED, node, \
optional, TRUE, data_set)
extern int pe_get_configured_timeout(resource_t *rsc, const char *action,
pe_working_set_t *data_set);
extern action_t *find_first_action(GListPtr input, const char *uuid, const char *task,
node_t * on_node);
extern enum action_tasks get_complex_task(resource_t * rsc, const char *name,
gboolean allow_non_atomic);
extern GListPtr find_actions(GListPtr input, const char *key, const node_t *on_node);
extern GListPtr find_actions_exact(GListPtr input, const char *key, node_t * on_node);
extern GListPtr find_recurring_actions(GListPtr input, node_t * not_on_node);
extern void pe_free_action(action_t * action);
extern void resource_location(resource_t * rsc, node_t * node, int score, const char *tag,
pe_working_set_t * data_set);
extern gint sort_op_by_callid(gconstpointer a, gconstpointer b);
extern gboolean get_target_role(resource_t * rsc, enum rsc_role_e *role);
extern resource_t *find_clone_instance(resource_t * rsc, const char *sub_id,
pe_working_set_t * data_set);
extern void destroy_ticket(gpointer data);
extern ticket_t *ticket_new(const char *ticket_id, pe_working_set_t * data_set);
// Resources for manipulating resource names
const char *pe_base_name_end(const char *id);
char *clone_strip(const char *last_rsc_id);
char *clone_zero(const char *last_rsc_id);
static inline bool
pe_base_name_eq(resource_t *rsc, const char *id)
{
if (id && rsc && rsc->id) {
// Number of characters in rsc->id before any clone suffix
size_t base_len = pe_base_name_end(rsc->id) - rsc->id + 1;
return (strlen(id) == base_len) && !strncmp(id, rsc->id, base_len);
}
return FALSE;
}
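/* Example (hypothetical id): a clone instance with id "rsc1:2" satisfies
 * pe_base_name_eq(rsc, "rsc1"), since pe_base_name_end() stops before the
 * ":2" clone suffix.
 */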
int get_target_rc(xmlNode * xml_op);
gint sort_node_uname(gconstpointer a, gconstpointer b);
bool is_set_recursive(resource_t * rsc, long long flag, bool any);
enum rsc_digest_cmp_val {
/*! Digests are the same */
RSC_DIGEST_MATCH = 0,
/*! Params that require a restart changed */
RSC_DIGEST_RESTART,
/*! Some parameter changed. */
RSC_DIGEST_ALL,
/*! rsc op didn't have a digest associated with it, so
* it is unknown if parameters changed or not. */
RSC_DIGEST_UNKNOWN,
};
typedef struct op_digest_cache_s {
enum rsc_digest_cmp_val rc;
xmlNode *params_all;
xmlNode *params_secure;
xmlNode *params_restart;
char *digest_all_calc;
char *digest_secure_calc;
char *digest_restart_calc;
} op_digest_cache_t;
op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
pe_working_set_t * data_set);
action_t *pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set);
void trigger_unfencing(
resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set);
void pe_action_set_reason(pe_action_t *action, const char *reason, bool overwrite);
void pe_action_set_flag_reason(const char *function, long line, pe_action_t *action, pe_action_t *reason, const char *text, enum pe_action_flags flags, bool overwrite);
#define pe_action_required(action, reason, text) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, text, pe_action_optional, FALSE)
#define pe_action_implies(action, reason, flag) pe_action_set_flag_reason(__FUNCTION__, __LINE__, action, reason, NULL, flag, FALSE)
void set_bit_recursive(resource_t * rsc, unsigned long long flag);
void clear_bit_recursive(resource_t * rsc, unsigned long long flag);
gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj_ref);
void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options,
void * print_data, gboolean print_all);
void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason);
node_t *pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set);
bool remote_id_conflict(const char *remote_name, pe_working_set_t *data);
void common_print(resource_t * rsc, const char *pre_text, const char *name, node_t *node, long options, void *print_data);
resource_t *find_container_child(const resource_t *bundle, const node_t *node);
bool container_fix_remote_addr(resource_t *rsc);
const char *container_fix_remote_addr_in(resource_t *rsc, xmlNode *xml, const char *field);
const char *pe_node_attribute_calculated(const pe_node_t *node,
const char *name,
const resource_t *rsc);
const char *pe_node_attribute_raw(pe_node_t *node, const char *name);
bool pe__is_universal_clone(pe_resource_t *rsc,
pe_working_set_t *data_set);
#endif
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index 42135f8cb1..d15852b75f 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,3442 +1,3441 @@
/*
* Copyright 2004-2018 Andrew Beekhof
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
CRM_TRACE_INIT_DATA(pe_status);
#define set_config_flag(data_set, option, flag) do { \
const char *tmp = pe_pref(data_set->config_hash, option); \
if(tmp) { \
if(crm_is_true(tmp)) { \
set_bit(data_set->flags, flag); \
} else { \
clear_bit(data_set->flags, flag); \
} \
} \
} while(0)
gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response *failed, pe_working_set_t * data_set);
static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);
// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;
static gboolean
is_dangling_container_remote_node(node_t *node)
{
/* we are looking for a remote-node that was supposed to be mapped to a
* container resource, but all traces of that container have disappeared
* from both the config and the status section. */
if (is_remote_node(node) &&
node->details->remote_rsc &&
node->details->remote_rsc->container == NULL &&
is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
return TRUE;
}
return FALSE;
}
/*!
* \brief Schedule a fence action for a node
*
* \param[in,out] data_set Current working set of cluster
* \param[in,out] node Node to fence
* \param[in] reason Text description of why fencing is needed
*/
void
pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason)
{
CRM_CHECK(node, return);
/* A guest node is fenced by marking its container as failed */
if (is_container_remote_node(node)) {
resource_t *rsc = node->details->remote_rsc->container;
if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
if (!is_set(rsc->flags, pe_rsc_managed)) {
crm_notice("Not fencing guest node %s "
"(otherwise would because %s): "
"its guest resource %s is unmanaged",
node->details->uname, reason, rsc->id);
} else {
crm_warn("Guest node %s will be fenced "
"(by recovering its guest resource %s): %s",
node->details->uname, rsc->id, reason);
/* We don't mark the node as unclean because that would prevent the
* node from running resources. We want to allow it to run resources
* in this transition if the recovery succeeds.
*/
node->details->remote_requires_reset = TRUE;
set_bit(rsc->flags, pe_rsc_failed);
}
}
} else if (is_dangling_container_remote_node(node)) {
crm_info("Cleaning up dangling connection for guest node %s: "
"fencing was already done because %s, "
"and guest resource no longer exists",
node->details->uname, reason);
set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
} else if (is_baremetal_remote_node(node)) {
resource_t *rsc = node->details->remote_rsc;
if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
crm_notice("Not fencing remote node %s "
"(otherwise would because %s): connection is unmanaged",
node->details->uname, reason);
} else if(node->details->remote_requires_reset == FALSE) {
node->details->remote_requires_reset = TRUE;
crm_warn("Remote node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
}
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
} else if (node->details->unclean) {
crm_trace("Cluster node %s %s because %s",
node->details->uname,
pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
reason);
} else {
crm_warn("Cluster node %s %s: %s",
node->details->uname,
pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
reason);
node->details->unclean = TRUE;
pe_fence_op(node, NULL, TRUE, reason, data_set);
}
}
// @TODO xpaths can't handle templates, rules, or id-refs
// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
"[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
"or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
"and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
"/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
"//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
"|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
"/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
static
void set_if_xpath(unsigned long long flag, const char *xpath,
pe_working_set_t *data_set)
{
xmlXPathObjectPtr result = NULL;
if (is_not_set(data_set->flags, flag)) {
result = xpath_search(data_set->input, xpath);
if (result && (numXpathResults(result) > 0)) {
set_bit(data_set->flags, flag);
}
freeXpathObject(result);
}
}
gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
const char *value = NULL;
GHashTable *config_hash = crm_str_table_new();
data_set->config_hash = config_hash;
unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
CIB_OPTIONS_FIRST, FALSE, data_set->now);
verify_pe_options(data_set->config_hash);
set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
crm_info("Startup probes: disabled (dangerous)");
}
value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
if (value && crm_is_true(value)) {
crm_notice("Watchdog will be used via SBD if fencing is required");
set_bit(data_set->flags, pe_flag_have_stonith_resource);
}
/* Set certain flags via xpath here, so they can be used before the relevant
* configuration sections are unpacked.
*/
set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
value = pe_pref(data_set->config_hash, "stonith-timeout");
data_set->stonith_timeout = crm_get_msec(value);
crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
crm_debug("STONITH of failed nodes is %s",
is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
if (!strcmp(data_set->stonith_action, "poweroff")) {
pe_warn_once(pe_wo_poweroff,
"Support for stonith-action of 'poweroff' is deprecated "
"and will be removed in a future release (use 'off' instead)");
data_set->stonith_action = "off";
}
crm_trace("STONITH will %s nodes", data_set->stonith_action);
set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
crm_debug("Concurrent fencing is %s",
is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
crm_debug("Stop all active resources: %s",
is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
}
value = pe_pref(data_set->config_hash, "no-quorum-policy");
if (safe_str_eq(value, "ignore")) {
data_set->no_quorum_policy = no_quorum_ignore;
} else if (safe_str_eq(value, "freeze")) {
data_set->no_quorum_policy = no_quorum_freeze;
} else if (safe_str_eq(value, "suicide")) {
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
int do_panic = 0;
crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
&do_panic);
if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
data_set->no_quorum_policy = no_quorum_suicide;
} else {
crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
data_set->no_quorum_policy = no_quorum_stop;
}
} else {
data_set->no_quorum_policy = no_quorum_stop;
}
switch (data_set->no_quorum_policy) {
case no_quorum_freeze:
crm_debug("On loss of quorum: Freeze resources");
break;
case no_quorum_stop:
crm_debug("On loss of quorum: Stop ALL resources");
break;
case no_quorum_suicide:
crm_notice("On loss of quorum: Fence all remaining nodes");
break;
case no_quorum_ignore:
crm_notice("On loss of quorum: Ignore");
break;
}
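/* Illustrative cluster property (sketch):
 *
 *   <nvpair id="opts-nqp" name="no-quorum-policy" value="freeze"/>
 *
 * selects the no_quorum_freeze case above: on loss of quorum, resources
 * are frozen in place rather than stopped.
 */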
set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
crm_trace("Orphan resources are %s",
is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
crm_trace("Orphan resource actions are %s",
is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
crm_trace("Stopped resources are removed from the status section: %s",
is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
crm_trace("Maintenance mode: %s",
is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
crm_trace("Start failures are %s",
is_set(data_set->flags,
pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
}
if (is_set(data_set->flags, pe_flag_startup_fencing)) {
crm_trace("Unseen nodes will be fenced");
} else {
pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
}
node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
pe_pref(data_set->config_hash, "node-health-red"),
pe_pref(data_set->config_hash, "node-health-yellow"),
pe_pref(data_set->config_hash, "node-health-green"));
data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
crm_trace("Placement strategy: %s", data_set->placement_strategy);
return TRUE;
}
static void
destroy_digest_cache(gpointer ptr)
{
op_digest_cache_t *data = ptr;
free_xml(data->params_all);
free_xml(data->params_secure);
free_xml(data->params_restart);
free(data->digest_all_calc);
free(data->digest_restart_calc);
free(data->digest_secure_calc);
free(data);
}
node_t *
pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set)
{
node_t *new_node = NULL;
if (pe_find_node(data_set->nodes, uname) != NULL) {
crm_config_warn("Detected multiple node entries with uname=%s"
" - this is rarely intended", uname);
}
new_node = calloc(1, sizeof(node_t));
if (new_node == NULL) {
return NULL;
}
new_node->weight = char2score(score);
new_node->fixed = FALSE;
new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
if (new_node->details == NULL) {
free(new_node);
return NULL;
}
crm_trace("Creating node for entry %s/%s", uname, id);
new_node->details->id = id;
new_node->details->uname = uname;
new_node->details->online = FALSE;
new_node->details->shutdown = FALSE;
new_node->details->rsc_discovery_enabled = TRUE;
new_node->details->running_rsc = NULL;
new_node->details->type = node_ping;
if (safe_str_eq(type, "remote")) {
new_node->details->type = node_remote;
set_bit(data_set->flags, pe_flag_have_remote_nodes);
} else if ((type == NULL) || safe_str_eq(type, "member")) {
new_node->details->type = node_member;
}
new_node->details->attrs = crm_str_table_new();
if (is_remote_node(new_node)) {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("remote"));
} else {
g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("cluster"));
}
new_node->details->utilization = crm_str_table_new();
new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
g_str_equal, free,
destroy_digest_cache);
data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
return new_node;
}
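/* Check whether a proposed remote node name collides with an existing resource ID */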
bool
remote_id_conflict(const char *remote_name, pe_working_set_t *data)
{
bool match = FALSE;
#if 1
    match = (pe_find_resource(data->resources, remote_name) != NULL);
#else
if (data->name_check == NULL) {
data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
for (xml_rsc = __xml_first_child(parent); xml_rsc != NULL; xml_rsc = __xml_next_element(xml_rsc)) {
const char *id = ID(xml_rsc);
            /* Avoid heap allocation here; this hash table does not outlive
             * the XML, so the XML-owned strings can be inserted directly */
g_hash_table_insert(data->name_check, (char *) id, (char *) id);
}
}
if (g_hash_table_lookup(data->name_check, remote_name)) {
match = TRUE;
}
#endif
if (match) {
crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
        return TRUE;
}
return match;
}
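/* If a resource's meta-attributes define a remote node, expand a connection
 * resource primitive for it into the configuration XML, and return the
 * remote node name (or NULL if none was defined or the name conflicts).
 */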
static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
xmlNode *attr_set = NULL;
xmlNode *attr = NULL;
const char *container_id = ID(xml_obj);
const char *remote_name = NULL;
const char *remote_server = NULL;
const char *remote_port = NULL;
const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
const char *container_managed = NULL;
for (attr_set = __xml_first_child(xml_obj); attr_set != NULL; attr_set = __xml_next_element(attr_set)) {
if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
continue;
}
for (attr = __xml_first_child(attr_set); attr != NULL; attr = __xml_next_element(attr)) {
const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
remote_name = value;
} else if (safe_str_eq(name, "remote-addr")) {
remote_server = value;
} else if (safe_str_eq(name, "remote-port")) {
remote_port = value;
} else if (safe_str_eq(name, "remote-connect-timeout")) {
connect_timeout = value;
} else if (safe_str_eq(name, "remote-allow-migrate")) {
                remote_allow_migrate = value;
} else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
container_managed = value;
}
}
}
if (remote_name == NULL) {
return NULL;
}
if (remote_id_conflict(remote_name, data)) {
return NULL;
}
pe_create_remote_xml(parent, remote_name, container_id,
remote_allow_migrate, container_managed,
connect_timeout, remote_server, remote_port);
return remote_name;
}
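/* Set a newly created node's initial unclean/unseen state based on the
 * startup-fencing setting (remote nodes without a connection resource are
 * skipped entirely).
 */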
static void
handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
{
if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
/* Ignore fencing for remote nodes that don't have a connection resource
* associated with them. This happens when remote node entries get left
* in the nodes section after the connection resource is removed.
*/
return;
}
if (is_set(data_set->flags, pe_flag_startup_fencing)) {
// All nodes are unclean until we've seen their status entry
new_node->details->unclean = TRUE;
} else {
// Blind faith ...
new_node->details->unclean = FALSE;
}
/* We need to be able to determine if a node's status section
* exists or not separate from whether the node is unclean. */
new_node->details->unseen = TRUE;
}
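/*!
 * \internal
 * \brief Unpack the <nodes> section of the CIB into node objects
 *
 * \param[in]     xml_nodes Top of node configuration XML
 * \param[in,out] data_set  Where to put node information
 *
 * \return TRUE (FALSE only if a node object could not be allocated)
 */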
gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
node_t *new_node = NULL;
const char *id = NULL;
const char *uname = NULL;
const char *type = NULL;
const char *score = NULL;
for (xml_obj = __xml_first_child(xml_nodes); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
new_node = NULL;
id = crm_element_value(xml_obj, XML_ATTR_ID);
uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
type = crm_element_value(xml_obj, XML_ATTR_TYPE);
score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
crm_trace("Processing node %s/%s", uname, id);
if (id == NULL) {
crm_config_err("Must specify id tag in ");
continue;
}
new_node = pe_create_node(id, uname, type, score, data_set);
if (new_node == NULL) {
return FALSE;
}
handle_startup_fencing(data_set, new_node);
add_node_attrs(xml_obj, new_node, FALSE, data_set);
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
new_node->details->utilization, NULL, FALSE, data_set->now);
crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
}
}
if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
crm_info("Creating a fake local node");
        pe_create_node(data_set->localhost, data_set->localhost, NULL, NULL,
                       data_set);
}
return TRUE;
}
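/* Link a resource to its container resource (named by the container
 * meta-attribute), recursing into children for collective resources.
 */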
static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
const char *container_id = NULL;
if (rsc->children) {
GListPtr gIter = rsc->children;
for (; gIter != NULL; gIter = gIter->next) {
resource_t *child_rsc = (resource_t *) gIter->data;
setup_container(child_rsc, data_set);
}
return;
}
container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
if (container_id && safe_str_neq(container_id, rsc->id)) {
resource_t *container = pe_find_resource(data_set->resources, container_id);
if (container) {
rsc->container = container;
set_bit(container->flags, pe_rsc_is_container);
container->fillers = g_list_append(container->fillers, rsc);
pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
} else {
pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
}
}
}
gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
/* generate remote nodes from resource config before unpacking resources */
for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
const char *new_node_id = NULL;
/* first check if this is a bare metal remote node. Bare metal remote nodes
* are defined as a resource primitive only. */
if (xml_contains_remote_node(xml_obj)) {
new_node_id = ID(xml_obj);
/* The "pe_find_node" check is here to make sure we don't iterate over
* an expanded node that has already been added to the node list. */
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
}
        /* Now check for guest remote nodes.
         * Guest remote nodes are defined within a resource primitive.
         * Example 1: a VM resource might be configured as a remote node.
         * Example 2: a VM resource might be configured within a group to be a remote node.
         * Note: Right now, we only support guest remote nodes as a standalone
         * primitive or as a primitive within a group. Cloned primitives cannot
         * be guest remote nodes yet. */
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
            /* expands a metadata-defined remote resource into the XML config
             * as an actual resource primitive to be unpacked later. */
new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
continue;
} else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
xmlNode *xml_obj2 = NULL;
            /* search through a group to see if any of its primitives contain a remote node. */
for (xml_obj2 = __xml_first_child(xml_obj); xml_obj2 != NULL; xml_obj2 = __xml_next_element(xml_obj2)) {
new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj));
pe_create_node(new_node_id, new_node_id, "remote", NULL,
data_set);
}
}
}
}
return TRUE;
}
/* Call this after all the nodes and resources have been
* unpacked, but before the status section is read.
*
* A remote node's online status is reflected by the state
* of the remote node's connection resource. We need to link
* the remote node to this connection resource so we can have
* easy access to the connection resource during the PE calculations.
*/
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
node_t *remote_node = NULL;
if (new_rsc->is_remote_node == FALSE) {
return;
}
if (is_set(data_set->flags, pe_flag_quick_location)) {
/* remote_nodes and remote_resources are not linked in quick location calculations */
return;
}
print_resource(LOG_TRACE, "Linking remote-node connection resource, ", new_rsc, FALSE);
remote_node = pe_find_node(data_set->nodes, new_rsc->id);
CRM_CHECK(remote_node != NULL, return;);
remote_node->details->remote_rsc = new_rsc;
/* If this is a baremetal remote-node (no container resource
* associated with it) then we need to handle startup fencing the same way
* as cluster nodes. */
if (new_rsc->container == NULL) {
handle_startup_fencing(data_set, remote_node);
} else {
        /* At this point we know whether the remote node is a guest (container)
         * node or a bare-metal remote node, so update the #kind attribute when
         * a container is involved */
g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
strdup("container"));
}
}
static void
destroy_tag(gpointer data)
{
tag_t *tag = data;
if (tag) {
free(tag->id);
g_list_free_full(tag->refs, free);
free(tag);
}
}
/*!
* \internal
* \brief Parse configuration XML for resource information
*
* \param[in] xml_resources Top of resource configuration XML
* \param[in,out] data_set Where to put resource information
*
* \return TRUE
*
* \note unpack_remote_nodes() MUST be called before this, so that the nodes can
* be used when common_unpack() calls resource_location()
*/
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
GListPtr gIter = NULL;
data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
g_str_equal, free,
destroy_tag);
for (xml_obj = __xml_first_child(xml_resources); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
resource_t *new_rsc = NULL;
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
const char *template_id = ID(xml_obj);
if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
template_id, NULL, NULL) == FALSE) {
                /* Record the template's ID, so we at least know that it exists. */
g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
}
continue;
}
crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
if (common_unpack(xml_obj, &new_rsc, NULL, data_set)) {
data_set->resources = g_list_append(data_set->resources, new_rsc);
print_resource(LOG_TRACE, "Added ", new_rsc, FALSE);
} else {
crm_config_err("Failed unpacking %s %s",
crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
if (new_rsc != NULL && new_rsc->fns != NULL) {
new_rsc->fns->free(new_rsc);
}
}
}
for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
resource_t *rsc = (resource_t *) gIter->data;
setup_container(rsc, data_set);
link_rsc2remotenode(data_set, rsc);
}
data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
if (is_set(data_set->flags, pe_flag_quick_location)) {
/* Ignore */
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
}
return TRUE;
}
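/*!
 * \internal
 * \brief Parse configuration XML for tag information
 *
 * \param[in]     xml_tags Top of tag configuration XML
 * \param[in,out] data_set Where to put tag information
 *
 * \return TRUE (FALSE if a tag reference could not be added)
 */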
gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
xmlNode *xml_tag = NULL;
data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
destroy_tag);
for (xml_tag = __xml_first_child(xml_tags); xml_tag != NULL; xml_tag = __xml_next_element(xml_tag)) {
xmlNode *xml_obj_ref = NULL;
const char *tag_id = ID(xml_tag);
if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
continue;
}
if (tag_id == NULL) {
crm_config_err("Failed unpacking %s: %s should be specified",
crm_element_name(xml_tag), XML_ATTR_ID);
continue;
}
for (xml_obj_ref = __xml_first_child(xml_tag); xml_obj_ref != NULL; xml_obj_ref = __xml_next_element(xml_obj_ref)) {
const char *obj_ref = ID(xml_obj_ref);
if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
continue;
}
if (obj_ref == NULL) {
crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
continue;
}
if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
return FALSE;
}
}
}
return TRUE;
}
/* The ticket state section:
* "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
const char *ticket_id = NULL;
const char *granted = NULL;
const char *last_granted = NULL;
const char *standby = NULL;
xmlAttrPtr xIter = NULL;
ticket_t *ticket = NULL;
ticket_id = ID(xml_ticket);
if (ticket_id == NULL || strlen(ticket_id) == 0) {
return FALSE;
}
crm_trace("Processing ticket state for %s", ticket_id);
ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
if (ticket == NULL) {
ticket = ticket_new(ticket_id, data_set);
if (ticket == NULL) {
return FALSE;
}
}
for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
const char *prop_name = (const char *)xIter->name;
const char *prop_value = crm_element_value(xml_ticket, prop_name);
if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
continue;
}
g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
}
granted = g_hash_table_lookup(ticket->state, "granted");
if (granted && crm_is_true(granted)) {
ticket->granted = TRUE;
crm_info("We have ticket '%s'", ticket->id);
} else {
ticket->granted = FALSE;
crm_info("We do not have ticket '%s'", ticket->id);
}
last_granted = g_hash_table_lookup(ticket->state, "last-granted");
if (last_granted) {
ticket->last_granted = crm_parse_int(last_granted, 0);
}
standby = g_hash_table_lookup(ticket->state, "standby");
if (standby && crm_is_true(standby)) {
ticket->standby = TRUE;
if (ticket->granted) {
crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
}
} else {
ticket->standby = FALSE;
}
crm_trace("Done with ticket state for %s", ticket_id);
return TRUE;
}
static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
xmlNode *xml_obj = NULL;
for (xml_obj = __xml_first_child(xml_tickets); xml_obj != NULL; xml_obj = __xml_next_element(xml_obj)) {
if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
continue;
}
unpack_ticket_state(xml_obj, data_set);
}
return TRUE;
}
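/* Unpack a remote node's transient attributes from its node_state entry,
 * updating its shutdown, standby, maintenance, and resource discovery settings.
 */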
static void
unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
const char *resource_discovery_enabled = NULL;
xmlNode *attrs = NULL;
resource_t *rsc = NULL;
const char *shutdown = NULL;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
return;
}
if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
return;
}
crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
this_node->details->remote_maintenance =
crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
rsc = this_node->details->remote_rsc;
if (this_node->details->remote_requires_reset == FALSE) {
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
}
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
if (shutdown != NULL && safe_str_neq("0", shutdown)) {
crm_info("Node %s is shutting down", this_node->details->uname);
this_node->details->shutdown = TRUE;
if (rsc) {
rsc->next_role = RSC_ROLE_STOPPED;
}
}
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
(rsc && !is_set(rsc->flags, pe_rsc_managed))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
} else {
            /* This is either a baremetal node with fencing enabled, or a
             * container (guest) node, for which fencing support doesn't
             * matter: container nodes are 'fenced' by recovering the container
             * resource regardless of whether fencing is enabled. */
crm_info("Node %s has resource discovery disabled", this_node->details->uname);
this_node->details->rsc_discovery_enabled = FALSE;
}
}
}
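/* Unpack LRM resource history for every node whose state is sufficiently
 * known (remote nodes only become processable once their connection
 * resource's state has been unpacked, so callers loop until no further
 * progress is made). Returns true if any new node was processed in this pass.
 */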
static bool
unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
{
bool changed = false;
xmlNode *lrm_rsc = NULL;
for (xmlNode *state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
const char *id = NULL;
const char *uname = NULL;
node_t *this_node = NULL;
bool process = FALSE;
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
continue;
}
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (this_node == NULL) {
crm_info("Node %s is unknown", id);
continue;
} else if (this_node->details->unpacked) {
crm_info("Node %s is already processed", id);
continue;
} else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) {
// A redundant test, but preserves the order for regression tests
process = TRUE;
} else if (is_remote_node(this_node)) {
bool check = FALSE;
resource_t *rsc = this_node->details->remote_rsc;
if(fence) {
check = TRUE;
} else if(rsc == NULL) {
/* Not ready yet */
} else if (is_container_remote_node(this_node)
&& rsc->role == RSC_ROLE_STARTED
&& rsc->container->role == RSC_ROLE_STARTED) {
                /* Both the connection and the underlying container
                 * need to be known 'up' before we voluntarily process
                 * resources inside it
                 */
check = TRUE;
crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
} else if (is_container_remote_node(this_node) == FALSE
&& rsc->role == RSC_ROLE_STARTED) {
check = TRUE;
crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
}
if (check) {
determine_remote_online_status(data_set, this_node);
unpack_handle_remote_attrs(this_node, state, data_set);
process = TRUE;
}
} else if (this_node->details->online) {
process = TRUE;
} else if (fence) {
process = TRUE;
}
if(process) {
crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
fence?"un":"", is_remote_node(this_node)?" remote":"",
this_node->details->uname);
changed = TRUE;
this_node->details->unpacked = TRUE;
lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
unpack_lrm_resources(this_node, lrm_rsc, data_set);
}
}
return changed;
}
/* Unpack the status section: determine which nodes are down or stopping, and
 * create positive rsc_to_node constraints between resources and the nodes
 * they are running on.
 */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
const char *id = NULL;
const char *uname = NULL;
xmlNode *state = NULL;
node_t *this_node = NULL;
crm_trace("Beginning unpack");
if (data_set->tickets == NULL) {
data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
free, destroy_ticket);
}
for (state = __xml_first_child(status); state != NULL; state = __xml_next_element(state)) {
if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
unpack_tickets_state((xmlNode *) state, data_set);
} else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
xmlNode *attrs = NULL;
const char *resource_discovery_enabled = NULL;
id = crm_element_value(state, XML_ATTR_ID);
uname = crm_element_value(state, XML_ATTR_UNAME);
this_node = pe_find_node_any(data_set->nodes, id, uname);
if (uname == NULL) {
/* error */
continue;
} else if (this_node == NULL) {
crm_config_warn("Node %s in status section no longer exists", uname);
continue;
} else if (is_remote_node(this_node)) {
                /* The online state for remote nodes is determined by the
                 * connection resource's state after all the unpacking is done.
                 * However, we do need to mark whether the node has been fenced,
                 * as this plays a role when unpacking cluster node resource
                 * state */
this_node->details->remote_was_fenced =
crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
continue;
}
crm_trace("Processing node id=%s, uname=%s", id, uname);
/* Mark the node as provisionally clean
* - at least we have seen it in the current cluster's lifetime
*/
this_node->details->unclean = FALSE;
this_node->details->unseen = FALSE;
attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
add_node_attrs(attrs, this_node, TRUE, data_set);
if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
crm_info("Node %s is in standby-mode", this_node->details->uname);
this_node->details->standby = TRUE;
}
if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
crm_info("Node %s is in maintenance-mode", this_node->details->uname);
this_node->details->maintenance = TRUE;
}
resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
}
crm_trace("determining node state");
determine_online_status(state, this_node, data_set);
if (is_not_set(data_set->flags, pe_flag_have_quorum)
&& this_node->details->online
&& (data_set->no_quorum_policy == no_quorum_suicide)) {
/* Everything else should flow from this automatically
* At least until the PE becomes able to migrate off healthy resources
*/
pe_fence_node(data_set, this_node, "cluster does not have quorum");
}
}
}
while(unpack_node_loop(status, FALSE, data_set)) {
crm_trace("Start another loop");
}
// Now catch any nodes we didn't see
unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
node_t *this_node = gIter->data;
if (this_node == NULL) {
continue;
} else if(is_remote_node(this_node) == FALSE) {
continue;
} else if(this_node->details->unpacked) {
continue;
}
determine_remote_online_status(data_set, this_node);
}
return TRUE;
}
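/* Determine whether a cluster node is online when fencing is disabled
 * (a node that is unexpectedly down is merely marked unclean).
 */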
static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (!crm_is_true(in_cluster)) {
crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
} else if (safe_str_eq(is_peer, ONLINESTATUS)) {
if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
online = TRUE;
} else {
crm_debug("Node is not ready to run resources: %s", join);
}
} else if (this_node->details->expected_up == FALSE) {
crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
crm_trace("\tis_peer=%s, join=%s, expected=%s",
crm_str(is_peer), crm_str(join), crm_str(exp_state));
} else {
/* mark it unclean */
pe_fence_node(data_set, this_node, "peer is unexpectedly down");
crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
}
return online;
}
static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
node_t * this_node)
{
gboolean online = FALSE;
gboolean do_terminate = FALSE;
bool crmd_online = FALSE;
const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
const char *terminate = pe_node_attribute_raw(this_node, "terminate");
/*
- XML_NODE_IN_CLUSTER ::= true|false
- XML_NODE_IS_PEER ::= online|offline
- XML_NODE_JOIN_STATE ::= member|down|pending|banned
- XML_NODE_EXPECTED ::= member|down
*/
if (crm_is_true(terminate)) {
do_terminate = TRUE;
} else if (terminate != NULL && strlen(terminate) > 0) {
/* could be a time() value */
char t = terminate[0];
if (t != '0' && isdigit(t)) {
do_terminate = TRUE;
}
}
crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate);
online = crm_is_true(in_cluster);
crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
if (exp_state == NULL) {
exp_state = CRMD_JOINSTATE_DOWN;
}
if (this_node->details->shutdown) {
crm_debug("%s is shutting down", this_node->details->uname);
/* Slightly different criteria since we can't shut down a dead peer */
online = crmd_online;
} else if (in_cluster == NULL) {
pe_fence_node(data_set, this_node, "peer has not been seen by the cluster");
} else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria");
} else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
if (crm_is_true(in_cluster) || crmd_online) {
crm_info("- Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
crm_trace("%s is down or still coming up", this_node->details->uname);
}
} else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
&& crm_is_true(in_cluster) == FALSE && !crmd_online) {
crm_info("Node %s was just shot", this_node->details->uname);
online = FALSE;
} else if (crm_is_true(in_cluster) == FALSE) {
pe_fence_node(data_set, this_node, "peer is no longer part of the cluster");
} else if (!crmd_online) {
pe_fence_node(data_set, this_node, "peer process is no longer available");
/* Everything is running at this point, now check join state */
} else if (do_terminate) {
pe_fence_node(data_set, this_node, "termination was requested");
} else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
crm_info("Node %s is active", this_node->details->uname);
} else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
|| safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
crm_info("Node %s is not ready to run resources", this_node->details->uname);
this_node->details->standby = TRUE;
this_node->details->pending = TRUE;
} else {
pe_fence_node(data_set, this_node, "peer was in an unknown state");
crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
}
return online;
}
static gboolean
determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
{
resource_t *rsc = this_node->details->remote_rsc;
resource_t *container = NULL;
pe_node_t *host = NULL;
/* If there is a node state entry for a (former) Pacemaker Remote node
* but no resource creating that node, the node's connection resource will
* be NULL. Consider it an offline remote node in that case.
*/
if (rsc == NULL) {
this_node->details->online = FALSE;
goto remote_online_done;
}
container = rsc->container;
if (container && (g_list_length(rsc->running_on) == 1)) {
host = rsc->running_on->data;
}
/* If the resource is currently started, mark it online. */
if (rsc->role == RSC_ROLE_STARTED) {
crm_trace("%s node %s presumed ONLINE because connection resource is started",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = TRUE;
}
/* consider this node shutting down if transitioning start->stop */
if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
crm_trace("%s node %s shutting down because connection resource is stopping",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->shutdown = TRUE;
}
/* Now check all the failure conditions. */
if(container && is_set(container->flags, pe_rsc_failed)) {
crm_trace("Guest node %s UNCLEAN because guest resource failed",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
} else if(is_set(rsc->flags, pe_rsc_failed)) {
crm_trace("%s node %s OFFLINE because connection resource failed",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
} else if (rsc->role == RSC_ROLE_STOPPED
|| (container && container->role == RSC_ROLE_STOPPED)) {
crm_trace("%s node %s OFFLINE because its resource is stopped",
(container? "Guest" : "Remote"), this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = FALSE;
} else if (host && (host->details->online == FALSE)
&& host->details->unclean) {
crm_trace("Guest node %s UNCLEAN because host is unclean",
this_node->details->id);
this_node->details->online = FALSE;
this_node->details->remote_requires_reset = TRUE;
}
remote_online_done:
crm_trace("Remote node %s online=%s",
this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
return this_node->details->online;
}
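/*!
 * \internal
 * \brief Determine a node's online status from its node_state entry
 *
 * \param[in]     node_state XML node_state entry
 * \param[in,out] this_node  Node object to update
 * \param[in,out] data_set   Cluster working set
 *
 * \return TRUE if the node is online, FALSE otherwise
 */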
gboolean
determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
{
gboolean online = FALSE;
const char *shutdown = NULL;
const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
if (this_node == NULL) {
crm_config_err("No node to check");
return online;
}
this_node->details->shutdown = FALSE;
this_node->details->expected_up = FALSE;
shutdown = pe_node_attribute_raw(this_node, XML_CIB_ATTR_SHUTDOWN);
if (shutdown != NULL && safe_str_neq("0", shutdown)) {
this_node->details->shutdown = TRUE;
} else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
this_node->details->expected_up = TRUE;
}
if (this_node->details->type == node_ping) {
this_node->details->unclean = FALSE;
online = FALSE; /* As far as resource management is concerned,
* the node is safely offline.
* Anyone caught abusing this logic will be shot
*/
} else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
online = determine_online_status_no_fencing(data_set, node_state, this_node);
} else {
online = determine_online_status_fencing(data_set, node_state, this_node);
}
if (online) {
this_node->details->online = TRUE;
} else {
/* remove node from contention */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (online && this_node->details->shutdown) {
/* don't run resources here */
this_node->fixed = TRUE;
this_node->weight = -INFINITY;
}
if (this_node->details->type == node_ping) {
crm_info("Node %s is not a pacemaker node", this_node->details->uname);
} else if (this_node->details->unclean) {
pe_proc_warn("Node %s is unclean", this_node->details->uname);
} else if (this_node->details->online) {
crm_info("Node %s is %s", this_node->details->uname,
this_node->details->shutdown ? "shutting down" :
this_node->details->pending ? "pending" :
this_node->details->standby ? "standby" :
this_node->details->maintenance ? "maintenance" : "online");
} else {
crm_trace("Node %s is offline", this_node->details->uname);
}
return online;
}
/*!
* \internal
* \brief Find the end of a resource's name, excluding any clone suffix
*
* \param[in] id Resource ID to check
*
* \return Pointer to last character of resource's base name
*/
const char *
pe_base_name_end(const char *id)
{
if (!crm_strlen_zero(id)) {
const char *end = id + strlen(id) - 1;
for (const char *s = end; s > id; --s) {
switch (*s) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
case ':':
return (s == end)? s : (s - 1);
default:
return end;
}
}
return end;
}
return NULL;
}
/*!
* \internal
* \brief Get a resource name excluding any clone suffix
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_strip(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
char *basename = NULL;
CRM_ASSERT(end);
basename = strndup(last_rsc_id, end - last_rsc_id + 1);
CRM_ASSERT(basename);
return basename;
}
/*!
* \internal
* \brief Get the name of the first instance of a cloned resource
*
* \param[in] last_rsc_id Resource ID to check
*
* \return Pointer to newly allocated string with resource's base name plus :0
* \note It is the caller's responsibility to free() the result.
* This asserts on error, so callers can assume result is not NULL.
*/
char *
clone_zero(const char *last_rsc_id)
{
const char *end = pe_base_name_end(last_rsc_id);
size_t base_name_len = end - last_rsc_id + 1;
char *zero = NULL;
CRM_ASSERT(end);
zero = calloc(base_name_len + 3, sizeof(char));
CRM_ASSERT(zero);
memcpy(zero, last_rsc_id, base_name_len);
zero[base_name_len] = ':';
zero[base_name_len + 1] = '0';
return zero;
}
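/* Create a resource object from operation history that has no matching
 * configuration, marking it as an orphan.
 */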
static resource_t *
create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
copy_in_properties(xml_rsc, rsc_entry);
crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
crm_log_xml_debug(xml_rsc, "Orphan resource");
if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
return NULL;
}
if (xml_contains_remote_node(xml_rsc)) {
node_t *node;
crm_debug("Detected orphaned remote node %s", rsc_id);
node = pe_find_node(data_set->nodes, rsc_id);
if (node == NULL) {
node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
}
link_rsc2remotenode(data_set, rsc);
if (node) {
crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
node->details->shutdown = TRUE;
}
}
if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
/* This orphaned rsc needs to be mapped to a container. */
crm_trace("Detected orphaned container filler %s", rsc_id);
set_bit(rsc->flags, pe_rsc_orphan_container_filler);
}
set_bit(rsc->flags, pe_rsc_orphan);
data_set->resources = g_list_append(data_set->resources, rsc);
return rsc;
}
/*!
* \internal
* \brief Create orphan instance for anonymous clone resource history
*/
static pe_resource_t *
create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
pe_node_t *node, pe_working_set_t *data_set)
{
pe_resource_t *top = pe__create_clone_child(parent, data_set);
// find_rsc() because we might be a cloned group
pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
top->id, parent->id, rsc_id, node->details->uname);
return orphan;
}
/*!
* \internal
* \brief Check a node for an instance of an anonymous clone
*
* Return a child instance of the specified anonymous clone, in order of
* preference: (1) the instance running on the specified node, if any;
* (2) an inactive instance (i.e. within the total of clone-max instances);
* (3) a newly created orphan (i.e. clone-max instances are already active).
*
* \param[in] data_set Cluster information
* \param[in] node Node on which to check for instance
* \param[in] parent Clone to check
- * \param[in] rsc_id ID of (clone or cloned) resource being searched for
+ * \param[in] rsc_id Name of cloned resource in history (without instance)
*/
static resource_t *
find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
const char *rsc_id)
{
GListPtr rIter = NULL;
pe_resource_t *rsc = NULL;
pe_resource_t *inactive_instance = NULL;
gboolean skip_inactive = FALSE;
CRM_ASSERT(parent != NULL);
CRM_ASSERT(pe_rsc_is_clone(parent));
CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
// Check for active (or partially active, for cloned groups) instance
pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
GListPtr locations = NULL;
resource_t *child = rIter->data;
/* Check whether this instance is already known to be active anywhere.
*
* "Active" in this case means known to be active at this stage of
* unpacking. Because this function is called for a resource before the
* resource's individual operation history entries are unpacked,
- * locations will generally be NULL.
+ * locations will generally not contain the desired node.
*
* However, there are three exceptions:
* (1) when child is a cloned group and we have already unpacked the
- * history of another member of the group;
+ * history of another member of the group on the same node;
* (2) when we've already unpacked the history of another numbered
* instance on the same node (which can happen if globally-unique
* was flipped from true to false); and
* (3) when we re-run calculations on the same data set as part of a
* simulation.
*/
child->fns->location(child, &locations, TRUE);
if (locations) {
/* We should never associate the same numbered anonymous clone
* instance with multiple nodes, and clone instances can't migrate,
* so there must be only one location, regardless of history.
*/
CRM_LOG_ASSERT(locations->next == NULL);
if (((pe_node_t *)locations->data)->details == node->details) {
- /* This instance is active on the requested node, so check for
- * a corresponding configured resource. We use find_rsc()
- * because child may be a cloned group, and we need the
- * particular member corresponding to rsc_id.
+ /* This child instance is active on the requested node, so check
+ * for a corresponding configured resource. We use find_rsc()
+ * instead of child because child may be a cloned group, and we
+ * need the particular member corresponding to rsc_id.
*
* If the history entry is orphaned, rsc will be NULL.
*/
rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
if (rsc) {
- pe_rsc_trace(parent, "Resource %s, active", rsc->id);
-
- /* If there are multiple active instances of an anonymous
- * clone in a single node's history (which can happen if
- * globally-unique is switched from true to false), we want
- * to consider the instances beyond the first as orphans.
+ /* If there are multiple instance history entries for an
+ * anonymous clone in a single node's history (which can
+ * happen if globally-unique is switched from true to
+ * false), we want to consider the instances beyond the
+ * first as orphans, even if there are inactive instance
+ * numbers available.
*/
if (rsc->running_on) {
- crm_notice("Now-anonymous clone %s has multiple instances active on %s",
+ crm_notice("Active (now-)anonymous clone %s has "
+ "multiple (orphan) instance histories on %s",
parent->id, node->details->uname);
skip_inactive = TRUE;
rsc = NULL;
+ } else {
+ pe_rsc_trace(parent, "Resource %s, active", rsc->id);
}
}
}
g_list_free(locations);
} else {
pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
if (!skip_inactive && !inactive_instance
&& is_not_set(child->flags, pe_rsc_block)) {
// Remember one inactive instance in case we don't find active
inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
pe_find_clone);
}
}
}
if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
rsc = inactive_instance;
}
/* If the resource has "requires" set to "quorum" or "nothing", and we don't
* have a clone instance for every node, we don't want to consume a valid
* instance number for unclean nodes. Such instances may appear to be active
* according to the history, but should be considered inactive, so we can
* start an instance elsewhere. Treat such instances as orphans.
*
* An exception is instances running on guest nodes -- since guest node
* "fencing" is actually just a resource stop, requires shouldn't apply.
*
* @TODO Ideally, we'd use an inactive instance number if it is not needed
* for any clean instances. However, we don't know that at this point.
*/
if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
&& (!node->details->online || node->details->unclean)
&& !is_container_remote_node(node)
&& !pe__is_universal_clone(parent, data_set)) {
rsc = NULL;
}
if (rsc == NULL) {
rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
}
return rsc;
}
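/* Map a resource ID from operation history to a resource object, matching
 * clone instances (and anonymous clone base names) as needed. Returns NULL
 * if the history entry should be treated as orphaned.
 */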
static resource_t *
unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
xmlNode * rsc_entry)
{
resource_t *rsc = NULL;
resource_t *parent = NULL;
crm_trace("looking for %s", rsc_id);
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL) {
/* If we didn't find the resource by its name in the operation history,
* check it again as a clone instance. Even when clone-max=0, we create
* a single :0 orphan to match against here.
*/
char *clone0_id = clone_zero(rsc_id);
resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
rsc = clone0;
+ parent = uber_parent(clone0);
+ crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
} else {
- crm_trace("%s is not known as %s either", rsc_id, clone0_id);
+ crm_trace("%s is not known as %s either (orphan)",
+ rsc_id, clone0_id);
}
-
- /* Grab the parent clone even if this a different unique instance,
- * so we can remember the clone name, which will be the same.
- */
- parent = uber_parent(clone0);
free(clone0_id);
- crm_trace("%s not found: %s", rsc_id, parent ? parent->id : "orphan");
-
} else if (rsc->variant > pe_native) {
- crm_trace("%s is no longer a primitive resource, the lrm_resource entry is obsolete",
+ crm_trace("Resource history for %s is orphaned because it is no longer primitive",
rsc_id);
return NULL;
} else {
parent = uber_parent(rsc);
}
if (pe_rsc_is_anon_clone(parent)) {
if (pe_rsc_is_bundled(parent)) {
rsc = find_container_child(parent->parent, node);
} else {
char *base = clone_strip(rsc_id);
rsc = find_anonymous_clone(data_set, node, parent, base);
free(base);
CRM_ASSERT(rsc != NULL);
}
}
if (rsc && safe_str_neq(rsc_id, rsc->id)
&& safe_str_neq(rsc_id, rsc->clone_name)) {
free(rsc->clone_name);
rsc->clone_name = strdup(rsc_id);
pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
rsc_id, node->details->uname, rsc->id,
(is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
}
return rsc;
}
static resource_t *
process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
{
resource_t *rsc = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
    rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
    CRM_CHECK(rsc != NULL, return NULL);
    if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
        clear_bit(rsc->flags, pe_rsc_managed);
    } else {
        print_resource(LOG_TRACE, "Added orphan", rsc, FALSE);
        resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
    }
return rsc;
}
static void
process_rsc_state(resource_t * rsc, node_t * node,
enum action_fail_response on_fail,
xmlNode * migrate_op, pe_working_set_t * data_set)
{
node_t *tmpnode = NULL;
char *reason = NULL;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
/* process current state */
if (rsc->role != RSC_ROLE_UNKNOWN) {
resource_t *iter = rsc;
while (iter) {
if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
node_t *n = node_copy(node);
pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
n->details->uname);
g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
}
if (is_set(iter->flags, pe_rsc_unique)) {
break;
}
iter = iter->parent;
}
}
/* If a managed resource is believed to be running, but node is down ... */
if (rsc->role > RSC_ROLE_STOPPED
&& node->details->online == FALSE
&& node->details->maintenance == FALSE
&& is_set(rsc->flags, pe_rsc_managed)) {
gboolean should_fence = FALSE;
/* If this is a guest node, fence it (regardless of whether fencing is
* enabled, because guest node fencing is done by recovery of the
* container resource rather than by the fencer). Mark the resource
* we're processing as failed. When the guest comes back up, its
* operation history in the CIB will be cleared, freeing the affected
* resource to run again once we are sure we know its state.
*/
if (is_container_remote_node(node)) {
set_bit(rsc->flags, pe_rsc_failed);
should_fence = TRUE;
} else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
if (is_baremetal_remote_node(node) && node->details->remote_rsc
&& is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
/* setting unseen = true means that fencing of the remote node will
* only occur if the connection resource is not going to start somewhere.
* This allows connection resources on a failed cluster-node to move to
* another node without requiring the baremetal remote nodes to be fenced
* as well. */
node->details->unseen = TRUE;
reason = crm_strdup_printf("%s is active there (fencing will be"
" revoked if remote connection can "
"be re-established elsewhere)",
rsc->id);
}
should_fence = TRUE;
}
if (should_fence) {
if (reason == NULL) {
reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
}
pe_fence_node(data_set, node, reason);
}
free(reason);
}
if (node->details->unclean) {
/* No extra processing needed
* Also allows resources to be started again after a node is shot
*/
on_fail = action_fail_ignore;
}
switch (on_fail) {
case action_fail_ignore:
/* nothing to do */
break;
case action_fail_fence:
/* treat it as if it is still running
* but also mark the node as unclean
*/
reason = crm_strdup_printf("%s failed there", rsc->id);
pe_fence_node(data_set, node, reason);
free(reason);
break;
case action_fail_standby:
node->details->standby = TRUE;
node->details->standby_onfail = TRUE;
break;
case action_fail_block:
/* is_managed == FALSE will prevent any
* actions being sent for the resource
*/
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
break;
case action_fail_migrate:
/* make sure it comes up somewhere else
* or not at all
*/
resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
break;
case action_fail_stop:
rsc->next_role = RSC_ROLE_STOPPED;
break;
case action_fail_recover:
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
set_bit(rsc->flags, pe_rsc_failed);
stop_action(rsc, node, FALSE);
}
break;
case action_fail_restart_container:
set_bit(rsc->flags, pe_rsc_failed);
if (rsc->container) {
stop_action(rsc->container, node, FALSE);
} else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
stop_action(rsc, node, FALSE);
}
break;
case action_fail_reset_remote:
set_bit(rsc->flags, pe_rsc_failed);
if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
tmpnode = NULL;
if (rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
}
if (tmpnode &&
is_baremetal_remote_node(tmpnode) &&
tmpnode->details->remote_was_fenced == 0) {
                /* The connection resource to the baremetal remote node failed
                 * in a way that should result in fencing the remote node. */
pe_fence_node(data_set, tmpnode,
"remote connection is unrecoverable");
}
}
        /* require the stop action regardless of whether fencing is occurring or not. */
if (rsc->role > RSC_ROLE_STOPPED) {
stop_action(rsc, node, FALSE);
}
/* if reconnect delay is in use, prevent the connection from exiting the
* "STOPPED" role until the failure is cleared by the delay timeout. */
if (rsc->remote_reconnect_ms) {
rsc->next_role = RSC_ROLE_STOPPED;
}
break;
}
/* ensure a remote-node connection failure forces an unclean remote-node
* to be fenced. By setting unseen = FALSE, the remote-node failure will
* result in a fencing operation regardless if we're going to attempt to
* reconnect to the remote-node in this transition or not. */
if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
tmpnode = pe_find_node(data_set->nodes, rsc->id);
if (tmpnode && tmpnode->details->unclean) {
tmpnode->details->unseen = FALSE;
}
}
if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
if (is_set(rsc->flags, pe_rsc_orphan)) {
if (is_set(rsc->flags, pe_rsc_managed)) {
crm_config_warn("Detected active orphan %s running on %s",
rsc->id, node->details->uname);
} else {
crm_config_warn("Cluster configured not to stop active orphans."
" %s must be stopped manually on %s",
rsc->id, node->details->uname);
}
}
native_add_running(rsc, node, data_set);
if (on_fail != action_fail_ignore) {
set_bit(rsc->flags, pe_rsc_failed);
}
} else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
/* Only do this for older status sections that included instance numbers
* Otherwise stopped instances will appear as orphans
*/
pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
free(rsc->clone_name);
rsc->clone_name = NULL;
} else {
char *key = stop_key(rsc);
GListPtr possible_matches = find_actions(rsc->actions, key, node);
GListPtr gIter = possible_matches;
for (; gIter != NULL; gIter = gIter->next) {
action_t *stop = (action_t *) gIter->data;
stop->flags |= pe_action_optional;
}
g_list_free(possible_matches);
free(key);
}
}
/* create active recurring operations as optional */
static void
process_recurring(node_t * node, resource_t * rsc,
int start_index, int stop_index,
GListPtr sorted_op_list, pe_working_set_t * data_set)
{
int counter = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
CRM_ASSERT(rsc);
pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
guint interval_ms = 0;
char *key = NULL;
const char *id = ID(rsc_op);
const char *interval_ms_s = NULL;
counter++;
if (node->details->online == FALSE) {
pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
break;
/* Need to check if there's a monitor for role="Stopped" */
} else if (start_index < stop_index && counter <= stop_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
continue;
} else if (counter < start_index) {
pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
continue;
}
interval_ms_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL_MS);
interval_ms = crm_parse_ms(interval_ms_s);
if (interval_ms == 0) {
pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
continue;
}
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(status, "-1")) {
pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
continue;
}
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
/* create the action */
key = generate_op_key(rsc->id, task, interval_ms);
pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
}
}
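/* Given a resource's sorted operation history, find the indices of the last
 * successful stop and the last start (or an implied start via a successful
 * monitor, promote, or demote), so callers can tell which entries are current.
 */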
void
calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
{
int counter = -1;
int implied_monitor_start = -1;
int implied_clone_start = -1;
const char *task = NULL;
const char *status = NULL;
GListPtr gIter = sorted_op_list;
*stop_index = -1;
*start_index = -1;
for (; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
if (safe_str_eq(task, CRMD_ACTION_STOP)
&& safe_str_eq(status, "0")) {
*stop_index = counter;
} else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
*start_index = counter;
} else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
implied_monitor_start = counter;
}
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
implied_clone_start = counter;
}
}
if (*start_index == -1) {
if (implied_clone_start != -1) {
*start_index = implied_clone_start;
} else if (implied_monitor_start != -1) {
*start_index = implied_monitor_start;
}
}
}
static resource_t *
unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
{
GListPtr gIter = NULL;
int stop_index = -1;
int start_index = -1;
enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
const char *task = NULL;
const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
resource_t *rsc = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
xmlNode *migrate_op = NULL;
xmlNode *rsc_op = NULL;
xmlNode *last_failure = NULL;
    enum action_fail_response on_fail = action_fail_ignore;
enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
crm_trace("[%s] Processing %s on %s",
crm_element_name(rsc_entry), rsc_id, node->details->uname);
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
/* find the resource */
rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
if (rsc == NULL) {
rsc = process_orphan_resource(rsc_entry, node, data_set);
}
CRM_ASSERT(rsc != NULL);
/* process operations */
saved_role = rsc->role;
on_fail = action_fail_ignore;
rsc->role = RSC_ROLE_UNKNOWN;
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
migrate_op = rsc_op;
}
unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
}
/* create active recurring operations as optional */
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
/* no need to free the contents */
g_list_free(sorted_op_list);
process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
if (get_target_role(rsc, &req_role)) {
if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
rsc->next_role = req_role;
} else if (req_role > rsc->next_role) {
pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
" with requested next role %s",
rsc->id, role2text(rsc->next_role), role2text(req_role));
}
}
if (saved_role > rsc->role) {
rsc->role = saved_role;
}
return rsc;
}
static void
handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
resource_t *rsc;
resource_t *container;
const char *rsc_id;
const char *container_id;
if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
continue;
}
container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
if (container_id == NULL || rsc_id == NULL) {
continue;
}
container = pe_find_resource(data_set->resources, container_id);
if (container == NULL) {
continue;
}
rsc = pe_find_resource(data_set->resources, rsc_id);
if (rsc == NULL ||
is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
rsc->container != NULL) {
continue;
}
pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id);
rsc->container = container;
container->fillers = g_list_append(container->fillers, rsc);
}
}
gboolean
unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
{
xmlNode *rsc_entry = NULL;
gboolean found_orphaned_container_filler = FALSE;
CRM_CHECK(node != NULL, return FALSE);
crm_trace("Unpacking resources on %s", node->details->uname);
for (rsc_entry = __xml_first_child(lrm_rsc_list); rsc_entry != NULL;
rsc_entry = __xml_next_element(rsc_entry)) {
if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
if (!rsc) {
continue;
}
if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
found_orphaned_container_filler = TRUE;
}
}
}
/* now that all the resource state has been unpacked for this node
* we have to go back and map any orphaned container fillers to their
* container resource */
if (found_orphaned_container_filler) {
handle_orphaned_container_fillers(lrm_rsc_list, data_set);
}
return TRUE;
}
static void
set_active(resource_t * rsc)
{
resource_t *top = uber_parent(rsc);
if (top && is_set(top->flags, pe_rsc_promotable)) {
rsc->role = RSC_ROLE_SLAVE;
} else {
rsc->role = RSC_ROLE_STARTED;
}
}
static void
set_node_score(gpointer key, gpointer value, gpointer user_data)
{
node_t *node = value;
int *score = user_data;
node->weight = *score;
}
#define STATUS_PATH_MAX 1024
static xmlNode *
find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
pe_working_set_t * data_set)
{
int offset = 0;
char xpath[STATUS_PATH_MAX];
offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
resource);
/* Need to check against transition_magic too? */
if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
source);
} else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
source);
} else {
offset +=
snprintf(xpath + offset, STATUS_PATH_MAX - offset,
"/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
}
CRM_LOG_ASSERT(offset > 0);
return get_xpath_object(xpath, data_set->input, LOG_DEBUG);
}
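/* Illustrative example (hypothetical names): a call such as
 *   find_lrm_op("rsc1", CRMD_ACTION_STOP, "node1", NULL, data_set)
 * builds, assuming the usual tag definitions, an XPath like
 *   //node_state[@uname='node1']//lrm_resource[@id='rsc1']/lrm_rsc_op[@operation='stop']
 * and returns the first matching operation history entry, or NULL.
 */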
static bool
stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
pe_working_set_t *data_set)
{
xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP, node->details->id,
NULL, data_set);
if (stop_op) {
int stop_id = 0;
int task_id = 0;
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
if (stop_id > task_id) {
return TRUE;
}
}
return FALSE;
}
static void
unpack_rsc_migration(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
/* A successful migration sequence is:
* migrate_to on source node
* migrate_from on target node
* stop on source node
*
* If a migrate_to is followed by a stop, the entire migration (successful
* or failed) is complete, and we don't care what happened on the target.
*
* If no migrate_from has happened, the migration is considered to be
* "partial". If the migrate_from failed, make sure the resource gets
* stopped on both source and target (if up).
*
* If the migrate_to and migrate_from both succeeded (which also implies the
* resource is no longer running on the source), but there is no stop, the
* migration is considered to be "dangling".
*/
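/* Illustrative histories (hypothetical call IDs in parentheses):
 *   complete: migrate_to(3) on source, migrate_from(4) on target, stop(5) on source
 *   dangling: migrate_to(3) on source, successful migrate_from(4) on target, no stop
 *   partial:  migrate_to(3) on source, no migrate_from recorded yet
 */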
int from_rc = 0;
int from_status = 0;
const char *migrate_source = NULL;
const char *migrate_target = NULL;
pe_node_t *target = NULL;
pe_node_t *source = NULL;
xmlNode *migrate_from = NULL;
if (stop_happened_after(rsc, node, xml_op, data_set)) {
return;
}
// Clones are not allowed to migrate, so role can't be master
rsc->role = RSC_ROLE_STARTED;
migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
target = pe_find_node(data_set->nodes, migrate_target);
source = pe_find_node(data_set->nodes, migrate_source);
// Check whether there was a migrate_from action
migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target,
migrate_source, data_set);
if (migrate_from) {
crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
ID(migrate_from), migrate_target, from_status, from_rc);
}
if (migrate_from && from_rc == PCMK_OCF_OK
&& from_status == PCMK_LRM_OP_DONE) {
/* The migrate_to and migrate_from both succeeded, so mark the migration
* as "dangling". This will be used to schedule a stop action on the
* source without affecting the target.
*/
pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
migrate_source);
rsc->role = RSC_ROLE_STOPPED;
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
} else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
}
} else { // Pending, or complete but erased
if (target && target->details->online) {
pe_rsc_trace(rsc, "Marking active on %s %p %d", migrate_target, target,
target->details->online);
native_add_running(rsc, target, data_set);
if (source && source->details->online) {
/* This is a partial migration: the migrate_to completed
* successfully on the source, but the migrate_from has not
* completed. Remember the source and target; if the newly
* chosen target remains the same when we schedule actions
* later, we may continue with the migration.
*/
rsc->partial_migration_target = target;
rsc->partial_migration_source = source;
}
} else {
/* Consider it failed here - forces a restart, prevents migration */
set_bit(rsc->flags, pe_rsc_failed);
clear_bit(rsc->flags, pe_rsc_allow_migrate);
}
}
}
static void
unpack_rsc_migration_failure(resource_t *rsc, node_t *node, xmlNode *xml_op, pe_working_set_t * data_set)
{
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_ASSERT(rsc);
if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op =
find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_source, NULL, data_set);
xmlNode *migrate_op =
find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE, migrate_source, migrate_target,
data_set);
if (stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if (migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if (stop_op == NULL || stop_id < migrate_id) {
node_t *source = pe_find_node(data_set->nodes, migrate_source);
if (source && source->details->online) {
native_add_running(rsc, source, data_set);
}
}
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
int stop_id = 0;
int migrate_id = 0;
const char *migrate_source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
xmlNode *stop_op =
find_lrm_op(rsc->id, CRMD_ACTION_STOP, migrate_target, NULL, data_set);
xmlNode *migrate_op =
find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, migrate_target, migrate_source,
data_set);
if (stop_op) {
crm_element_value_int(stop_op, XML_LRM_ATTR_CALLID, &stop_id);
}
if (migrate_op) {
crm_element_value_int(migrate_op, XML_LRM_ATTR_CALLID, &migrate_id);
}
/* Get our state right */
rsc->role = RSC_ROLE_STARTED; /* can be master? */
if (stop_op == NULL || stop_id < migrate_id) {
node_t *target = pe_find_node(data_set->nodes, migrate_target);
pe_rsc_trace(rsc, "Stop: %p %d, Migrated: %p %d", stop_op, stop_id, migrate_op,
migrate_id);
if (target && target->details->online) {
native_add_running(rsc, target, data_set);
}
} else if (migrate_op == NULL) {
/* Make sure it gets cleaned up; the stop may pre-date the migrate_from */
rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
}
}
}
static void
record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
{
xmlNode *xIter = NULL;
const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
if (node->details->online == FALSE) {
return;
}
for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
return;
}
}
crm_trace("Adding entry %s on %s", op_key, node->details->uname);
crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
add_node_copy(data_set->failed, op);
}
static const char *get_op_key(xmlNode *xml_op)
{
const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
if(key == NULL) {
key = ID(xml_op);
}
return key;
}
static void
unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
bool is_probe = FALSE;
action_t *action = NULL;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
CRM_ASSERT(rsc);
*last_failure = xml_op;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
pe_rsc_trace(rsc, "is a probe: %s", key);
}
if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
(is_probe? "probe" : task), rsc->id, node->details->uname,
services_ocf_exitcode_str(rc), rc);
if (is_probe && (rc != PCMK_OCF_OK)
&& (rc != PCMK_OCF_NOT_RUNNING)
&& (rc != PCMK_OCF_RUNNING_MASTER)) {
/* A failed (not just unexpected) probe result could mean the user
 * did not realize that resources are probed even on nodes where they
 * cannot run.
 */
crm_notice("If it is not possible for %s to run on %s, see "
"the resource-discovery option for location constraints",
rsc->id, node->details->uname);
}
record_failed_op(xml_op, node, rsc, data_set);
} else {
crm_trace("Processing failed op %s for %s on %s: %s (%d)",
task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
rc);
}
action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
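/* Decide whether this failure's on-fail policy should replace the one
 * recorded so far; roughly, we only escalate toward a stronger
 * recovery response. */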
if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
(action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
(action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
(*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
fail2text(action->on_fail), action->uuid, key);
*on_fail = action->on_fail;
}
if (safe_str_eq(task, CRMD_ACTION_STOP)) {
resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
unpack_rsc_migration_failure(rsc, node, xml_op, data_set);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
if (action->on_fail == action_fail_block) {
rsc->role = RSC_ROLE_MASTER;
rsc->next_role = RSC_ROLE_STOPPED;
} else if(rc == PCMK_OCF_NOT_RUNNING) {
rsc->role = RSC_ROLE_STOPPED;
} else {
/*
* Staying in master role would put the PE/TE into a loop. Setting
* slave role is not dangerous because the resource will be stopped
* as part of recovery, and any master promotion will be ordered
* after that stop.
*/
rsc->role = RSC_ROLE_SLAVE;
}
}
if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
/* leave stopped */
pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
rsc->role = RSC_ROLE_STOPPED;
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "Setting %s active", rsc->id);
set_active(rsc);
}
pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
rsc->id, role2text(rsc->role),
node->details->unclean ? "true" : "false",
fail2text(action->on_fail), role2text(action->fail_role));
if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
rsc->next_role = action->fail_role;
}
if (action->fail_role == RSC_ROLE_STOPPED) {
int score = -INFINITY;
resource_t *fail_rsc = rsc;
if (fail_rsc->parent) {
resource_t *parent = uber_parent(fail_rsc);
if (pe_rsc_is_clone(parent)
&& is_not_set(parent->flags, pe_rsc_unique)) {
/* If any instance of an anonymous clone fails an operation
 * with on-fail=stop, fail the whole clone by preventing the
 * parent from starting again. */
fail_rsc = parent;
}
}
crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
/* make sure it doesn't come up again */
if (fail_rsc->allowed_nodes != NULL) {
g_hash_table_destroy(fail_rsc->allowed_nodes);
}
fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
}
pe_free_action(action);
}
static int
determine_op_status(
resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
guint interval_ms = 0;
int result = PCMK_LRM_OP_DONE;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
bool is_probe = FALSE;
CRM_ASSERT(rsc);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
if ((interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
is_probe = TRUE;
}
if (target_rc >= 0 && target_rc != rc) {
result = PCMK_LRM_OP_ERROR;
pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
}
/* we could clean this up significantly except for old LRMs and CRMs that
* didn't include target_rc and liked to remap status
*/
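/* For example, a probe that expected "not running" (target_rc 7) but
 * returns PCMK_OCF_OK is not treated as an error below; it simply
 * means the resource is already active on this node.
 */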
switch (rc) {
case PCMK_OCF_OK:
if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
task, rsc->id, node->details->uname);
}
break;
case PCMK_OCF_NOT_RUNNING:
if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
result = PCMK_LRM_OP_DONE;
rsc->role = RSC_ROLE_STOPPED;
/* clear any previous failure actions */
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
} else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
result = PCMK_LRM_OP_ERROR;
}
break;
case PCMK_OCF_RUNNING_MASTER:
if (is_probe) {
result = PCMK_LRM_OP_DONE;
pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
task, rsc->id, node->details->uname);
} else if (target_rc == rc) {
/* nothing to do */
} else if (target_rc >= 0) {
result = PCMK_LRM_OP_ERROR;
}
rsc->role = RSC_ROLE_MASTER;
break;
case PCMK_OCF_DEGRADED_MASTER:
case PCMK_OCF_FAILED_MASTER:
rsc->role = RSC_ROLE_MASTER;
result = PCMK_LRM_OP_ERROR;
break;
case PCMK_OCF_NOT_CONFIGURED:
result = PCMK_LRM_OP_ERROR_FATAL;
break;
case PCMK_OCF_NOT_INSTALLED:
case PCMK_OCF_INVALID_PARAM:
case PCMK_OCF_INSUFFICIENT_PRIV:
case PCMK_OCF_UNIMPLEMENT_FEATURE:
if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && (interval_ms > 0)) {
result = PCMK_LRM_OP_NOTSUPPORTED;
break;
} else if (pe_can_fence(data_set, node) == FALSE
&& safe_str_eq(task, CRMD_ACTION_STOP)) {
/* If a stop fails and we can't fence, there's nothing else we can do */
pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
rsc->id, task, services_ocf_exitcode_str(rc), rc);
clear_bit(rsc->flags, pe_rsc_managed);
set_bit(rsc->flags, pe_rsc_block);
}
result = PCMK_LRM_OP_ERROR_HARD;
break;
default:
if (result == PCMK_LRM_OP_DONE) {
crm_info("Treating %s (rc=%d) on %s as an ERROR",
key, rc, node->details->uname);
result = PCMK_LRM_OP_ERROR;
}
}
return result;
}
static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
{
bool expired = FALSE;
time_t last_failure = 0;
guint interval_ms = 0;
int failure_timeout = rsc->failure_timeout;
const char *key = get_op_key(xml_op);
const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
const char *clear_reason = NULL;
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
/* Automatically clearing failures of recurring monitor operations
 * needs to be considered carefully */
if ((interval_ms != 0) && safe_str_eq(task, "monitor")) {
/* TODO: in the future, consider not clearing recurring monitor
 * failures unless the last action for the resource was a stop;
 * otherwise, clearing the monitor failure could leave the resource
 * in an indeterminate state.
 *
 * For now, we handle this potential indeterminate state for remote
 * node connection resources by not clearing a recurring monitor
 * failure until after the node has been fenced. */
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms) {
node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
if (remote_node && remote_node->details->remote_was_fenced == 0) {
if (strstr(ID(xml_op), "last_failure")) {
crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
}
/* disabling failure timeout for this operation because we believe
* fencing of the remote node should occur first. */
failure_timeout = 0;
}
}
}
if (failure_timeout > 0) {
int last_run = 0;
if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
time_t now = get_effective_time(data_set);
if (now > (last_run + failure_timeout)) {
expired = TRUE;
}
}
}
if (expired) {
if (failure_timeout > 0) {
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default,
xml_op, data_set)) {
if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
xml_op, data_set) == 0) {
clear_reason = "it expired";
} else {
expired = FALSE;
}
} else if (rsc->remote_reconnect_ms
&& strstr(ID(xml_op), "last_failure")) {
/* always clear last failure when reconnect interval is set */
clear_reason = "reconnect interval is set";
}
}
} else if (strstr(ID(xml_op), "last_failure") &&
((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
op_digest_cache_t *digest_data = NULL;
digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
if (digest_data->rc == RSC_DIGEST_UNKNOWN) {
crm_trace("rsc op %s/%s on node %s does not have a op digest to compare against", rsc->id,
key, node->details->id);
} else if(container_fix_remote_addr(rsc) && digest_data->rc != RSC_DIGEST_MATCH) {
// We can't sanely check the changing 'addr' attribute. Yet
crm_trace("Ignoring rsc op %s/%s on node %s", rsc->id, key, node->details->id);
} else if (digest_data->rc != RSC_DIGEST_MATCH) {
clear_reason = "resource parameters have changed";
}
}
if (clear_reason != NULL) {
node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
char *key = generate_op_key(rsc->id, CRM_OP_CLEAR_FAILCOUNT, 0);
action_t *clear_op = custom_action(rsc, key, CRM_OP_CLEAR_FAILCOUNT,
node, FALSE, TRUE, data_set);
add_hash_param(clear_op->meta, XML_ATTR_TE_NOWAIT, XML_BOOLEAN_TRUE);
crm_notice("Clearing failure of %s on %s because %s " CRM_XS " %s",
rsc->id, node->details->uname, clear_reason, clear_op->uuid);
if (is_set(data_set->flags, pe_flag_stonith_enabled)
&& rsc->remote_reconnect_ms
&& remote_node
&& remote_node->details->unclean) {
action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, data_set);
crm_notice("Waiting for %s to complete before clearing %s failure for remote node %s", fence?fence->uuid:"nil", task, rsc->id);
order_actions(fence, clear_op, pe_order_implies_then);
}
}
if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
switch(rc) {
case PCMK_OCF_OK:
case PCMK_OCF_NOT_RUNNING:
case PCMK_OCF_RUNNING_MASTER:
case PCMK_OCF_DEGRADED:
case PCMK_OCF_DEGRADED_MASTER:
/* Don't expire probes that return these values */
expired = FALSE;
break;
}
}
return expired;
}
int get_target_rc(xmlNode *xml_op)
{
int dummy = 0;
int target_rc = 0;
char *dummy_string = NULL;
const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
if (key == NULL) {
return -1;
}
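/* The transition key packs several fields, one of which is the rc the
 * controller expected; e.g. a key of the form "3:14:0:0e1f..."
 * (assuming the usual action:transition:rc:uuid layout) carries an
 * expected rc of 0.
 */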
decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
free(dummy_string);
return target_rc;
}
static enum action_fail_response
get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
{
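/* Build a throwaway action purely to read the on-fail policy the
 * configuration would assign it, then free it again. */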
int result = action_fail_recover;
action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
result = action->on_fail;
pe_free_action(action);
return result;
}
static void
update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
gboolean clear_past_failure = FALSE;
CRM_ASSERT(rsc);
CRM_ASSERT(xml_op);
if (rc == PCMK_OCF_NOT_RUNNING) {
clear_past_failure = TRUE;
} else if (rc == PCMK_OCF_NOT_INSTALLED) {
rsc->role = RSC_ROLE_STOPPED;
} else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
if (last_failure) {
const char *op_key = get_op_key(xml_op);
const char *last_failure_key = get_op_key(last_failure);
if (safe_str_eq(op_key, last_failure_key)) {
clear_past_failure = TRUE;
}
}
if (rsc->role < RSC_ROLE_STARTED) {
set_active(rsc);
}
} else if (safe_str_eq(task, CRMD_ACTION_START)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
rsc->role = RSC_ROLE_STOPPED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
/* Demote from Master does not clear an error */
rsc->role = RSC_ROLE_SLAVE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
rsc->role = RSC_ROLE_STARTED;
clear_past_failure = TRUE;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
unpack_rsc_migration(rsc, node, xml_op, data_set);
} else if (rsc->role < RSC_ROLE_STARTED) {
pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
set_active(rsc);
}
/* clear any previous failure actions */
if (clear_past_failure) {
switch (*on_fail) {
case action_fail_stop:
case action_fail_fence:
case action_fail_migrate:
case action_fail_standby:
pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
rsc->id, fail2text(*on_fail));
break;
case action_fail_block:
case action_fail_ignore:
case action_fail_recover:
case action_fail_restart_container:
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
break;
case action_fail_reset_remote:
if (rsc->remote_reconnect_ms == 0) {
/* With no reconnect interval, the connection is allowed to
* start again after the remote node is fenced and
* completely stopped. (With a reconnect interval, we wait
* for the failure to be cleared entirely before attempting
* to reconnect.)
*/
*on_fail = action_fail_ignore;
rsc->next_role = RSC_ROLE_UNKNOWN;
}
break;
}
}
}
gboolean
unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
enum action_fail_response * on_fail, pe_working_set_t * data_set)
{
int task_id = 0;
const char *key = NULL;
const char *task = NULL;
const char *task_key = NULL;
int rc = 0;
int status = PCMK_LRM_OP_UNKNOWN;
int target_rc = get_target_rc(xml_op);
guint interval_ms = 0;
gboolean expired = FALSE;
resource_t *parent = rsc;
enum action_fail_response failure_strategy = action_fail_recover;
CRM_CHECK(rsc != NULL, return FALSE);
CRM_CHECK(node != NULL, return FALSE);
CRM_CHECK(xml_op != NULL, return FALSE);
task_key = get_op_key(xml_op);
task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
CRM_CHECK(task != NULL, return FALSE);
CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
safe_str_eq(task, CRMD_ACTION_METADATA)) {
/* safe to ignore these */
return TRUE;
}
if (is_not_set(rsc->flags, pe_rsc_unique)) {
parent = uber_parent(rsc);
}
pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
if (node->details->unclean) {
pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
" Further action depends on the value of the stop's on-fail attribute",
node->details->uname, rsc->id);
}
if (status == PCMK_LRM_OP_ERROR) {
/* Older versions set this if rc != 0 but it's up to us to decide */
status = PCMK_LRM_OP_DONE;
}
if(status != PCMK_LRM_OP_NOT_INSTALLED) {
expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
}
/* Degraded results are informational only, re-map them to their error-free equivalents */
if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_OK;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
record_failed_op(xml_op, node, rsc, data_set);
}
} else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
rc = PCMK_OCF_RUNNING_MASTER;
/* Add them to the failed list to highlight them for the user */
if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER);
record_failed_op(xml_op, node, rsc, data_set);
}
}
if (expired && target_rc != rc) {
const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
key, node->details->uname,
services_ocf_exitcode_str(rc), rc,
services_ocf_exitcode_str(target_rc), target_rc);
if (interval_ms == 0) {
crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
goto done;
} else if(node->details->online && node->details->unclean == FALSE) {
crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
task_key, rc, magic, node->details->uname);
/* This is SO horrible, but we don't have access to CancelXmlOp() yet */
crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
goto done;
}
}
if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
}
pe_rsc_trace(rsc, "Handling status: %d", status);
switch (status) {
case PCMK_LRM_OP_CANCELLED:
/* do nothing?? */
pe_err("Don't know what to do for cancelled ops yet");
break;
case PCMK_LRM_OP_PENDING:
if (safe_str_eq(task, CRMD_ACTION_START)) {
set_bit(rsc->flags, pe_rsc_start_pending);
set_active(rsc);
} else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
rsc->role = RSC_ROLE_MASTER;
} else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
/* If a pending migrate_to action is outstanding on an unclean node,
 * we have to force a stop action on the target. */
const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
node_t *target = pe_find_node(data_set->nodes, migrate_target);
if (target) {
stop_action(rsc, target, FALSE);
}
}
if (rsc->pending_task == NULL) {
if (safe_str_eq(task, CRMD_ACTION_STATUS) && (interval_ms == 0)) {
/* Pending probes are not printed, even if pending
* operations are requested. If someone ever requests that
* behavior, uncomment this and the corresponding part of
* native.c:native_pending_task().
*/
/*rsc->pending_task = strdup("probe");*/
} else {
rsc->pending_task = strdup(task);
}
}
break;
case PCMK_LRM_OP_DONE:
pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_NOT_INSTALLED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if (failure_strategy == action_fail_ignore) {
crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
"Resource agent doesn't exist",
task_key, status, rc, node->details->uname);
/* This also gets the resource marked pe_rsc_failed later, so it is displayed as "FAILED" */
*on_fail = action_fail_migrate;
}
resource_location(parent, node, -INFINITY, "hard-error", data_set);
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
break;
case PCMK_LRM_OP_ERROR:
case PCMK_LRM_OP_ERROR_HARD:
case PCMK_LRM_OP_ERROR_FATAL:
case PCMK_LRM_OP_TIMEOUT:
case PCMK_LRM_OP_NOTSUPPORTED:
failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
if ((failure_strategy == action_fail_ignore)
|| (failure_strategy == action_fail_restart_container
&& safe_str_eq(task, CRMD_ACTION_STOP))) {
crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
task_key, rc, node->details->uname);
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
set_bit(rsc->flags, pe_rsc_failure_ignored);
record_failed_op(xml_op, node, rsc, data_set);
if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
*on_fail = failure_strategy;
}
} else {
unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
if(status == PCMK_LRM_OP_ERROR_HARD) {
do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
"Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
parent->id, node->details->uname,
task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, node, -INFINITY, "hard-error", data_set);
} else if(status == PCMK_LRM_OP_ERROR_FATAL) {
crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
parent->id, task, services_ocf_exitcode_str(rc), rc);
resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
}
}
break;
}
done:
pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
return TRUE;
}
gboolean
add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
{
const char *cluster_name = NULL;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
strdup(node->details->id));
if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
data_set->dc_node = node;
node->details->is_dc = TRUE;
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
} else {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
}
cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
if (cluster_name) {
g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
strdup(cluster_name));
}
unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
node->details->attrs, NULL, overwrite, data_set->now);
if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
const char *site_name = pe_node_attribute_raw(node, "site-name");
if (site_name) {
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(site_name));
} else if (cluster_name) {
/* Default to cluster-name if unset */
g_hash_table_insert(node->details->attrs,
strdup(CRM_ATTR_SITE_NAME),
strdup(cluster_name));
}
}
return TRUE;
}
static GListPtr
extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
{
int counter = -1;
int stop_index = -1;
int start_index = -1;
xmlNode *rsc_op = NULL;
GListPtr gIter = NULL;
GListPtr op_list = NULL;
GListPtr sorted_op_list = NULL;
/* extract operations */
op_list = NULL;
sorted_op_list = NULL;
for (rsc_op = __xml_first_child(rsc_entry); rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
crm_xml_add(rsc_op, "resource", rsc);
crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
op_list = g_list_prepend(op_list, rsc_op);
}
}
if (op_list == NULL) {
/* if there are no operations, there is nothing to do */
return NULL;
}
sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
/* create active recurring operations as optional */
if (active_filter == FALSE) {
return sorted_op_list;
}
op_list = NULL;
calculate_active_ops(sorted_op_list, &start_index, &stop_index);
for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
xmlNode *rsc_op = (xmlNode *) gIter->data;
counter++;
if (start_index < stop_index) {
crm_trace("Skipping %s: not active", ID(rsc_entry));
break;
} else if (counter < start_index) {
crm_trace("Skipping %s: old", ID(rsc_op));
continue;
}
op_list = g_list_append(op_list, rsc_op);
}
g_list_free(sorted_op_list);
return op_list;
}
GListPtr
find_operations(const char *rsc, const char *node, gboolean active_filter,
pe_working_set_t * data_set)
{
GListPtr output = NULL;
GListPtr intermediate = NULL;
xmlNode *tmp = NULL;
xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
node_t *this_node = NULL;
xmlNode *node_state = NULL;
for (node_state = __xml_first_child(status); node_state != NULL;
node_state = __xml_next_element(node_state)) {
if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
if (node != NULL && safe_str_neq(uname, node)) {
continue;
}
this_node = pe_find_node(data_set->nodes, uname);
if(this_node == NULL) {
CRM_LOG_ASSERT(this_node != NULL);
continue;
} else if (is_remote_node(this_node)) {
determine_remote_online_status(data_set, this_node);
} else {
determine_online_status(node_state, this_node, data_set);
}
if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
/* offline nodes run no resources...
* unless stonith is enabled in which case we need to
* make sure rsc start events happen after the stonith
*/
xmlNode *lrm_rsc = NULL;
tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
for (lrm_rsc = __xml_first_child(tmp); lrm_rsc != NULL;
lrm_rsc = __xml_next_element(lrm_rsc)) {
if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
continue;
}
intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
output = g_list_concat(output, intermediate);
}
}
}
}
}
return output;
}
diff --git a/tools/fake_transition.c b/tools/fake_transition.c
index 911679b458..676583404b 100644
--- a/tools/fake_transition.c
+++ b/tools/fake_transition.c
@@ -1,832 +1,864 @@
/*
* Copyright 2009-2018 Andrew Beekhof
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "fake_transition.h"
static bool fake_quiet = FALSE;
static cib_t *fake_cib = NULL;
static GListPtr fake_resource_list = NULL;
static GListPtr fake_op_fail_list = NULL;
gboolean bringing_nodes_online = FALSE;
#define STATUS_PATH_MAX 512
#define quiet_log(fmt, args...) do { \
if(fake_quiet) { \
crm_trace(fmt, ##args); \
} else { \
printf(fmt , ##args); \
} \
} while(0)
#define new_node_template "//"XML_CIB_TAG_NODE"[@uname='%s']"
#define node_template "//"XML_CIB_TAG_STATE"[@uname='%s']"
#define rsc_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']"
#define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s']"
/* #define op_template "//"XML_CIB_TAG_STATE"[@uname='%s']//"XML_LRM_TAG_RESOURCE"[@id='%s']/"XML_LRM_TAG_RSC_OP"[@id='%s' and @"XML_LRM_ATTR_CALLID"='%d']" */
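/* For example, with node "node1" and resource "rsc1" (hypothetical names),
 * rsc_template expands to //node_state[@uname='node1']//lrm_resource[@id='rsc1'],
 * assuming the usual tag definitions.
 */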
static void
inject_transient_attr(xmlNode * cib_node, const char *name, const char *value)
{
xmlNode *attrs = NULL;
xmlNode *instance_attrs = NULL;
xmlChar *node_path;
const char *node_uuid = ID(cib_node);
node_path = xmlGetNodePath(cib_node);
quiet_log(" + Injecting attribute %s=%s into %s '%s'\n",
name, value, node_path, ID(cib_node));
free(node_path);
attrs = first_named_child(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
if (attrs == NULL) {
attrs = create_xml_node(cib_node, XML_TAG_TRANSIENT_NODEATTRS);
crm_xml_add(attrs, XML_ATTR_ID, node_uuid);
}
instance_attrs = first_named_child(attrs, XML_TAG_ATTR_SETS);
if (instance_attrs == NULL) {
instance_attrs = create_xml_node(attrs, XML_TAG_ATTR_SETS);
crm_xml_add(instance_attrs, XML_ATTR_ID, node_uuid);
}
crm_create_nvpair_xml(instance_attrs, NULL, name, value);
}
static void
update_failcounts(xmlNode * cib_node, const char *resource, const char *task,
guint interval_ms, int rc)
{
if (rc == 0) {
return;
} else if ((rc == 7) && (interval_ms == 0)) {
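/* rc 7 from a probe just means "not running", which is not a failure */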
return;
} else {
char *name = NULL;
char *now = crm_itoa(time(NULL));
name = crm_failcount_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, "value++");
free(name);
name = crm_lastfailure_name(resource, task, interval_ms);
inject_transient_attr(cib_node, name, now);
free(name);
free(now);
}
}
static void
create_node_entry(cib_t * cib_conn, const char *node)
{
int rc = pcmk_ok;
char *xpath = crm_strdup_printf(new_node_template, node);
rc = cib_conn->cmds->query(cib_conn, xpath, NULL, cib_xpath | cib_sync_call | cib_scope_local);
if (rc == -ENXIO) {
xmlNode *cib_object = create_xml_node(NULL, XML_CIB_TAG_NODE);
crm_xml_add(cib_object, XML_ATTR_ID, node); // Use node name as ID
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_NODES, cib_object,
cib_sync_call | cib_scope_local);
/* Not bothering with a subsequent query to see if it exists;
we'll bomb out later in the call to query_node_uuid()... */
free_xml(cib_object);
}
free(xpath);
}
static lrmd_event_data_t *
create_op(xmlNode *cib_resource, const char *task, guint interval_ms,
int outcome)
{
lrmd_event_data_t *op = NULL;
xmlNode *xop = NULL;
op = calloc(1, sizeof(lrmd_event_data_t));
op->rsc_id = strdup(ID(cib_resource));
op->interval_ms = interval_ms;
op->op_type = strdup(task);
op->rc = outcome;
op->op_status = 0;
op->params = NULL; /* TODO: Fill me in */
op->t_run = time(NULL);
op->t_rcchange = op->t_run;
op->call_id = 0;
for (xop = __xml_first_child(cib_resource); xop != NULL; xop = __xml_next(xop)) {
int tmp = 0;
crm_element_value_int(xop, XML_LRM_ATTR_CALLID, &tmp);
if (tmp > op->call_id) {
op->call_id = tmp;
}
}
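/* Use one more than the highest call ID already recorded, so the
 * injected op sorts after all existing history entries. */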
op->call_id++;
return op;
}
static xmlNode *
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
{
return create_operation_update(cib_resource, op, CRM_FEATURE_SET, target_rc,
NULL, crm_system_name, LOG_TRACE);
}
static xmlNode *
inject_node_state(cib_t * cib_conn, const char *node, const char *uuid)
{
int rc = pcmk_ok;
xmlNode *cib_object = NULL;
char *xpath = crm_strdup_printf(node_template, node);
if (bringing_nodes_online) {
create_node_entry(cib_conn, node);
}
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
if (cib_object && ID(cib_object) == NULL) {
crm_err("Detected multiple node_state entries for xpath=%s, bailing", xpath);
crm_log_xml_warn(cib_object, "Duplicates");
free(xpath);
crm_exit(CRM_EX_SOFTWARE);
return NULL; // not reached, but makes static analysis happy
}
if (rc == -ENXIO) {
char *found_uuid = NULL;
if (uuid == NULL) {
query_node_uuid(cib_conn, node, &found_uuid, NULL);
} else {
found_uuid = strdup(uuid);
}
cib_object = create_xml_node(NULL, XML_CIB_TAG_STATE);
crm_xml_add(cib_object, XML_ATTR_UUID, found_uuid);
crm_xml_add(cib_object, XML_ATTR_UNAME, node);
cib_conn->cmds->create(cib_conn, XML_CIB_TAG_STATUS, cib_object,
cib_sync_call | cib_scope_local);
free_xml(cib_object);
free(found_uuid);
rc = cib_conn->cmds->query(cib_conn, xpath, &cib_object,
cib_xpath | cib_sync_call | cib_scope_local);
crm_trace("injecting node state for %s. rc is %d", node, rc);
}
free(xpath);
CRM_ASSERT(rc == pcmk_ok);
return cib_object;
}
static xmlNode *
modify_node(cib_t * cib_conn, char *node, gboolean up)
{
xmlNode *cib_node = inject_node_state(cib_conn, node, NULL);
if (up) {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_YES);
crm_xml_add(cib_node, XML_NODE_IS_PEER, ONLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_MEMBER);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_MEMBER);
} else {
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
crm_xml_add(cib_node, XML_NODE_IS_PEER, OFFLINESTATUS);
crm_xml_add(cib_node, XML_NODE_JOIN_STATE, CRMD_JOINSTATE_DOWN);
crm_xml_add(cib_node, XML_NODE_EXPECTED, CRMD_JOINSTATE_DOWN);
}
crm_xml_add(cib_node, XML_ATTR_ORIGIN, crm_system_name);
return cib_node;
}
static xmlNode *
find_resource_xml(xmlNode * cib_node, const char *resource)
{
xmlNode *match = NULL;
const char *node = crm_element_value(cib_node, XML_ATTR_UNAME);
char *xpath = crm_strdup_printf(rsc_template, node, resource);
match = get_xpath_object(xpath, cib_node, LOG_TRACE);
free(xpath);
return match;
}
static xmlNode *
-inject_resource(xmlNode * cib_node, const char *resource, const char *rclass, const char *rtype,
- const char *rprovider)
+inject_resource(xmlNode * cib_node, const char *resource, const char *lrm_name,
+ const char *rclass, const char *rtype, const char *rprovider)
{
xmlNode *lrm = NULL;
xmlNode *container = NULL;
xmlNode *cib_resource = NULL;
char *xpath = NULL;
cib_resource = find_resource_xml(cib_node, resource);
if (cib_resource != NULL) {
+ /* If an existing LRM history entry uses the resource name,
+ * continue using it, even if lrm_name is different.
+ */
return cib_resource;
}
+ // Check for history entry under preferred name
+ if (strcmp(resource, lrm_name)) {
+ cib_resource = find_resource_xml(cib_node, lrm_name);
+ if (cib_resource != NULL) {
+ return cib_resource;
+ }
+ }
+
/* One day, add query for class, provider, type */
if (rclass == NULL || rtype == NULL) {
fprintf(stderr, "Resource %s not found in the status section of %s."
" Please supply the class and type to continue\n", resource, ID(cib_node));
return NULL;
} else if (safe_str_neq(rclass, PCMK_RESOURCE_CLASS_OCF)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_STONITH)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SERVICE)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_UPSTART)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD)
&& safe_str_neq(rclass, PCMK_RESOURCE_CLASS_LSB)) {
fprintf(stderr, "Invalid class for %s: %s\n", resource, rclass);
return NULL;
} else if (is_set(pcmk_get_ra_caps(rclass), pcmk_ra_cap_provider)
&& (rprovider == NULL)) {
fprintf(stderr, "Please specify the provider for resource %s\n", resource);
return NULL;
}
xpath = (char *)xmlGetNodePath(cib_node);
- crm_info("Injecting new resource %s into %s '%s'", resource, xpath, ID(cib_node));
+ crm_info("Injecting new resource %s into %s '%s'", lrm_name, xpath, ID(cib_node));
free(xpath);
lrm = first_named_child(cib_node, XML_CIB_TAG_LRM);
if (lrm == NULL) {
const char *node_uuid = ID(cib_node);
lrm = create_xml_node(cib_node, XML_CIB_TAG_LRM);
crm_xml_add(lrm, XML_ATTR_ID, node_uuid);
}
container = first_named_child(lrm, XML_LRM_TAG_RESOURCES);
if (container == NULL) {
container = create_xml_node(lrm, XML_LRM_TAG_RESOURCES);
}
cib_resource = create_xml_node(container, XML_LRM_TAG_RESOURCE);
- crm_xml_add(cib_resource, XML_ATTR_ID, resource);
+
+ // If we're creating a new entry, use the preferred name
+ crm_xml_add(cib_resource, XML_ATTR_ID, lrm_name);
crm_xml_add(cib_resource, XML_AGENT_ATTR_CLASS, rclass);
crm_xml_add(cib_resource, XML_AGENT_ATTR_PROVIDER, rprovider);
crm_xml_add(cib_resource, XML_ATTR_TYPE, rtype);
return cib_resource;
}
#define XPATH_MAX 1024
static int
find_ticket_state(cib_t * the_cib, const char *ticket_id, xmlNode ** ticket_state_xml)
{
int offset = 0;
int rc = pcmk_ok;
xmlNode *xml_search = NULL;
char *xpath_string = NULL;
CRM_ASSERT(ticket_state_xml != NULL);
*ticket_state_xml = NULL;
xpath_string = calloc(1, XPATH_MAX);
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "%s", "/cib/status/tickets");
if (ticket_id) {
offset += snprintf(xpath_string + offset, XPATH_MAX - offset, "/%s[@id=\"%s\"]",
XML_CIB_TAG_TICKET_STATE, ticket_id);
}
CRM_LOG_ASSERT(offset > 0);
rc = the_cib->cmds->query(the_cib, xpath_string, &xml_search,
cib_sync_call | cib_scope_local | cib_xpath);
if (rc != pcmk_ok) {
goto bail;
}
crm_log_xml_debug(xml_search, "Match");
if (xml_has_children(xml_search) && ticket_id) {
fprintf(stdout, "Multiple ticket_states match ticket_id=%s\n", ticket_id);
}
*ticket_state_xml = xml_search;
bail:
free(xpath_string);
return rc;
}
static int
set_ticket_state_attr(const char *ticket_id, const char *attr_name,
const char *attr_value, cib_t * cib, int cib_options)
{
int rc = pcmk_ok;
xmlNode *xml_top = NULL;
xmlNode *ticket_state_xml = NULL;
rc = find_ticket_state(cib, ticket_id, &ticket_state_xml);
if (rc == pcmk_ok) {
crm_debug("Found a match state for ticket: id=%s", ticket_id);
xml_top = ticket_state_xml;
} else if (rc != -ENXIO) {
return rc;
} else {
xmlNode *xml_obj = NULL;
xml_top = create_xml_node(NULL, XML_CIB_TAG_STATUS);
xml_obj = create_xml_node(xml_top, XML_CIB_TAG_TICKETS);
ticket_state_xml = create_xml_node(xml_obj, XML_CIB_TAG_TICKET_STATE);
crm_xml_add(ticket_state_xml, XML_ATTR_ID, ticket_id);
}
crm_xml_add(ticket_state_xml, attr_name, attr_value);
crm_log_xml_debug(xml_top, "Update");
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, xml_top, cib_options);
free_xml(xml_top);
return rc;
}
void
modify_configuration(pe_working_set_t * data_set, cib_t *cib,
const char *quorum, const char *watchdog, GListPtr node_up, GListPtr node_down, GListPtr node_fail,
GListPtr op_inject, GListPtr ticket_grant, GListPtr ticket_revoke,
GListPtr ticket_standby, GListPtr ticket_activate)
{
int rc = pcmk_ok;
GListPtr gIter = NULL;
xmlNode *cib_op = NULL;
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
lrmd_event_data_t *op = NULL;
if (quorum) {
xmlNode *top = create_xml_node(NULL, XML_TAG_CIB);
quiet_log(" + Setting quorum: %s\n", quorum);
/* crm_xml_add(top, XML_ATTR_DC_UUID, dc_uuid); */
crm_xml_add(top, XML_ATTR_HAVE_QUORUM, quorum);
rc = cib->cmds->modify(cib, NULL, top, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
if (watchdog) {
quiet_log(" + Setting watchdog: %s\n", watchdog);
rc = update_attr_delegate(cib, cib_sync_call | cib_scope_local,
XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
XML_ATTR_HAVE_WATCHDOG, watchdog, FALSE, NULL, NULL);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = node_up; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Bringing node %s online\n", node);
cib_node = modify_node(cib, node, TRUE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = node_down; gIter != NULL; gIter = gIter->next) {
char xpath[STATUS_PATH_MAX];
char *node = (char *)gIter->data;
quiet_log(" + Taking node %s offline\n", node);
cib_node = modify_node(cib, node, FALSE);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node, XML_CIB_TAG_LRM);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", node,
XML_TAG_TRANSIENT_NODEATTRS);
cib->cmds->remove(cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
}
for (gIter = node_fail; gIter != NULL; gIter = gIter->next) {
char *node = (char *)gIter->data;
quiet_log(" + Failing node %s\n", node);
cib_node = modify_node(cib, node, TRUE);
crm_xml_add(cib_node, XML_NODE_IN_CLUSTER, XML_BOOLEAN_NO);
CRM_ASSERT(cib_node != NULL);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
free_xml(cib_node);
}
for (gIter = ticket_grant; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Granting ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_revoke; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Revoking ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "granted", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_standby; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Making ticket %s standby\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "true",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = ticket_activate; gIter != NULL; gIter = gIter->next) {
char *ticket_id = (char *)gIter->data;
quiet_log(" + Activating ticket %s\n", ticket_id);
rc = set_ticket_state_attr(ticket_id, "standby", "false",
cib, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
for (gIter = op_inject; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
int rc = 0;
int outcome = 0;
guint interval_ms = 0;
char *key = NULL;
char *node = NULL;
char *task = NULL;
char *resource = NULL;
const char *rtype = NULL;
const char *rclass = NULL;
const char *rprovider = NULL;
resource_t *rsc = NULL;
quiet_log(" + Injecting %s into the configuration\n", spec);
key = calloc(1, strlen(spec) + 1);
node = calloc(1, strlen(spec) + 1);
rc = sscanf(spec, "%[^@]@%[^=]=%d", key, node, &outcome);
if (rc != 3) {
fprintf(stderr, "Invalid operation spec: %s. Only found %d fields\n", spec, rc);
free(key);
free(node);
continue;
}
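/* Example spec (hypothetical names): "rsc1_monitor_10000@node2=7" injects
 * a monitor of rsc1 with a 10s interval on node2, returning rc 7. */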
parse_op_key(key, &resource, &task, &interval_ms);
rsc = pe_find_resource(data_set->resources, resource);
if (rsc == NULL) {
fprintf(stderr, " - Invalid resource name: %s\n", resource);
} else {
rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(rsc->xml, XML_ATTR_TYPE);
rprovider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER);
cib_node = inject_node_state(cib, node, NULL);
CRM_ASSERT(cib_node != NULL);
update_failcounts(cib_node, resource, task, interval_ms, outcome);
- cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider);
+ cib_resource = inject_resource(cib_node, resource, resource,
+ rclass, rtype, rprovider);
CRM_ASSERT(cib_resource != NULL);
op = create_op(cib_resource, task, interval_ms, outcome);
CRM_ASSERT(op != NULL);
cib_op = inject_op(cib_resource, op, 0);
CRM_ASSERT(cib_op != NULL);
lrmd_free_event(op);
rc = cib->cmds->modify(cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
}
free(task);
free(node);
free(key);
}
}
static gboolean
exec_pseudo_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK_KEY);
action->confirmed = TRUE;
quiet_log(" * Pseudo action: %s%s%s\n", task, node ? " on " : "", node ? node : "");
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_rsc_action(crm_graph_t * graph, crm_action_t * action)
{
int rc = 0;
GListPtr gIter = NULL;
lrmd_event_data_t *op = NULL;
int target_outcome = 0;
const char *rtype = NULL;
const char *rclass = NULL;
const char *resource = NULL;
const char *rprovider = NULL;
+ const char *lrm_name = NULL;
const char *operation = crm_element_value(action->xml, "operation");
const char *target_rc_s = crm_meta_value(action->params, XML_ATTR_TE_TARGET_RC);
xmlNode *cib_node = NULL;
xmlNode *cib_resource = NULL;
xmlNode *action_rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
char *node = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
char *uuid = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET_UUID);
const char *router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
if (safe_str_eq(operation, CRM_OP_PROBED)
|| safe_str_eq(operation, CRM_OP_REPROBE)) {
crm_info("Skipping %s op for %s", operation, node);
goto done;
}
if (action_rsc == NULL) {
crm_log_xml_err(action->xml, "Bad");
free(node); free(uuid);
return FALSE;
}
/* Look for the preferred name.
 * If not found, try the expected 'local' name.
 * If that is not found either, use the preferred name anyway.
 */
resource = crm_element_value(action_rsc, XML_ATTR_ID);
+ lrm_name = resource; // Preferred name when writing history
if (pe_find_resource(fake_resource_list, resource) == NULL) {
const char *longname = crm_element_value(action_rsc, XML_ATTR_ID_LONG);
if (pe_find_resource(fake_resource_list, longname)) {
resource = longname;
}
}
if (safe_str_eq(operation, "delete") || safe_str_eq(operation, RSC_METADATA)) {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, operation, node);
goto done;
}
rclass = crm_element_value(action_rsc, XML_AGENT_ATTR_CLASS);
rtype = crm_element_value(action_rsc, XML_ATTR_TYPE);
rprovider = crm_element_value(action_rsc, XML_AGENT_ATTR_PROVIDER);
if (target_rc_s != NULL) {
target_outcome = crm_parse_int(target_rc_s, "0");
}
CRM_ASSERT(fake_cib->cmds->query(fake_cib, NULL, NULL, cib_sync_call | cib_scope_local) ==
pcmk_ok);
cib_node = inject_node_state(fake_cib, node, (router_node? node : uuid));
CRM_ASSERT(cib_node != NULL);
- cib_resource = inject_resource(cib_node, resource, rclass, rtype, rprovider);
+ cib_resource = inject_resource(cib_node, resource, lrm_name,
+ rclass, rtype, rprovider);
if (cib_resource == NULL) {
crm_err("invalid resource in transition");
free(node); free(uuid);
free_xml(cib_node);
return FALSE;
}
op = convert_graph_action(cib_resource, action, 0, target_outcome);
if (op->interval_ms) {
quiet_log(" * Resource action: %-15s %s=%u on %s\n",
resource, op->op_type, op->interval_ms, node);
} else {
quiet_log(" * Resource action: %-15s %s on %s\n", resource, op->op_type, node);
}
for (gIter = fake_op_fail_list; gIter != NULL; gIter = gIter->next) {
char *spec = (char *)gIter->data;
- char *key = crm_strdup_printf(CRM_OP_FMT "@%s=", resource, op->op_type,
- op->interval_ms, node);
+ char *key = NULL;
+ const char *match_name = NULL;
+ // Allow user to specify anonymous clone with or without instance number
+ key = crm_strdup_printf(CRM_OP_FMT "@%s=", resource, op->op_type,
+ op->interval_ms, node);
if (strncasecmp(key, spec, strlen(key)) == 0) {
+ match_name = resource;
+ }
+ free(key);
+
+ if ((match_name == NULL) && strcmp(resource, lrm_name)) {
+ key = crm_strdup_printf(CRM_OP_FMT "@%s=", lrm_name, op->op_type,
+ op->interval_ms, node);
+ if (strncasecmp(key, spec, strlen(key)) == 0) {
+ match_name = lrm_name;
+ }
+ free(key);
+ }
+
+ if (match_name != NULL) {
+
rc = sscanf(spec, "%*[^=]=%d", (int *) &op->rc);
- // ${resource}_${task}_${interval_in_ms}@${node}=${rc}
+ // ${match_name}_${task}_${interval_in_ms}@${node}=${rc}
if (rc != 1) {
fprintf(stderr,
"Invalid failed operation spec: %s. Result code must be integer\n",
spec);
- free(key);
continue;
}
action->failed = TRUE;
graph->abort_priority = INFINITY;
printf("\tPretending action %d failed with rc=%d\n", action->id, op->rc);
- update_failcounts(cib_node, resource, op->op_type, op->interval_ms,
- op->rc);
- free(key);
+ update_failcounts(cib_node, match_name, op->op_type,
+ op->interval_ms, op->rc);
break;
}
- free(key);
}
inject_op(cib_resource, op, target_outcome);
lrmd_free_event(op);
rc = fake_cib->cmds->modify(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
done:
free(node); free(uuid);
free_xml(cib_node);
action->confirmed = TRUE;
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_crmd_action(crm_graph_t * graph, crm_action_t * action)
{
const char *node = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
const char *task = crm_element_value(action->xml, XML_LRM_ATTR_TASK);
xmlNode *rsc = first_named_child(action->xml, XML_CIB_TAG_RESOURCE);
action->confirmed = TRUE;
if(rsc) {
quiet_log(" * Cluster action: %s for %s on %s\n", task, ID(rsc), node);
} else {
quiet_log(" * Cluster action: %s on %s\n", task, node);
}
update_graph(graph, action);
return TRUE;
}
static gboolean
exec_stonith_action(crm_graph_t * graph, crm_action_t * action)
{
const char *op = crm_meta_value(action->params, "stonith_action");
char *target = crm_element_value_copy(action->xml, XML_LRM_ATTR_TARGET);
quiet_log(" * Fencing %s (%s)\n", target, op);
if(safe_str_neq(op, "on")) {
int rc = 0;
char xpath[STATUS_PATH_MAX];
xmlNode *cib_node = modify_node(fake_cib, target, FALSE);
crm_xml_add(cib_node, XML_ATTR_ORIGIN, __FUNCTION__);
CRM_ASSERT(cib_node != NULL);
rc = fake_cib->cmds->replace(fake_cib, XML_CIB_TAG_STATUS, cib_node,
cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target, XML_CIB_TAG_LRM);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
snprintf(xpath, STATUS_PATH_MAX, "//node_state[@uname='%s']/%s", target,
XML_TAG_TRANSIENT_NODEATTRS);
fake_cib->cmds->remove(fake_cib, xpath, NULL,
cib_xpath | cib_sync_call | cib_scope_local);
free_xml(cib_node);
}
action->confirmed = TRUE;
update_graph(graph, action);
free(target);
return TRUE;
}
int
run_simulation(pe_working_set_t * data_set, cib_t *cib, GListPtr op_fail_list, bool quiet)
{
crm_graph_t *transition = NULL;
enum transition_status graph_rc = -1;
crm_graph_functions_t exec_fns = {
exec_pseudo_action,
exec_rsc_action,
exec_crmd_action,
exec_stonith_action,
};
fake_cib = cib;
fake_quiet = quiet;
fake_op_fail_list = op_fail_list;
quiet_log("\nExecuting cluster transition:\n");
set_graph_functions(&exec_fns);
transition = unpack_graph(data_set->graph, crm_system_name);
print_graph(LOG_DEBUG, transition);
fake_resource_list = data_set->resources;
do {
graph_rc = run_graph(transition);
} while (graph_rc == transition_active);
fake_resource_list = NULL;
if (graph_rc != transition_complete) {
fprintf(stdout, "Transition failed: %s\n", transition_status(graph_rc));
print_graph(LOG_ERR, transition);
}
destroy_graph(transition);
if (graph_rc != transition_complete) {
fprintf(stdout, "An invalid transition was produced\n");
}
if (quiet == FALSE) {
xmlNode *cib_object = NULL;
int rc = fake_cib->cmds->query(fake_cib, NULL, &cib_object, cib_sync_call | cib_scope_local);
CRM_ASSERT(rc == pcmk_ok);
cleanup_alloc_calculations(data_set);
data_set->input = cib_object;
}
if (graph_rc != transition_complete) {
return graph_rc;
}
return 0;
}